Skip to content

Commit 5140751

Browse files
Fix some new type warnings from mypy
1 parent 0c0087b commit 5140751

File tree

5 files changed: 100 additions and 14 deletions.

elasticsearch/dsl/_async/document.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta):
9696

9797
@classmethod
9898
def _get_using(cls, using: Optional[AsyncUsingType] = None) -> AsyncUsingType:
99-
return cast(AsyncUsingType, using or cls._index._using)
99+
return using or cls._index._using
100100

101101
@classmethod
102102
def _get_connection(

elasticsearch/dsl/_sync/document.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ class Document(DocumentBase, metaclass=IndexMeta):
9292

9393
@classmethod
9494
def _get_using(cls, using: Optional[UsingType] = None) -> UsingType:
95-
return cast(UsingType, using or cls._index._using)
95+
return using or cls._index._using
9696

9797
@classmethod
9898
def _get_connection(cls, using: Optional[UsingType] = None) -> "Elasticsearch":

elasticsearch/dsl/field.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1290,7 +1290,7 @@ def _deserialize(self, data: Any) -> Union[datetime, date]:
12901290
if isinstance(data, datetime):
12911291
if self._default_timezone and data.tzinfo is None:
12921292
data = data.replace(tzinfo=self._default_timezone)
1293-
return data
1293+
return cast(datetime, data)
12941294
if isinstance(data, date):
12951295
return data
12961296
if isinstance(data, int):
@@ -3689,6 +3689,11 @@ class SemanticText(Field):
36893689
by using the Update mapping API. Use the Create inference API to
36903690
create the endpoint. If not specified, the inference endpoint
36913691
defined by inference_id will be used at both index and query time.
3692+
:arg chunking_settings: Settings for chunking text into smaller
3693+
passages. If specified, these will override the chunking settings
3694+
sent in the inference endpoint associated with inference_id. If
3695+
chunking settings are updated, they will not be applied to
3696+
existing documents until they are reindexed.
36923697
"""
36933698

36943699
name = "semantic_text"
@@ -3699,6 +3704,9 @@ def __init__(
36993704
meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT,
37003705
inference_id: Union[str, "DefaultType"] = DEFAULT,
37013706
search_inference_id: Union[str, "DefaultType"] = DEFAULT,
3707+
chunking_settings: Union[
3708+
"types.ChunkingSettings", Dict[str, Any], "DefaultType"
3709+
] = DEFAULT,
37023710
**kwargs: Any,
37033711
):
37043712
if meta is not DEFAULT:
@@ -3707,6 +3715,8 @@ def __init__(
37073715
kwargs["inference_id"] = inference_id
37083716
if search_inference_id is not DEFAULT:
37093717
kwargs["search_inference_id"] = search_inference_id
3718+
if chunking_settings is not DEFAULT:
3719+
kwargs["chunking_settings"] = chunking_settings
37103720
super().__init__(*args, **kwargs)
37113721

37123722

elasticsearch/dsl/query.py

Lines changed: 43 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1382,7 +1382,49 @@ def __init__(
13821382
min_term_freq: Union[int, "DefaultType"] = DEFAULT,
13831383
min_word_length: Union[int, "DefaultType"] = DEFAULT,
13841384
routing: Union[str, "DefaultType"] = DEFAULT,
1385-
stop_words: Union[str, Sequence[str], "DefaultType"] = DEFAULT,
1385+
stop_words: Union[
1386+
Literal[
1387+
"_arabic_",
1388+
"_armenian_",
1389+
"_basque_",
1390+
"_bengali_",
1391+
"_brazilian_",
1392+
"_bulgarian_",
1393+
"_catalan_",
1394+
"_cjk_",
1395+
"_czech_",
1396+
"_danish_",
1397+
"_dutch_",
1398+
"_english_",
1399+
"_estonian_",
1400+
"_finnish_",
1401+
"_french_",
1402+
"_galician_",
1403+
"_german_",
1404+
"_greek_",
1405+
"_hindi_",
1406+
"_hungarian_",
1407+
"_indonesian_",
1408+
"_irish_",
1409+
"_italian_",
1410+
"_latvian_",
1411+
"_lithuanian_",
1412+
"_norwegian_",
1413+
"_persian_",
1414+
"_portuguese_",
1415+
"_romanian_",
1416+
"_russian_",
1417+
"_serbian_",
1418+
"_sorani_",
1419+
"_spanish_",
1420+
"_swedish_",
1421+
"_thai_",
1422+
"_turkish_",
1423+
"_none_",
1424+
],
1425+
Sequence[str],
1426+
"DefaultType",
1427+
] = DEFAULT,
13861428
unlike: Union[
13871429
Union[str, "types.LikeDocument"],
13881430
Sequence[Union[str, "types.LikeDocument"]],

elasticsearch/dsl/types.py

Lines changed: 44 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,48 @@ def __init__(
142142
super().__init__(kwargs)
143143

144144

145+
class ChunkingSettings(AttrDict[Any]):
146+
"""
147+
:arg strategy: (required) The chunking strategy: `sentence` or `word`.
148+
Defaults to `sentence` if omitted.
149+
:arg max_chunk_size: (required) The maximum size of a chunk in words.
150+
This value cannot be higher than `300` or lower than `20` (for
151+
`sentence` strategy) or `10` (for `word` strategy). Defaults to
152+
`250` if omitted.
153+
:arg overlap: The number of overlapping words for chunks. It is
154+
applicable only to a `word` chunking strategy. This value cannot
155+
be higher than half the `max_chunk_size` value. Defaults to `100`
156+
if omitted.
157+
:arg sentence_overlap: The number of overlapping sentences for chunks.
158+
It is applicable only for a `sentence` chunking strategy. It can
159+
be either `1` or `0`. Defaults to `1` if omitted.
160+
"""
161+
162+
strategy: Union[str, DefaultType]
163+
max_chunk_size: Union[int, DefaultType]
164+
overlap: Union[int, DefaultType]
165+
sentence_overlap: Union[int, DefaultType]
166+
167+
def __init__(
168+
self,
169+
*,
170+
strategy: Union[str, DefaultType] = DEFAULT,
171+
max_chunk_size: Union[int, DefaultType] = DEFAULT,
172+
overlap: Union[int, DefaultType] = DEFAULT,
173+
sentence_overlap: Union[int, DefaultType] = DEFAULT,
174+
**kwargs: Any,
175+
):
176+
if strategy is not DEFAULT:
177+
kwargs["strategy"] = strategy
178+
if max_chunk_size is not DEFAULT:
179+
kwargs["max_chunk_size"] = max_chunk_size
180+
if overlap is not DEFAULT:
181+
kwargs["overlap"] = overlap
182+
if sentence_overlap is not DEFAULT:
183+
kwargs["sentence_overlap"] = sentence_overlap
184+
super().__init__(kwargs)
185+
186+
145187
class ClassificationInferenceOptions(AttrDict[Any]):
146188
"""
147189
:arg num_top_classes: Specifies the number of top class predictions to
@@ -1593,11 +1635,7 @@ class InnerHits(AttrDict[Any]):
15931635
DefaultType,
15941636
]
15951637
seq_no_primary_term: Union[bool, DefaultType]
1596-
fields: Union[
1597-
Union[str, InstrumentedField],
1598-
Sequence[Union[str, InstrumentedField]],
1599-
DefaultType,
1600-
]
1638+
fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType]
16011639
sort: Union[
16021640
Union[Union[str, InstrumentedField], "SortOptions"],
16031641
Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],
@@ -1632,11 +1670,7 @@ def __init__(
16321670
DefaultType,
16331671
] = DEFAULT,
16341672
seq_no_primary_term: Union[bool, DefaultType] = DEFAULT,
1635-
fields: Union[
1636-
Union[str, InstrumentedField],
1637-
Sequence[Union[str, InstrumentedField]],
1638-
DefaultType,
1639-
] = DEFAULT,
1673+
fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] = DEFAULT,
16401674
sort: Union[
16411675
Union[Union[str, InstrumentedField], "SortOptions"],
16421676
Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],

0 commit comments

Comments
 (0)