-
Notifications
You must be signed in to change notification settings - Fork 801
/
Copy pathparent_child.py
275 lines (224 loc) · 8.22 KB
/
parent_child.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
# Licensed to Elasticsearch B.V. under one or more contributor
# license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright
# ownership. Elasticsearch B.V. licenses this file to you under
# the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Complex data model example modeling stackoverflow-like data.
It is used to showcase several key features of elasticsearch-dsl:
* Object and Nested fields: see User and Comment classes and fields they
are used in
* method add_comment is used to add comments
* Parent/Child relationship
* See the Join field on Post creating the relationship between Question
and Answer
* the _matches class methods allow hits from the same index to be wrapped in
the proper classes
* to see how child objects are created see Question.add_answer
* Question.search_answers shows how to query for children of a
particular parent
"""
import os
from datetime import datetime
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
from elasticsearch_dsl import (
Date,
Document,
InnerDoc,
Join,
Keyword,
Long,
Search,
Text,
connections,
mapped_field,
)
class User(InnerDoc):
    """
    Class used to represent a denormalized user stored on other objects.

    Instances are embedded in other documents (see the ``author`` fields of
    ``Comment`` and ``Post``) rather than stored as documents of their own.
    """

    # numeric identifier of the user, mapped as Long
    id: int = mapped_field(Long())
    # optional sign-up timestamp, mapped as Date
    signed_up: Optional[datetime] = mapped_field(Date())
    # the three fields below are mapped as Text with an additional
    # ``keyword`` sub-field
    username: str = mapped_field(Text(fields={"keyword": Keyword()}))
    email: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()}))
    location: Optional[str] = mapped_field(Text(fields={"keyword": Keyword()}))
class Comment(InnerDoc):
    """
    Class wrapper for nested comment objects.

    ``Post`` keeps a list of these in its ``comments`` field and
    ``Post.add_comment`` is the helper that creates and appends them.
    """

    # denormalized copy of the user who wrote the comment
    author: User
    # when the comment was written
    created: datetime
    # text of the comment
    content: str
class Post(Document):
    """
    Shared base of Question and Answer; declares the fields both carry.
    """

    author: User

    if TYPE_CHECKING:
        # seen by type checkers only: these describe extra keyword arguments
        # that the constructor accepts
        _routing: str = mapped_field(default=None)
        _id: Optional[int] = mapped_field(default=None)

    created: Optional[datetime] = mapped_field(default=None)
    body: str = mapped_field(default="")
    comments: List[Comment] = mapped_field(default_factory=list)
    question_answer: Any = mapped_field(
        Join(relations={"question": "answer"}),
        default_factory=dict,
    )

    @classmethod
    def _matches(cls, hit: Dict[str, Any]) -> bool:
        """Always decline: Post is abstract and must not wrap any hit."""
        return False

    class Index:
        name = "test-qa-site"
        settings = {"number_of_shards": 1, "number_of_replicas": 0}

    def add_comment(
        self,
        user: User,
        content: str,
        created: Optional[datetime] = None,
        commit: Optional[bool] = True,
    ) -> Comment:
        """
        Append a new comment to this post and return it.

        :arg user: author of the comment
        :arg content: text of the comment
        :arg created: timestamp of the comment; the current time is used
            when it is omitted
        :arg commit: when truthy the post is saved right after the comment
            has been appended
        """
        stamp = created if created else datetime.now()
        comment = Comment(author=user, content=content, created=stamp)
        self.comments.append(comment)
        if not commit:
            return comment
        self.save()
        return comment

    def save(self, **kwargs: Any) -> None:  # type: ignore[override]
        """
        Save the post, stamping ``created`` with the current time first when
        it has not been set. Keyword arguments are forwarded to the parent
        class' ``save``.
        """
        needs_timestamp = self.created is None
        if needs_timestamp:
            self.created = datetime.now()
        super().save(**kwargs)
class Question(Post):
    """
    Parent side of the ``question``/``answer`` join relation.

    Adds ``tags`` and ``title`` to the fields inherited from ``Post`` and
    offers helpers to create and to look up the answers attached to it.
    """

    # .tags will return empty list if not present
    tags: List[str] = mapped_field(default_factory=list)
    title: str = mapped_field(Text(fields={"keyword": Keyword()}), default="")

    @classmethod
    def _matches(cls, hit: Dict[str, Any]) -> bool:
        """
        Use Question class for parent documents.

        A hit that carries no ``_source`` or no ``question_answer`` value
        (for instance because the search restricted the returned source) is
        reported as a non-match instead of raising ``KeyError``.
        """
        source = hit.get("_source") or {}
        return bool(source.get("question_answer") == "question")

    @classmethod
    def search(cls, **kwargs: Any) -> Search:  # type: ignore[override]
        """Return a search on the index that only matches questions."""
        return cls._index.search(**kwargs).filter("term", question_answer="question")

    def add_answer(
        self,
        user: User,
        body: str,
        created: Optional[datetime] = None,
        accepted: bool = False,
        commit: Optional[bool] = True,
    ) -> "Answer":
        """
        Create an answer that is a child of this question and return it.

        :arg user: author of the answer
        :arg body: text of the answer
        :arg created: creation time of the answer, passed through unchanged
        :arg accepted: stored as the answer's ``is_accepted`` flag
        :arg commit: when truthy the answer is saved before it is returned
        """
        answer = Answer(
            # required make sure the answer is stored in the same shard
            _routing=self.meta.id,
            # set up the parent/child mapping
            question_answer={"name": "answer", "parent": self.meta.id},
            # pass in the field values
            author=user,
            created=created,
            body=body,
            is_accepted=accepted,
        )
        if commit:
            answer.save()
        return answer

    def search_answers(self) -> Search:
        """Return a search for the answers whose parent is this question."""
        # search only our index
        s = Answer.search()
        # filter for answers belonging to us
        s = s.filter("parent_id", type="answer", id=self.meta.id)
        # add routing to only go to specific shard
        s = s.params(routing=self.meta.id)
        return s

    def get_answers(self) -> List[Any]:
        """
        Get answers either from inner_hits already present or by searching
        elasticsearch.
        """
        if "inner_hits" in self.meta and "answer" in self.meta.inner_hits:
            return cast(List[Any], self.meta.inner_hits["answer"].hits)
        # iterating the search runs it; collect every hit into a list
        return list(self.search_answers())

    def save(self, **kwargs: Any) -> None:  # type: ignore[override]
        """
        Mark the document as a ``question`` in the join field, then save it
        through the parent class with the given keyword arguments.
        """
        self.question_answer = "question"
        super().save(**kwargs)
class Answer(Post):
    """
    Child side of the ``question``/``answer`` join relation.

    Adds the ``is_accepted`` flag to the fields inherited from ``Post``.
    """

    is_accepted: bool = mapped_field(default=False)

    @classmethod
    def _matches(cls, hit: Dict[str, Any]) -> bool:
        """
        Use Answer class for child documents with child name 'answer'.

        A hit that carries no ``_source`` or no ``question_answer`` value
        (for instance because the search restricted the returned source) is
        reported as a non-match instead of raising ``KeyError``.
        """
        source = hit.get("_source") or {}
        join = source.get("question_answer")
        return isinstance(join, dict) and join.get("name") == "answer"

    @classmethod
    def search(cls, **kwargs: Any) -> Search:  # type: ignore[override]
        """Return a search on the index that leaves out questions."""
        return cls._index.search(**kwargs).exclude("term", question_answer="question")

    def get_question(self) -> Optional[Question]:
        """
        Return the parent question, fetching it on the first call only.

        The question is looked up by the ``parent`` id stored in the join
        field, in the index recorded on ``self.meta``.
        """
        # cache question in self.meta
        # any attributes set on self would be interpreted as fields
        if "question" not in self.meta:
            self.meta.question = Question.get(
                id=self.question_answer.parent, index=self.meta.index
            )
        return cast(Optional[Question], self.meta.question)

    def save(self, **kwargs: Any) -> None:  # type: ignore[override]
        """
        Save the answer, routed by the id of its parent question. Keyword
        arguments are forwarded to the parent class' ``save``.
        """
        # set routing to parents id automatically
        self.meta.routing = self.question_answer.parent
        super().save(**kwargs)
def setup() -> None:
    """
    Derive a composable index template named "base" (priority 100) from the
    index of ``Post`` and save it into elasticsearch.
    """
    Post._index.as_composable_template("base", priority=100).save()
def main() -> Answer:
    """
    Run the example end to end and return the answer that was created.

    Connects to the cluster named by the ``ELASTICSEARCH_URL`` environment
    variable, saves the index template, stores one question and adds one
    answer to it. The connection is closed on the way out, including when
    one of the steps raises.

    :raises KeyError: if ``ELASTICSEARCH_URL`` is not set
    """
    # initiate the default connection to elasticsearch
    connections.create_connection(hosts=[os.environ["ELASTICSEARCH_URL"]])

    try:
        # create index
        setup()

        # user objects to use
        nick = User(
            id=47,
            signed_up=datetime(2017, 4, 3),
            username="fxdgear",
            email="[email protected]",
            location="Colorado",
        )
        honza = User(
            id=42,
            signed_up=datetime(2013, 4, 3),
            username="honzakral",
            email="[email protected]",
            location="Prague",
        )

        # create a question object
        question = Question(
            _id=1,
            author=nick,
            tags=["elasticsearch", "python"],
            title="How do I use elasticsearch from Python?",
            body="""
I want to use elasticsearch, how do I do it from Python?
""",
        )
        question.save()
        return question.add_answer(honza, "Just use `elasticsearch-py`!")
    finally:
        # close the connection, also when one of the steps above failed
        connections.get_connection().close()
# run the example only when the file is executed as a script, not on import
if __name__ == "__main__":
    main()