Skip to content

Commit f07365f

Browse files
authored
Added epoch_md5 document ID type (elastic#28)
* Added epoch_md5 document id type * Updated README and added SHA1 id to the document id benchmark * Refactoring of document id challenge following review * Updated note about disk space requirements * Updated note about index size and disk space requirements * Updates following review
1 parent aa3f097 commit f07365f

File tree

6 files changed

+682
-137
lines changed

6 files changed

+682
-137
lines changed

README.md

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,34 @@ The table below shows the track parameters that can be adjusted along with defau
188188
| --------- | ----------- | ---- | ------------- |
189189
| `bulk_indexing_clients` | Number of bulk indexing clients/connections | `int` | `32` |
190190

191+
### 10) document_id_evaluation
192+
193+
This challenge examines the indexing throughput as a function of shard size as well as the resulting storage requirements for a set of different types of document IDs. For each document ID type, it indexes 200 million documents into a single-shard index, which should be about 40GB in size. Once all data has been indexed, index statistics are recorded before and after a forcemerge down to a single segment.
194+
195+
The following document id types are benchmarked:
196+
197+
`auto` - This test uses document IDs autogenerated by Elasticsearch. This allows Elasticsearch to optimize for indexing speed, as the operation cannot be an update.
198+
199+
`uuid` - This test uses a UUID4 as document ID. This is largely random in nature. The `-` separator characters, which never change, have been removed from it to make it a bit shorter.
200+
201+
`sha1` - This test uses a SHA1 hash formatted as a hexadecimal string as document ID.
202+
203+
`epoch_uuid` - This test uses a UUID string prefixed by the hexadecimal representation of an epoch timestamp. This makes identifiers largely ordered over time, which can have a positive impact on indexing throughput.
204+
205+
`epoch_md5` - This test uses a base64-encoded MD5 hash prefixed by the hexadecimal representation of an epoch timestamp. This makes identifiers largely ordered over time, which can have a positive impact on indexing throughput.
206+
207+
`epoch_md5-10pct/60s` - This test uses the `epoch_md5` identifier described above, but simulates a portion of events arriving delayed by setting the timestamp to 60s (1 minute) in the past for 10% of events.
208+
209+
`epoch_md5-10pct/300s` - This test uses the `epoch_md5` identifier described above, but simulates a portion of events arriving delayed by setting the timestamp to 300s (5 minutes) in the past for 10% of events.
210+
211+
Note that this challenge will generate up to ~300GB of data on disk and will require additional space for merging and overhead. Make sure around 400GB of disk space is available before running this to be on the safe side.
212+
213+
The table below shows the track parameters that can be adjusted along with default values:
214+
215+
| Parameter | Explanation | Type | Default Value |
216+
| --------- | ----------- | ---- | ------------- |
217+
| `bulk_indexing_clients` | Number of bulk indexing clients/connections | `int` | `20` |
218+
191219
## Custom parameter sources
192220

193221
### elasticlogs\_bulk\_source
Lines changed: 92 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -1,222 +1,182 @@
11
{% set p_bulk_indexing_clients = (bulk_indexing_clients | default(20)) %}
2-
{% set p_iterations = bulk_indexing_iterations | default(100000) %}
2+
{% set p_iterations = bulk_indexing_iterations | default(200000) %}
33
{% set p_iterations_per_client = (p_iterations / p_bulk_indexing_clients) | int %}
44

55
{
66
"name": "document_id_evaluation",
7-
"description": "Index about 20GB of data into five single shard indices using different document ID types. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.",
7+
"description": "Indexes about 40GB of data into seven single shard indices using different document ID types. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.",
88
"meta": {
99
"client_count": {{ p_bulk_indexing_clients }},
1010
"benchmark_type": "document-id-evaluation"
1111
},
1212
"schedule": [
13+
{% for id in [{ 'type': 'auto', 'desc': 'auto' },
14+
{ 'type': 'uuid', 'desc': 'uuid' },
15+
{ 'type': 'sha1', 'desc': 'sha1' },
16+
{ 'type': 'epoch_uuid', 'desc': 'epoch_uuid' },
17+
{ 'type': 'epoch_md5', 'desc': 'epoch_md5'} ] %}
1318
{
14-
"name": "deleteindex_elasticlogs-auto",
19+
"name": "deleteindex_elasticlogs-{{ id['desc'] }}",
1520
"operation": {
1621
"operation-type": "delete-index",
17-
"index": "elasticlogs-auto"
18-
}
19-
},
20-
{
21-
"name": "deleteindex_elasticlogs-uuid",
22-
"operation": {
23-
"operation-type": "delete-index",
24-
"index": "elasticlogs-uuid"
25-
}
26-
},
27-
{
28-
"name": "deleteindex_elasticlogs-epoch-no_delay",
29-
"operation": {
30-
"operation-type": "delete-index",
31-
"index": "elasticlogs-epoch-no_delay"
32-
}
33-
},
34-
{
35-
"name": "deleteindex_elasticlogs-epoch-10pct_60s",
36-
"operation": {
37-
"operation-type": "delete-index",
38-
"index": "elasticlogs-epoch-10pct_60s"
39-
}
40-
},
41-
{
42-
"name": "deleteindex_elasticlogs-epoch-10pct_300s",
43-
"operation": {
44-
"operation-type": "delete-index",
45-
"index": "elasticlogs-epoch-10pct_300s"
46-
}
22+
"index": "elasticlogs-{{ id['desc'] }}"
23+
},
24+
"include-in-reporting": false
4725
},
4826
{
49-
"name": "create_elasticlogs-auto",
27+
"name": "create_elasticlogs-{{ id['desc'] }}",
5028
"operation": {
5129
"operation-type": "createindex",
52-
"index_name": "elasticlogs-auto",
30+
"index_name": "elasticlogs-{{ id['desc'] }}",
5331
"index_template_body": {
54-
"template": "elasticlogs-auto",
32+
"template": "elasticlogs-{{ id['desc'] }}",
5533
"settings": {
5634
"index.refresh_interval": "5s",
5735
"index.codec": "best_compression",
36+
"index.translog.retention.size": "10mb",
5837
"index.number_of_replicas": 0,
5938
"index.number_of_shards": 1
6039
},
6140
"mappings": "mappings.json",
6241
"aliases": {}
6342
},
64-
"index_template_name": "elasticlogs-auto"
65-
}
43+
"index_template_name": "elasticlogs-{{ id['desc'] }}"
44+
},
45+
"include-in-reporting": false
6646
},
6747
{
68-
"name": "index-append-1000-elasticlogs-auto",
48+
"name": "index-append-1000-elasticlogs-{{ id['desc'] }}",
6949
"operation": {
7050
"operation-type": "bulk",
7151
"param-source": "elasticlogs_bulk",
72-
"index": "elasticlogs-auto",
73-
"bulk-size": 1000
52+
"index": "elasticlogs-{{ id['desc'] }}",
53+
"bulk-size": 1000,
54+
"id_type": "{{ id['type'] }}"
7455
},
7556
"iterations": {{ p_iterations_per_client }},
7657
"clients": {{ p_bulk_indexing_clients }},
7758
"meta": {
78-
"id_mode": "auto"
59+
"id_mode": "{{ id['desc'] }}"
7960
}
8061
},
8162
{
82-
"name": "create_elasticlogs-uuid",
63+
"name": "indicesstats-elasticlogs-{{ id['desc'] }}",
8364
"operation": {
84-
"operation-type": "createindex",
85-
"index_name": "elasticlogs-uuid",
86-
"index_template_body": {
87-
"template": "elasticlogs-uuid",
88-
"settings": {
89-
"index.refresh_interval": "5s",
90-
"index.codec": "best_compression",
91-
"index.number_of_replicas": 0,
92-
"index.number_of_shards": 1
93-
},
94-
"mappings": "mappings.json",
95-
"aliases": {}
96-
},
97-
"index_template_name": "elasticlogs-uuid"
98-
}
99-
},
100-
{
101-
"name": "index-append-1000-elasticlogs-uuid",
102-
"operation": {
103-
"operation-type": "bulk",
104-
"param-source": "elasticlogs_bulk",
105-
"index": "elasticlogs-uuid",
106-
"bulk-size": 1000,
107-
"id_type": "uuid"
65+
"operation-type": "indicesstats",
66+
"index_pattern": "elasticlogs-{{ id['desc'] }}"
10867
},
109-
"iterations": {{ p_iterations_per_client }},
110-
"clients": {{ p_bulk_indexing_clients }},
11168
"meta": {
112-
"id_mode": "uuid"
69+
"forcemerged": "no",
70+
"id_mode": "{{ id['desc'] }}"
11371
}
11472
},
73+
11574
{
116-
"name": "create_elasticlogs-epoch-no_delay",
75+
"name": "force-merge-{{ id['desc'] }}",
11776
"operation": {
118-
"operation-type": "createindex",
119-
"index_name": "elasticlogs-epoch-no_delay",
120-
"index_template_body": {
121-
"template": "elasticlogs-epoch-no_delay",
122-
"settings": {
123-
"index.refresh_interval": "5s",
124-
"index.codec": "best_compression",
125-
"index.number_of_replicas": 0,
126-
"index.number_of_shards": 1
127-
},
128-
"mappings": "mappings.json",
129-
"aliases": {}
130-
},
131-
"index_template_name": "elasticlogs-epoch-no_delay"
132-
}
77+
"operation-type": "force-merge",
78+
"max-num-segments": 1
79+
},
80+
"clients": 1
13381
},
134-
{
135-
"name": "index-append-1000-elasticlogs-epoch-no_delay",
82+
{
83+
"name": "indicesstats-elasticlogs-fm-{{ id['desc'] }}",
13684
"operation": {
137-
"operation-type": "bulk",
138-
"param-source": "elasticlogs_bulk",
139-
"index": "elasticlogs-epoch-no_delay",
140-
"bulk-size": 1000,
141-
"id_type": "epoch_uuid"
85+
"operation-type": "indicesstats",
86+
"index_pattern": "elasticlogs-{{ id['desc'] }}"
14287
},
143-
"iterations": {{ p_iterations_per_client }},
144-
"clients": {{ p_bulk_indexing_clients }},
14588
"meta": {
146-
"id_mode": "epoch_uuid-no_delay"
89+
"forcemerged": "yes",
90+
"id_mode": "{{ id['desc'] }}"
14791
}
14892
},
93+
{% endfor %}
94+
{% for id in [{ 'type': 'epoch_md5', 'desc': 'epoch_md5-10pct_60s', 'delay': 60 },
95+
{ 'type': 'epoch_md5', 'desc': 'epoch_md5-10pct_300s', 'delay': 300 }] %}
96+
{
97+
"name": "deleteindex_elasticlogs-{{ id['desc'] }}",
98+
"operation": {
99+
"operation-type": "delete-index",
100+
"index": "elasticlogs-{{ id['desc'] }}"
101+
},
102+
"include-in-reporting": false
103+
},
149104
{
150-
"name": "create_elasticlogs-epoch-10pct_60s",
105+
"name": "create_elasticlogs-{{ id['desc'] }}",
151106
"operation": {
152107
"operation-type": "createindex",
153-
"index_name": "elasticlogs-epoch-10pct_60s",
108+
"index_name": "elasticlogs-{{ id['desc'] }}",
154109
"index_template_body": {
155-
"template": "elasticlogs-epoch-10pct_60s",
110+
"template": "elasticlogs-{{ id['desc'] }}",
156111
"settings": {
157112
"index.refresh_interval": "5s",
158113
"index.codec": "best_compression",
114+
"index.translog.retention.size": "10mb",
159115
"index.number_of_replicas": 0,
160116
"index.number_of_shards": 1
161117
},
162118
"mappings": "mappings.json",
163119
"aliases": {}
164120
},
165-
"index_template_name": "elasticlogs-epoch-10pct_60s"
166-
}
121+
"index_template_name": "elasticlogs-{{ id['desc'] }}"
122+
},
123+
"include-in-reporting": false
167124
},
168125
{
169-
"name": "index-append-1000-elasticlogs-epoch-10pct_60s",
126+
"name": "index-append-1000-elasticlogs-{{ id['desc'] }}",
170127
"operation": {
171128
"operation-type": "bulk",
172129
"param-source": "elasticlogs_bulk",
173-
"index": "elasticlogs-epoch-10pct_60s",
130+
"index": "elasticlogs-{{ id['desc'] }}",
174131
"bulk-size": 1000,
175-
"id_type": "epoch_uuid",
132+
"id_type": "{{ id['type'] }}",
176133
"id_delay_probability": 0.1,
177-
"id_delay_secs": 60
134+
"id_delay_secs": {{ id['delay'] }}
178135
},
179136
"iterations": {{ p_iterations_per_client }},
180137
"clients": {{ p_bulk_indexing_clients }},
181138
"meta": {
182-
"id_mode": "epoch_uuid-10pct/60s"
139+
"id_mode": "{{ id['desc'] }}"
183140
}
184141
},
185142
{
186-
"name": "create_elasticlogs-epoch-10pct_300s",
143+
"name": "indicesstats-elasticlogs-{{ id['desc'] }}",
187144
"operation": {
188-
"operation-type": "createindex",
189-
"index_name": "elasticlogs-epoch-10pct_300s",
190-
"index_template_body": {
191-
"template": "elasticlogs-epoch-10pct_300s",
192-
"settings": {
193-
"index.refresh_interval": "5s",
194-
"index.codec": "best_compression",
195-
"index.number_of_replicas": 0,
196-
"index.number_of_shards": 1
197-
},
198-
"mappings": "mappings.json",
199-
"aliases": {}
200-
},
201-
"index_template_name": "elasticlogs-epoch-10pct_300s"
145+
"operation-type": "indicesstats",
146+
"index_pattern": "elasticlogs-{{ id['desc'] }}"
147+
},
148+
"meta": {
149+
"forcemerged": "no",
150+
"id_mode": "{{ id['desc'] }}"
202151
}
203152
},
153+
204154
{
205-
"name": "index-append-1000-elasticlogs-epoch-10pct_300s",
155+
"name": "force-merge-{{ id['desc'] }}",
206156
"operation": {
207-
"operation-type": "bulk",
208-
"param-source": "elasticlogs_bulk",
209-
"index": "elasticlogs-epoch-10pct_300s",
210-
"bulk-size": 1000,
211-
"id_type": "epoch_uuid",
212-
"id_delay_probability": 0.1,
213-
"id_delay_secs": 300
157+
"operation-type": "force-merge",
158+
"max-num-segments": 1
159+
},
160+
"clients": 1
161+
},
162+
{
163+
"name": "indicesstats-elasticlogs-fm-{{ id['desc'] }}",
164+
"operation": {
165+
"operation-type": "indicesstats",
166+
"index_pattern": "elasticlogs-{{ id['desc'] }}"
214167
},
215-
"iterations": {{ p_iterations_per_client }},
216-
"clients": {{ p_bulk_indexing_clients }},
217168
"meta": {
218-
"id_mode": "epoch_uuid-10pct/300s"
169+
"forcemerged": "yes",
170+
"id_mode": "{{ id['desc'] }}"
219171
}
172+
},
173+
{% endfor %}
174+
{
175+
"name": "refresh-final",
176+
"operation": "refresh",
177+
"iterations": 1,
178+
"clients": 1,
179+
"include-in-reporting": false
220180
}
221181
]
222182
}

0 commit comments

Comments
 (0)