Skip to content

Commit 4557e72

Browse files
Measure raw event size
With this commit we measure raw event size and add it as a new field in the generated documents. Relates elastic#35
1 parent ede1773 commit 4557e72

12 files changed

+103
-80
lines changed

README.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,14 @@ The track can be run by specifying the following runtime parameters: `--track=ev
1919

2020
Another option is to download the repository and point to it using the `--track-path` command line parameter.
2121

22+
## Track parameters supported by all challenges
23+
24+
Note: In general, track parameters are only defined for a subset of the challenges, so please refer to the documentation of the respective challenge for a list of supported track parameters. Only the parameters documented in the table below are guaranteed to work with all challenges as intended.
25+
26+
| Parameter | Explanation | Type | Default Value |
27+
| --------- | ----------- | ---- | ------------- |
28+
| `record_raw_event_size` | Adds a new field `_raw_event_size` to the index which contains the size of the raw logging event in bytes. | `bool` | `False` |
29+
2230
## Available Challenges
2331

2432
### 1) append-no-conflicts

eventdata/challenges/bulk-size-evaluation.json

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@
138138
"operation": {
139139
"operation-type": "bulk",
140140
"param-source": "elasticlogs_bulk",
141-
"bulk-size": 1000
141+
"bulk-size": 1000,
142+
"record_raw_event_size": {{p_record_raw_event_size}}
142143
},
143144
"warmup-time-period": 300,
144145
"time-period": 600,
@@ -163,7 +164,8 @@
163164
"operation": {
164165
"operation-type": "bulk",
165166
"param-source": "elasticlogs_bulk",
166-
"bulk-size": 100
167+
"bulk-size": 100,
168+
"record_raw_event_size": {{p_record_raw_event_size}}
167169
},
168170
"warmup-time-period": 300,
169171
"time-period": 600,
@@ -188,7 +190,8 @@
188190
"operation": {
189191
"operation-type": "bulk",
190192
"param-source": "elasticlogs_bulk",
191-
"bulk-size": 10
193+
"bulk-size": 10,
194+
"record_raw_event_size": {{p_record_raw_event_size}}
192195
},
193196
"warmup-time-period": 300,
194197
"time-period": 600,

eventdata/challenges/bulk-update.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"id_seq_probability": 0.4,
4949
"id_seq_low_id_bias": false,
5050
"bulk-size": {{ bulk_size | default(1000) }},
51+
"record_raw_event_size": {{p_record_raw_event_size}},
5152
"index": "elasticlogs"
5253
},
5354
"iterations": {{ p_iterations_per_client }},

eventdata/challenges/document_id_benchmark.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
"param-source": "elasticlogs_bulk",
4848
"index": "elasticlogs-warmup",
4949
"bulk-size": 1000,
50+
"record_raw_event_size": {{p_record_raw_event_size}},
5051
"id_type": "auto"
5152
},
5253
"iterations": 200,
@@ -103,6 +104,7 @@
103104
"param-source": "elasticlogs_bulk",
104105
"index": "elasticlogs-{{ id['desc'] }}",
105106
"bulk-size": 1000,
107+
"record_raw_event_size": {{p_record_raw_event_size}},
106108
"id_type": "{{ id['type'] }}"
107109
},
108110
"iterations": {{ p_iterations_per_client }},
@@ -182,6 +184,7 @@
182184
"param-source": "elasticlogs_bulk",
183185
"index": "elasticlogs-{{ id['desc'] }}",
184186
"bulk-size": 1000,
187+
"record_raw_event_size": {{p_record_raw_event_size}},
185188
"id_type": "{{ id['type'] }}",
186189
"id_delay_probability": 0.1,
187190
"id_delay_secs": {{ id['delay'] }}

eventdata/challenges/elasticlogs-continuous-index-and-query.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,8 @@
3636
"operation-type": "bulk",
3737
"index": "elasticlogs_q_write",
3838
"param-source": "elasticlogs_bulk",
39-
"bulk-size": {{ p1_bulk_size | default(1000) | int }}
39+
"bulk-size": {{ p1_bulk_size | default(1000) | int }},
40+
"record_raw_event_size": {{p_record_raw_event_size}}
4041
},
4142
"clients": {{ p1_bulk_indexing_clients }},
4243
"meta": {
@@ -71,7 +72,8 @@
7172
"operation-type": "bulk",
7273
"index": "elasticlogs_q_write",
7374
"param-source": "elasticlogs_bulk",
74-
"bulk-size": {{ p2_bulk_size | default(1000) | int }}
75+
"bulk-size": {{ p2_bulk_size | default(1000) | int }},
76+
"record_raw_event_size": {{p_record_raw_event_size}}
7577
},
7678
"target-throughput": {{ p2_ops }},
7779
"clients": {{ p2_bulk_indexing_clients }},

eventdata/challenges/frozen.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@
4141
"operation-type": "bulk",
4242
"param-source": "elasticlogs_bulk",
4343
"index": "elasticlogs",
44-
"bulk-size": 1000
44+
"bulk-size": 1000,
45+
"record_raw_event_size": {{p_record_raw_event_size}}
4546
},
4647
"iterations": {{p_bulk_idx_iterations}},
4748
"clients": 8

eventdata/challenges/large-shard-sizing.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
"param-source": "elasticlogs_bulk",
4848
"index": "elasticlogs-auto",
4949
"bulk-size": 1000,
50+
"record_raw_event_size": {{p_record_raw_event_size}},
5051
"id_type": "auto"
5152
},
5253
"iterations": {{ p_ops_per_client }},
@@ -199,6 +200,7 @@
199200
"param-source": "elasticlogs_bulk",
200201
"index": "elasticlogs-{{ id_type }}",
201202
"bulk-size": 1000,
203+
"record_raw_event_size": {{p_record_raw_event_size}},
202204
"id_type": "{{ id_type }}"
203205
},
204206
"iterations": {{ p_ops_per_client }},

eventdata/elasticlogs-index.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212
"@timestamp": {
1313
"type": "date"
1414
},
15+
{%- if record_raw_event_size is defined and record_raw_event_size %}
16+
"_raw_event_size": {
17+
"type": "short"
18+
},
19+
{%- endif %}
1520
"source": {
1621
"type": "keyword",
1722
"ignore_above": 1024

eventdata/operations/generate-historic-data.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,5 +24,6 @@
2424
"daily_index": true,
2525
"starting_point": "2018-05-01:00:00:00",
2626
"acceleration_factor": 12,
27-
"bulk-size": 1000
27+
"bulk-size": 1000,
28+
"record_raw_event_size": {{p_record_raw_event_size}}
2829
}

eventdata/operations/indexing.json

Lines changed: 16 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -10,82 +10,44 @@ rolledover_indices_suffix_separator: used by the `delete_rolledover_index_patter
1010
{% set p_translog_sync = translog_sync | default('request') | lower %}
1111

1212
{% set p_disk_type = disk_type | default('ssd') | lower %}
13+
14+
{% set comma = joiner() %}
15+
{% for bulk_size in [50000, 20000, 10000, 5000, 2000, 1000, 500, 250, 125] %}
16+
{{comma()}}
1317
{
14-
"name": "index-append-50000",
15-
"operation-type": "bulk",
16-
"param-source": "elasticlogs_bulk",
17-
"bulk-size": 50000
18-
},
19-
{
20-
"name": "index-append-20000",
21-
"operation-type": "bulk",
22-
"param-source": "elasticlogs_bulk",
23-
"bulk-size": 20000
24-
},
25-
{
26-
"name": "index-append-10000",
27-
"operation-type": "bulk",
28-
"param-source": "elasticlogs_bulk",
29-
"bulk-size": 10000
30-
},
31-
{
32-
"name": "index-append-5000",
33-
"operation-type": "bulk",
34-
"param-source": "elasticlogs_bulk",
35-
"bulk-size": 5000
36-
},
37-
{
38-
"name": "index-append-2000",
39-
"operation-type": "bulk",
40-
"param-source": "elasticlogs_bulk",
41-
"bulk-size": 2000
42-
},
43-
{
44-
"name": "index-append-1000",
45-
"operation-type": "bulk",
46-
"param-source": "elasticlogs_bulk",
47-
"bulk-size": 1000
48-
},
49-
{
50-
"name": "index-append-500",
51-
"operation-type": "bulk",
52-
"param-source": "elasticlogs_bulk",
53-
"bulk-size": 500
54-
},
55-
{
56-
"name": "index-append-250",
57-
"operation-type": "bulk",
58-
"param-source": "elasticlogs_bulk",
59-
"bulk-size": 250
60-
},
61-
{
62-
"name": "index-append-125",
18+
"name": "index-append-{{bulk_size}}",
6319
"operation-type": "bulk",
6420
"param-source": "elasticlogs_bulk",
65-
"bulk-size": 125
66-
},
21+
"bulk-size": {{bulk_size}},
22+
"record_raw_event_size": {{p_record_raw_event_size}}
23+
}
24+
{% endfor%}
25+
,
6726
{
6827
"name": "index-append-1000-shard-sizing",
6928
"operation-type": "bulk",
7029
"param-source": "elasticlogs_bulk",
7130
"index": "elasticlogs",
7231
"starting_point": "2017-01-01:02:00:00",
7332
"end_point": "2017-01-01:12:00:00",
74-
"bulk-size": 1000
33+
"bulk-size": 1000,
34+
"record_raw_event_size": {{p_record_raw_event_size}}
7535
},
7636
{
7737
"name": "index-append-1000-elasticlogs_q_write",
7838
"operation-type": "bulk",
7939
"param-source": "elasticlogs_bulk",
8040
"index": "elasticlogs_q_write",
81-
"bulk-size": 1000
41+
"bulk-size": 1000,
42+
"record_raw_event_size": {{p_record_raw_event_size}}
8243
},
8344
{
8445
"name": "index-append-1000-elasticlogs_i_write",
8546
"operation-type": "bulk",
8647
"param-source": "elasticlogs_bulk",
8748
"index": "elasticlogs_i_write",
88-
"bulk-size": 1000
49+
"bulk-size": 1000,
50+
"record_raw_event_size": {{p_record_raw_event_size}}
8951
},
9052
{
9153
"name": "create_elasticlogs_q_write",

eventdata/parameter_sources/randomevent.py

Lines changed: 53 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@ def __init__(self, params):
260260
for d in params['delete_fields']:
261261
self._delete_fields.append(d.split('.'))
262262

263+
self.record_raw_event_size = params.get("record_raw_event_size", False)
264+
263265
def generate_event(self):
264266
timestruct = self._timestamp_generator.generate_timestamp_struct()
265267
index_name = self.__generate_index_pattern(timestruct)
@@ -278,25 +280,57 @@ def generate_event(self):
278280
# set host name
279281
event["hostname"] = "web-{}-{}.elastic.co".format(event["geoip_continent_code"],random.randrange(1,3))
280282

281-
line = '{"@timestamp": "%s", ' \
282-
'"offset":%s, ' \
283-
'"source":"/usr/local/var/log/nginx/access.log","fileset":{"module":"nginx","name":"access"},"input":{"type":"log"},' \
284-
'"beat":{"version":"6.3.0","hostname":"%s","name":"%s"},' \
285-
'"prospector":{"type":"log"},' \
286-
'"nginx":{"access":{"user_name": "-",' \
287-
'"agent":"%s","user_agent": {"major": "%s","os": "%s","os_major": "%s","name": "%s","os_name": "%s","device": "%s"},' \
288-
'"remote_ip": "%s","remote_ip_list":["%s"],' \
289-
'"geoip":{"continent_name": "%s","city_name": "%s","country_name": "%s","country_iso_code": "%s","location":{"lat": %s,"lon": %s} },' \
290-
'"referrer":"%s",' \
291-
'"url": "%s","body_sent":{"bytes": %s},"method":"%s","response_code":%s,"http_version":"%s"} } }' % \
292-
(event["@timestamp"],
293-
event["offset"],
294-
event["hostname"],event["hostname"],
295-
event["agent"], event["useragent_major"], event["useragent_os"], event["useragent_os_major"], event["useragent_name"], event["useragent_os_name"], event["useragent_device"],
296-
event["clientip"], event["clientip"],
297-
event["geoip_continent_name"], event["geoip_city_name"], event["geoip_country_name"], event["geoip_country_iso_code"], event["geoip_location_lat"], event["geoip_location_lon"],
298-
event["referrer"],
299-
event["request"], event["bytes"], event["verb"], event["response"], event["httpversion"])
283+
# determine the raw event size (as if this were contained in an nginx log file). We do not bother to
284+
# reformat the timestamp as this is not worth the overhead.
285+
if self.record_raw_event_size:
286+
raw_event = '%s - - [%s] "%s %s HTTP/%s" %s %s "%s" "%s"' % (event["clientip"], event["@timestamp"],
287+
event["verb"], event["request"],
288+
event["httpversion"], event["response"],
289+
event["bytes"], event["referrer"],
290+
event["agent"])
291+
# we are on the hot code path here and thus we want to avoid conditionally creating strings so we duplicate
292+
# the event.
293+
line = '{"@timestamp": "%s", ' \
294+
'"_raw_event_size":%d, ' \
295+
'"offset":%s, ' \
296+
'"source":"/usr/local/var/log/nginx/access.log","fileset":{"module":"nginx","name":"access"},"input":{"type":"log"},' \
297+
'"beat":{"version":"6.3.0","hostname":"%s","name":"%s"},' \
298+
'"prospector":{"type":"log"},' \
299+
'"nginx":{"access":{"user_name": "-",' \
300+
'"agent":"%s","user_agent": {"major": "%s","os": "%s","os_major": "%s","name": "%s","os_name": "%s","device": "%s"},' \
301+
'"remote_ip": "%s","remote_ip_list":["%s"],' \
302+
'"geoip":{"continent_name": "%s","city_name": "%s","country_name": "%s","country_iso_code": "%s","location":{"lat": %s,"lon": %s} },' \
303+
'"referrer":"%s",' \
304+
'"url": "%s","body_sent":{"bytes": %s},"method":"%s","response_code":%s,"http_version":"%s"} } }' % \
305+
(event["@timestamp"],
306+
len(raw_event),
307+
event["offset"],
308+
event["hostname"],event["hostname"],
309+
event["agent"], event["useragent_major"], event["useragent_os"], event["useragent_os_major"], event["useragent_name"], event["useragent_os_name"], event["useragent_device"],
310+
event["clientip"], event["clientip"],
311+
event["geoip_continent_name"], event["geoip_city_name"], event["geoip_country_name"], event["geoip_country_iso_code"], event["geoip_location_lat"], event["geoip_location_lon"],
312+
event["referrer"],
313+
event["request"], event["bytes"], event["verb"], event["response"], event["httpversion"])
314+
else:
315+
line = '{"@timestamp": "%s", ' \
316+
'"offset":%s, ' \
317+
'"source":"/usr/local/var/log/nginx/access.log","fileset":{"module":"nginx","name":"access"},"input":{"type":"log"},' \
318+
'"beat":{"version":"6.3.0","hostname":"%s","name":"%s"},' \
319+
'"prospector":{"type":"log"},' \
320+
'"nginx":{"access":{"user_name": "-",' \
321+
'"agent":"%s","user_agent": {"major": "%s","os": "%s","os_major": "%s","name": "%s","os_name": "%s","device": "%s"},' \
322+
'"remote_ip": "%s","remote_ip_list":["%s"],' \
323+
'"geoip":{"continent_name": "%s","city_name": "%s","country_name": "%s","country_iso_code": "%s","location":{"lat": %s,"lon": %s} },' \
324+
'"referrer":"%s",' \
325+
'"url": "%s","body_sent":{"bytes": %s},"method":"%s","response_code":%s,"http_version":"%s"} } }' % \
326+
(event["@timestamp"],
327+
event["offset"],
328+
event["hostname"],event["hostname"],
329+
event["agent"], event["useragent_major"], event["useragent_os"], event["useragent_os_major"], event["useragent_name"], event["useragent_os_name"], event["useragent_device"],
330+
event["clientip"], event["clientip"],
331+
event["geoip_continent_name"], event["geoip_city_name"], event["geoip_country_name"], event["geoip_country_iso_code"], event["geoip_location_lat"], event["geoip_location_lon"],
332+
event["referrer"],
333+
event["request"], event["bytes"], event["verb"], event["response"], event["httpversion"])
300334

301335
return line, index_name, self._type
302336

eventdata/track.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{% import "rally.helpers" as rally with context %}
22

33
{% set p_bulk_indexing_clients = (bulk_indexing_clients | default(8)) %}
4+
{% set p_record_raw_event_size = record_raw_event_size | default(False) | tojson %}
45

56
{
67
"version": 2,

0 commit comments

Comments
 (0)