From f122d3466e78e8f7994a5fc87bea8917cbd6098e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 9 Mar 2020 11:28:08 +0100 Subject: [PATCH 01/15] [DOCS] Adds painless transform examples. --- .../transform/painless-examples.asciidoc | 80 +++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 docs/reference/transform/painless-examples.asciidoc diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc new file mode 100644 index 0000000000000..e2255f5c8f90e --- /dev/null +++ b/docs/reference/transform/painless-examples.asciidoc @@ -0,0 +1,80 @@ +[role="xpack"] +[testenv="basic"] +[[transform-painless-examples]] +=== Painless snippet examples for {transforms} +++++ +Painless examples for {transforms} +++++ + +These examples demonstrate how to use Painless in {transforms}. You can learn +more about Painless scripting language in the <>. The example +snippets below help you being able to infer how to use Painless for your use +case. + +* <> +* <> +* <> + + +[discrete] +[[painless-top-hits]] +==== Getting top hits by using scripted metric + +This example shows how to achieve the function of a +<> by using scripted metric +aggregation which provides a metric output. 
+ +[source,js] +-------------------------------------------------- +"latest_doc": { + "scripted_metric": { + "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1> + "map_script": "def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); if (current_date > state.timestamp_latest) {state.timestamp_latest = current_date;state.last_doc = new HashMap(params['_source']);}", <2> + "combine_script": "return state", <3> + "reduce_script": "def last_doc = '';def timestamp_latest = 0L; for (s in states) {if (s.timestamp_latest > (timestamp_latest)) {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} return last_doc" <4> + } +} +-------------------------------------------------- +// NOTCONSOLE + +<1> Creates a long type `timestamp_latest` and a string type `last_doc` in the +`state` object. +<2> Defines `current_date`based on the timestamp of the document, then compares +`current_date` with `state.timestamp_latest`. Based on the comparison, returns +`state.last_doc` from the shard. +<3> The `combine_script` returns `state` from each shard. +<4> The `reduce_script` iterates through the value of `s.timestamp_latest` +returned by each shard and returns the document with the latest timestamp +(`last_doc`). In the response, the top hit (in other words, the `latest_doc`) is +nested below the `latest_doc` field. + +Check the +{ref}/search-aggregations-metrics-scripted-metric-aggregation.html#_scope_of_scripts[scope of scripts] +for detailed explanation on the respective scripts. 
+ + +You can retrieve the last value in a similar way: + +[source,js] +-------------------------------------------------- +"latest_value": { + "scripted_metric": { + "init_script": "state.timestamp_latest = 0L; state.last_value = ''", + "map_script": "def current_date = doc['date'].getValue().toInstant().toEpochMilli(); if (current_date > state.timestamp_latest) {state.timestamp_latest = current_date;state.last_value = params['_source']['value'];}", + "combine_script": "return state", + "reduce_script": "def last_value = '';def timestamp_latest = 0L; for (s in states) {if (s.timestamp_latest > (timestamp_latest)) {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} return last_value" + } +} +-------------------------------------------------- +// NOTCONSOLE + + +[discrete] +[[painless-time-features]] +==== Getting time features as scripted fields + + +[discrete] +[[painless-group-by]] +==== Using Painless in `group_by` + From db025da8b34936451c93ce4722a0d933f79a8d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 9 Mar 2020 11:31:31 +0100 Subject: [PATCH 02/15] [DOCS] Adds painles examples to index.asciidoc. --- docs/reference/transform/index.asciidoc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/reference/transform/index.asciidoc b/docs/reference/transform/index.asciidoc index 595cbdef56d98..bdf506c6f695b 100644 --- a/docs/reference/transform/index.asciidoc +++ b/docs/reference/transform/index.asciidoc @@ -15,6 +15,7 @@ your data. 
* <> * <> * <> +* <> * <> * <> @@ -24,5 +25,6 @@ include::checkpoints.asciidoc[] include::api-quickref.asciidoc[] include::ecommerce-tutorial.asciidoc[] include::examples.asciidoc[] +include::painless-examples.asciidoc[] include::troubleshooting.asciidoc[] include::limitations.asciidoc[] \ No newline at end of file From 39b4b66731e346674436790164ac2d517d562d29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 9 Mar 2020 11:41:05 +0100 Subject: [PATCH 03/15] [DOCS] Adds formatting. --- docs/reference/transform/painless-examples.asciidoc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index e2255f5c8f90e..0711b9eea3550 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -7,9 +7,10 @@ ++++ These examples demonstrate how to use Painless in {transforms}. You can learn -more about Painless scripting language in the <>. The example -snippets below help you being able to infer how to use Painless for your use -case. +more about the Painless scripting language in the +https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.html[Painless guide]. +The example snippets below help you being able to infer how to use Painless for +your use case. * <> * <> @@ -39,9 +40,9 @@ aggregation which provides a metric output. <1> Creates a long type `timestamp_latest` and a string type `last_doc` in the `state` object. -<2> Defines `current_date`based on the timestamp of the document, then compares -`current_date` with `state.timestamp_latest`. Based on the comparison, returns -`state.last_doc` from the shard. +<2> Defines `current_date` based on the timestamp of the document, then compares +`current_date` with `state.timestamp_latest`, finally returns `state.last_doc` +from the shard. 
<3> The `combine_script` returns `state` from each shard. <4> The `reduce_script` iterates through the value of `s.timestamp_latest` returned by each shard and returns the document with the latest timestamp From 67249c1449ce44c9662b967a082eed93a309b386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 9 Mar 2020 16:19:56 +0100 Subject: [PATCH 04/15] [DOCS] Minor adjustments. --- docs/reference/transform/painless-examples.asciidoc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 0711b9eea3550..17ad9d016ba60 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -38,11 +38,11 @@ aggregation which provides a metric output. -------------------------------------------------- // NOTCONSOLE -<1> Creates a long type `timestamp_latest` and a string type `last_doc` in the -`state` object. -<2> Defines `current_date` based on the timestamp of the document, then compares -`current_date` with `state.timestamp_latest`, finally returns `state.last_doc` -from the shard. +<1> The `init_script` creates a long type `timestamp_latest` and a string type +`last_doc` in the `state` object. +<2> The `map_script` defines `current_date` based on the timestamp of the +document, then compares `current_date` with `state.timestamp_latest`, finally +returns `state.last_doc` from the shard. <3> The `combine_script` returns `state` from each shard. <4> The `reduce_script` iterates through the value of `s.timestamp_latest` returned by each shard and returns the document with the latest timestamp From 0b74dd4b12310c521307e55eb49cfeef17610729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 10 Mar 2020 11:48:54 +0100 Subject: [PATCH 05/15] [DOCS] Adds time features as scripted fields example. 
--- .../transform/painless-examples.asciidoc | 48 ++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 17ad9d016ba60..b7f3230a2b0f0 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -53,7 +53,6 @@ Check the {ref}/search-aggregations-metrics-scripted-metric-aggregation.html#_scope_of_scripts[scope of scripts] for detailed explanation on the respective scripts. - You can retrieve the last value in a similar way: [source,js] @@ -74,6 +73,53 @@ You can retrieve the last value in a similar way: [[painless-time-features]] ==== Getting time features as scripted fields +This example shows how to extract time based features by using Painless. The +snippet uses an index where `@timestamp` is defined as a `date` type field. + +[source,js] +-------------------------------------------------- +"script_fields": { + "hour_of_day": { <1> + "script": { + "lang": "painless", + "source": """ + ZonedDateTime date = doc['@timestamp'].value; <2> + return date.getHour(); <3> + """ + } + }, + "month_of_year": { <4> + "script": { + "lang": "painless", + "source": """ + ZonedDateTime date = doc['@timestamp'].value; <5> + return date.getMonthValue(); <6> + """ + } + }, + "original": { <7> + "script": { + "lang": "painless", + "source": """ + ZonedDateTime date = doc['@timestamp'].value; <8> + return date; <9> + """ + } + } + } + -------------------------------------------------- + // NOTCONSOLE + +<1> Contains the Painless script that returns the hour of the day. +<2> Sets `date` based on the timestamp of the document. +<3> Returns the hour value from `date`. +<4> Contains the Painless script that returns the month of the year. +<5> Sets `date` based on the timestamp of the document. +<6> Returns the month value from `date`. 
+<7> Contains the Painless script that returns the entire date value. +<8> Sets `date` based on the timestamp of the document. +<9> Returns the full `date` value. + [discrete] [[painless-group-by]] From 4c1e3114d4967c3b73eaf4f819e344fa02d995a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 10 Mar 2020 12:03:52 +0100 Subject: [PATCH 06/15] [DOCS] Fixes code block closure. --- docs/reference/transform/painless-examples.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index b7f3230a2b0f0..345c614116997 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -107,8 +107,8 @@ snippet uses an index where `@timestamp` is defined as a `date` type field. } } } - -------------------------------------------------- - // NOTCONSOLE +-------------------------------------------------- +// NOTCONSOLE <1> Contains the Painless script that returns the hour of the day. <2> Sets `date` based on the timestamp of the document. From 03c2d28a07ba74dc289fe175e2b746b4c76617c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 10 Mar 2020 17:11:43 +0100 Subject: [PATCH 07/15] [DOCS] Minor adjustments. --- .../transform/painless-examples.asciidoc | 24 +++++-------------- 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 345c614116997..5b7ff7d3ad778 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -8,9 +8,7 @@ These examples demonstrate how to use Painless in {transforms}. 
You can learn more about the Painless scripting language in the -https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.html[Painless guide]. -The example snippets below help you being able to infer how to use Painless for -your use case. +https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.html[Painless guide]. * <> * <> @@ -21,9 +19,10 @@ your use case. [[painless-top-hits]] ==== Getting top hits by using scripted metric -This example shows how to achieve the function of a -<> by using scripted metric -aggregation which provides a metric output. +This example shows how to find the latest document, in other words the document +with the earliest timestamp. From a technical perspective, it helps to achieve +the function of a <> by using +scripted metric aggregation which provides a metric output. [source,js] -------------------------------------------------- @@ -96,15 +95,6 @@ snippet uses an index where `@timestamp` is defined as a `date` type field. return date.getMonthValue(); <6> """ } - }, - "original": { <7> - "script": { - "lang": "painless", - "source": """ - ZonedDateTime date = doc['@timestamp'].value; <8> - return date; <9> - """ - } } } -------------------------------------------------- @@ -116,12 +106,10 @@ snippet uses an index where `@timestamp` is defined as a `date` type field. <4> Contains the Painless script that returns the month of the year. <5> Sets `date` based on the timestamp of the document. <6> Returns the month value from `date`. -<7> Contains the Painless script that returns the entire date value. -<8> Sets `date` based on the timestamp of the document. -<9> Returns the full `date` value. [discrete] [[painless-group-by]] ==== Using Painless in `group_by` + From 318ab7c525cbaed9cc4447e756e6376324924c87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Wed, 11 Mar 2020 15:55:50 +0100 Subject: [PATCH 08/15] [DOCS] Adds group-by example and table. 
--- .../transform/painless-examples.asciidoc | 132 ++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 5b7ff7d3ad778..119d11be68ddc 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -112,4 +112,136 @@ snippet uses an index where `@timestamp` is defined as a `date` type field. [[painless-group-by]] ==== Using Painless in `group_by` +It is possible to base the `group_by` property of a {transform} on the output of +a script. The following example uses the {kib} sample web logs dataset. The goal +here is to make the {transform} output easier to understand through normalizing +the value of the fields that the data is grouped by. + +[source,console] +-------------------------------------------------- +POST _transform/_preview +{ + "source": { + "index": [ <1> + "kibana_sample_data_logs" + ] + }, + "pivot": { + "group_by": { + "agent": { + "terms": { + "script": { <2> + "source": """String agent = doc['agent.keyword'].value; + if (agent.contains("MSIE")) { + return "internet explorer"; + } else if (agent.contains("AppleWebKit")) { + return "safari"; + } else if (agent.contains('Firefox')) { + return "firefox"; + } else { return agent }""", + "lang": "painless" + } + } + } + }, + "aggregations": { <3> + "200": { + "filter": { + "term": { + "response": "200" + } + } + }, + "404": { + "filter": { + "term": { + "response": "404" + } + } + }, + "503": { + "filter": { + "term": { + "response": "503" + } + } + } + } + }, + "dest": { <4> + "index": "pivot_logs" + } +} +-------------------------------------------------- +// TEST[skip:setup kibana sample data] + +<1> Specifies the source index or indices. +<2> The script defines an `agent` string based on the `agent` field of the +documents, then iterates through the values. 
If an `agent` field contains +"MSIE", then the script returns "internet explorer". If it contains +`AppleWebKit`, it returns "safari". It returns "firefox" if the field value +contains "Firefox". Finally, in every other case, the value of the field is +returned. +<3> The aggregations object contains filters that narrow down the results to +documents that contain `200`, `404`, or `503` values in the `response` field. +<4> Specifies the destination index of the {transform}. + +The API returns the following result: + +[source,js] +-------------------------------------------------- +{ + "preview" : [ + { + "agent" : "firefox", + "200" : 4931, + "404" : 259, + "503" : 172 + }, + { + "agent" : "internet explorer", + "200" : 3674, + "404" : 210, + "503" : 126 + }, + { + "agent" : "safari", + "200" : 4227, + "404" : 332, + "503" : 143 + } + ], + "mappings" : { + "properties" : { + "200" : { + "type" : "long" + }, + "agent" : { + "type" : "keyword" + }, + "404" : { + "type" : "long" + }, + "503" : { + "type" : "long" + } + } + } +} +-------------------------------------------------- +// NOTCONSOLE + +You can see that the `agent` values are simplified so it is easier to interpret +them. The table below shows how normalization modifies the output of the +{transform} in our example compared to the non-normalized values. + +[width="50%"] + +|=== +| Non-normalized `agent` value | Normalized `agent` value + +| "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)" | "internet explorer" +| "Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.50 Safari/534.24" | "safari" +| "Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1" | "firefox" +|=== From dca1e21da89b47a78c76b5192956c2ea5f4e452b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Fri, 13 Mar 2020 16:19:27 +0100 Subject: [PATCH 09/15] [DOCS] Adds code snippet for the bucket script example. 
--- .../transform/painless-examples.asciidoc | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 119d11be68ddc..d6024a711e507 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -13,6 +13,7 @@ https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.ht * <> * <> * <> +* <> [discrete] @@ -245,3 +246,51 @@ them. The table below shows how normalization modifies the output of the | "Mozilla/5.0 (X11; Linux x86_64; rv:6.0a1) Gecko/20110421 Firefox/6.0a1" | "firefox" |=== + +[discrete] +[[painless-bucket-script]] +==== Getting duration by using bucket script + + + +[source,console] +-------------------------------------------------- +PUT _transform/data_log +{ + "source": { + "index": "kibana_sample_data_logs" + }, + "dest": { + "index": "data-logs-by-client" + }, + "pivot": { + "group_by": { + "machine.os": {"terms": {"field": "machine.os.keyword"}}, + "machine.ip": {"terms": {"field": "clientip"}} + }, + "aggregations": { + "time_frame.lte": { + "max": { + "field": "timestamp" + } + }, + "time_frame.gte": { + "min": { + "field": "timestamp" + } + }, + "time_length": { + "bucket_script": { + "buckets_path": { + "min": "time_frame.gte.value", + "max": "time_frame.lte.value" + }, + "script": "params.max - params.min" + } + } + } + } +} +-------------------------------------------------- +// TEST[skip:setup kibana sample data] + From f8bc975df376c8e3b91c1661831b5df0b0c47d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 16 Mar 2020 12:59:17 +0100 Subject: [PATCH 10/15] [DOCS] Bucket script pt 2. 
--- .../transform/painless-examples.asciidoc | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index d6024a711e507..45a33623020d0 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -251,7 +251,10 @@ them. The table below shows how normalization modifies the output of the [[painless-bucket-script]] ==== Getting duration by using bucket script - +This example shows you how to get the duration of a session by client IP from a +data log by using +{ref}/search-aggregations-pipeline-bucket-script-aggregation.html[bucket script]. +The example uses the the {kib} sample web logs dataset. [source,console] -------------------------------------------------- @@ -279,13 +282,13 @@ PUT _data_frame/transforms/data_log "field": "timestamp" } }, - "time_length": { + "time_length": { <1> "bucket_script": { - "buckets_path": { + "buckets_path": { <2> "min": "time_frame.gte.value", "max": "time_frame.lte.value" }, - "script": "params.max - params.min" + "script": "params.max - params.min" <3> } } } @@ -294,3 +297,9 @@ PUT _data_frame/transforms/data_log -------------------------------------------------- // TEST[skip:setup kibana sample data] +<1> To define the length of the sessions, we use a bucket script. +<2> The bucket path is a map of script variables and their associated path to +the buckets you want to use for the variable. In this particular case, `min` and +`max` are variables mapped to `time_frame.gte.value` and `time_frame.lte.value`. +<3> Finally, the script subtracts the start date of the session from the end +date which results in the duration of the session. 
From 1868afec9adb68fe6cfb484c5dccfdff7c39108e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 16 Mar 2020 13:02:17 +0100 Subject: [PATCH 11/15] [DOCS] Minor adjustments. --- docs/reference/transform/painless-examples.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 45a33623020d0..7b89c510f876b 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -254,7 +254,7 @@ them. The table below shows how normalization modifies the output of the This example shows you how to get the duration of a session by client IP from a data log by using {ref}/search-aggregations-pipeline-bucket-script-aggregation.html[bucket script]. -The example uses the the {kib} sample web logs dataset. +The example uses the {kib} sample web logs dataset. [source,console] -------------------------------------------------- From 52fb38c11642bbe7d026b29e574c222fb31e6fcd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Mon, 16 Mar 2020 15:54:39 +0100 Subject: [PATCH 12/15] [DOCS] Adjusts indentation. 
--- .../transform/painless-examples.asciidoc | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 7b89c510f876b..a99af0e3f72cb 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -1,7 +1,7 @@ [role="xpack"] [testenv="basic"] [[transform-painless-examples]] -=== Painless snippet examples for {transforms} +=== Painless examples for {transforms} ++++ Painless examples for {transforms} ++++ @@ -20,7 +20,7 @@ https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.ht [[painless-top-hits]] ==== Getting top hits by using scripted metric -This example shows how to find the latest document, in other words the document +This snippet shows how to find the latest document, in other words the document with the earliest timestamp. From a technical perspective, it helps to achieve the function of a <> by using scripted metric aggregation which provides a metric output. @@ -30,9 +30,20 @@ scripted metric aggregation which provides a metric output. 
"latest_doc": { "scripted_metric": { "init_script": "state.timestamp_latest = 0L; state.last_doc = ''", <1> - "map_script": "def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); if (current_date > state.timestamp_latest) {state.timestamp_latest = current_date;state.last_doc = new HashMap(params['_source']);}", <2> + "map_script": """ <2> + def current_date = doc['@timestamp'].getValue().toInstant().toEpochMilli(); + if (current_date > state.timestamp_latest) + {state.timestamp_latest = current_date; + state.last_doc = new HashMap(params['_source']);} + """, "combine_script": "return state", <3> - "reduce_script": "def last_doc = '';def timestamp_latest = 0L; for (s in states) {if (s.timestamp_latest > (timestamp_latest)) {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} return last_doc" <4> + "reduce_script": """ <4> + def last_doc = ''; + def timestamp_latest = 0L; + for (s in states) {if (s.timestamp_latest > (timestamp_latest)) + {timestamp_latest = s.timestamp_latest; last_doc = s.last_doc;}} + return last_doc + """ } } -------------------------------------------------- @@ -60,9 +71,20 @@ You can retrieve the last value in a similar way: "latest_value": { "scripted_metric": { "init_script": "state.timestamp_latest = 0L; state.last_value = ''", - "map_script": "def current_date = doc['date'].getValue().toInstant().toEpochMilli(); if (current_date > state.timestamp_latest) {state.timestamp_latest = current_date;state.last_value = params['_source']['value'];}", + "map_script": """ + def current_date = doc['date'].getValue().toInstant().toEpochMilli(); + if (current_date > state.timestamp_latest) + {state.timestamp_latest = current_date; + state.last_value = params['_source']['value'];} + """, "combine_script": "return state", - "reduce_script": "def last_value = '';def timestamp_latest = 0L; for (s in states) {if (s.timestamp_latest > (timestamp_latest)) {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} 
return last_value" + "reduce_script": """ + def last_value = ''; + def timestamp_latest = 0L; + for (s in states) {if (s.timestamp_latest > (timestamp_latest)) + {timestamp_latest = s.timestamp_latest; last_value = s.last_value;}} + return last_value + """ } } -------------------------------------------------- @@ -73,7 +95,7 @@ You can retrieve the last value in a similar way: [[painless-time-features]] ==== Getting time features as scripted fields -This example shows how to extract time based features by using Painless. The +This snippet shows how to extract time based features by using Painless. The snippet uses an index where `@timestamp` is defined as a `date` type field. [source,js] From 0be57e30c9aef2654fd06e091b89c162e6e38b45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 17 Mar 2020 09:18:58 +0100 Subject: [PATCH 13/15] Update docs/reference/transform/painless-examples.asciidoc Co-Authored-By: Lisa Cawley --- docs/reference/transform/painless-examples.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index a99af0e3f72cb..853685daf3a76 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -8,7 +8,7 @@ These examples demonstrate how to use Painless in {transforms}. You can learn more about the Painless scripting language in the -https://www.elastic.co/guide/en/elasticsearch/painless/current/painless-guide.html[Painless guide]. +{painless}/painless-guide.html[Painless guide]. 
* <> * <> From 2f02c2d3e928df58c2dfa5ed0c78eac0eaeded74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 17 Mar 2020 09:19:07 +0100 Subject: [PATCH 14/15] Update docs/reference/transform/painless-examples.asciidoc Co-Authored-By: Lisa Cawley --- docs/reference/transform/painless-examples.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 853685daf3a76..07456ff927596 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -61,7 +61,7 @@ returned by each shard and returns the document with the latest timestamp nested below the `latest_doc` field. Check the -{ref}/search-aggregations-metrics-scripted-metric-aggregation.html#_scope_of_scripts[scope of scripts] +<> for detailed explanation on the respective scripts. You can retrieve the last value in a similar way: From d177dd460cfb0ee87fab93b75777ea76c9c7c728 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Thu, 19 Mar 2020 10:57:41 +0100 Subject: [PATCH 15/15] [DOCS] Explains new HashMap(...) in the first example. --- docs/reference/transform/painless-examples.asciidoc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index a99af0e3f72cb..726b49a51f1ce 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -53,7 +53,9 @@ scripted metric aggregation which provides a metric output. `last_doc` in the `state` object. <2> The `map_script` defines `current_date` based on the timestamp of the document, then compares `current_date` with `state.timestamp_latest`, finally -returns `state.last_doc` from the shard. 
+returns `state.last_doc` from the shard. By using `new HashMap(...)` we copy the +source document, this is important whenever you want to pass the full source +object from one phase to the next. <3> The `combine_script` returns `state` from each shard. <4> The `reduce_script` iterates through the value of `s.timestamp_latest` returned by each shard and returns the document with the latest timestamp