@@ -78,3 +78,103 @@ include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification
78
78
`prediction_field_type`::
79
79
(Optional, string)
80
80
include::{es-repo-dir}/ml/ml-shared.asciidoc[tag=inference-config-classification-prediction-field-type]
81
+
82
+
83
+ [[inference-bucket-agg-example]]
84
+ ==== Example
85
+
86
+ The following snippet aggregates a web log by client IP (the `clientip` field) and extracts a number
87
+ of features via metric and bucket sub-aggregations as input to the {infer}
88
+ aggregation configured with a model trained to identify suspicious client IPs:
89
+
90
+ [source,console]
91
+ -------------------------------------------------
92
+ GET kibana_sample_data_logs/_search
93
+ {
94
+ "size": 0,
95
+ "aggs": {
96
+ "client_ip": { <1>
97
+ "composite": {
98
+ "sources": [
99
+ {
100
+ "client_ip": {
101
+ "terms": {
102
+ "field": "clientip"
103
+ }
104
+ }
105
+ }
106
+ ]
107
+ },
108
+ "aggs": { <2>
109
+ "url_dc": {
110
+ "cardinality": {
111
+ "field": "url.keyword"
112
+ }
113
+ },
114
+ "bytes_sum": {
115
+ "sum": {
116
+ "field": "bytes"
117
+ }
118
+ },
119
+ "geo_src_dc": {
120
+ "cardinality": {
121
+ "field": "geo.src"
122
+ }
123
+ },
124
+ "geo_dest_dc": {
125
+ "cardinality": {
126
+ "field": "geo.dest"
127
+ }
128
+ },
129
+ "responses_total": {
130
+ "value_count": {
131
+ "field": "timestamp"
132
+ }
133
+ },
134
+ "success": {
135
+ "filter": {
136
+ "term": {
137
+ "response": "200"
138
+ }
139
+ }
140
+ },
141
+ "error404": {
142
+ "filter": {
143
+ "term": {
144
+ "response": "404"
145
+ }
146
+ }
147
+ },
148
+ "error503": {
149
+ "filter": {
150
+ "term": {
151
+ "response": "503"
152
+ }
153
+ }
154
+ },
155
+ "malicious_client_ip": { <3>
156
+ "inference": {
157
+ "model_id": "malicious_clients_model",
158
+ "buckets_path": {
159
+ "response_count": "responses_total",
160
+ "url_dc": "url_dc",
161
+ "bytes_sum": "bytes_sum",
162
+ "geo_src_dc": "geo_src_dc",
163
+ "geo_dest_dc": "geo_dest_dc",
164
+ "success": "success._count",
165
+ "error404": "error404._count",
166
+ "error503": "error503._count"
167
+ }
168
+ }
169
+ }
170
+ }
171
+ }
172
+ }
173
+ }
174
+ -------------------------------------------------
175
+ // TEST[skip:setup kibana sample data]
176
+
177
+ <1> A composite bucket aggregation, named `client_ip`, that groups the data by the `clientip` field.
178
+ <2> A series of metric and bucket sub-aggregations.
179
+ <3> An {infer-cap} bucket aggregation that contains the model ID and maps the
180
+ aggregation names to the model's input fields.
0 commit comments