From 3c0288ee987578dc8520e5ea15a02ed60b22dd9a Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 22 Jul 2016 18:51:36 -0400 Subject: [PATCH] Consolify term and phrase suggester docs This includes a working example of reverse filters to support correcting prefix errors. --- docs/build.gradle | 7 + docs/reference/indices/flush.asciidoc | 10 +- docs/reference/search/suggesters.asciidoc | 237 +++++------------- .../search/suggesters/phrase-suggest.asciidoc | 236 +++++++++-------- docs/reference/search/uri-request.asciidoc | 8 +- 5 files changed, 209 insertions(+), 289 deletions(-) diff --git a/docs/build.gradle b/docs/build.gradle index 26560ce064a4f..5459c9a754f24 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -63,6 +63,13 @@ buildRestTests.docs = fileTree(projectDir) { Closure setupTwitter = { String name, int count -> buildRestTests.setups[name] = ''' + - do: + indices.create: + index: twitter + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 - do: bulk: index: twitter diff --git a/docs/reference/indices/flush.asciidoc b/docs/reference/indices/flush.asciidoc index acf2b7c1a6ad1..5864c16d4c1d6 100644 --- a/docs/reference/indices/flush.asciidoc +++ b/docs/reference/indices/flush.asciidoc @@ -153,18 +153,18 @@ sync-flushed: -------------------------------------------------- { "_shards": { - "total": 10, - "successful": 10, + "total": 2, + "successful": 2, "failed": 0 }, "twitter": { - "total": 10, - "successful": 10, + "total": 2, + "successful": 2, "failed": 0 } } -------------------------------------------------- -// TESTRESPONSE[s/"successful": 10/"successful": 5/] +// TESTRESPONSE[s/"successful": 2/"successful": 1/] Here is what it looks like when one shard group failed due to pending operations: diff --git a/docs/reference/search/suggesters.asciidoc b/docs/reference/search/suggesters.asciidoc index 71a62262338c7..2da4a6073074a 100644 --- a/docs/reference/search/suggesters.asciidoc +++ b/docs/reference/search/suggesters.asciidoc @@ 
-10,15 +10,25 @@ The suggest request part is either defined alongside the query part in a [source,js] -------------------------------------------------- -curl -s -XPOST 'localhost:9200/_search' -d '{ +POST twitter/_search +{ "query" : { - ... + "match": { + "message": "tring out Elasticsearch" + } }, "suggest" : { - ... + "my-suggestion" : { + "text" : "tring out Elasticsearch", + "term" : { + "field" : "message" + } + } } -}' +} -------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] Suggest requests executed against the `_suggest` endpoint should omit the surrounding `suggest` element which is only used if the suggest @@ -26,15 +36,18 @@ request is part of a search. [source,js] -------------------------------------------------- -curl -XPOST 'localhost:9200/_suggest' -d '{ +POST _suggest +{ "my-suggestion" : { - "text" : "the amsterdma meetpu", + "text" : "tring out Elasticsearch", "term" : { - "field" : "body" + "field" : "message" } } -}' +} -------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] Several suggestions can be specified per request. Each suggestion is identified with an arbitrary name. In the example below two suggestions @@ -43,21 +56,24 @@ the `term` suggester, but have a different `text`. [source,js] -------------------------------------------------- -"suggest" : { +POST _suggest +{ "my-suggest-1" : { - "text" : "the amsterdma meetpu", + "text" : "tring out Elasticsearch", "term" : { - "field" : "body" + "field" : "message" } }, "my-suggest-2" : { - "text" : "the rottredam meetpu", + "text" : "kmichy", "term" : { - "field" : "title" + "field" : "user" } } } -------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] The below suggest response example includes the suggestion response for `my-suggest-1` and `my-suggest-2`. Each suggestion part contains @@ -68,44 +84,35 @@ in the suggest text and if found an arbitrary number of options. 
[source,js] -------------------------------------------------- { - ... - "suggest": { - "my-suggest-1": [ - { - "text" : "amsterdma", - "offset": 4, - "length": 9, - "options": [ - ... - ] - }, - ... - ], - "my-suggest-2" : [ - ... - ] - } - ... + "_shards": ... + "my-suggest-1": [ { + "text": "tring", + "offset": 0, + "length": 5, + "options": [ {"text": "trying", "score": 0.8, "freq": 1 } ] + }, { + "text": "out", + "offset": 6, + "length": 3, + "options": [] + }, { + "text": "elasticsearch", + "offset": 10, + "length": 13, + "options": [] + } ], + "my-suggest-2": ... } -------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] +// TESTRESPONSE[s/"my-suggest-2": \.\.\./"my-suggest-2": "$body.my-suggest-2"/] + Each options array contains an option object that includes the suggested text, its document frequency and score compared to the suggest entry text. The meaning of the score depends on the used suggester. The term suggester's score is based on the edit distance. -[source,js] --------------------------------------------------- -"options": [ - { - "text": "amsterdam", - "freq": 77, - "score": 0.8888889 - }, - ... -] --------------------------------------------------- - [float] [[global-suggest]] === Global suggest text @@ -116,157 +123,27 @@ and applies to the `my-suggest-1` and `my-suggest-2` suggestions. [source,js] -------------------------------------------------- -"suggest" : { - "text" : "the amsterdma meetpu", +POST _suggest +{ + "text" : "tring out Elasticsearch", "my-suggest-1" : { "term" : { - "field" : "title" + "field" : "message" } }, "my-suggest-2" : { "term" : { - "field" : "body" + "field" : "user" } } } -------------------------------------------------- +// CONSOLE The suggest text can in the above example also be specified as suggestion specific option. The suggest text specified on suggestion level override the suggest text on the global level. 
-[float] -=== Other suggest example - -In the below example we request suggestions for the following suggest -text: `devloping distibutd saerch engies` on the `title` field with a -maximum of 3 suggestions per term inside the suggest text. Note that in -this example we set `size` to `0`. This isn't required, but a -nice optimization. The suggestions are gathered in the `query` phase and -in the case that we only care about suggestions (so no hits) we don't -need to execute the `fetch` phase. - -[source,js] --------------------------------------------------- -curl -s -XPOST 'localhost:9200/_search' -d '{ - "size": 0, - "suggest" : { - "my-title-suggestions-1" : { - "text" : "devloping distibutd saerch engies", - "term" : { - "size" : 3, - "field" : "title" - } - } - } -}' --------------------------------------------------- - -The above request could yield the response as stated in the code example -below. As you can see if we take the first suggested options of each -suggestion entry we get `developing distributed search engines` as -result. - -[source,js] --------------------------------------------------- -{ - ... 
- "suggest": { - "my-title-suggestions-1": [ - { - "text": "devloping", - "offset": 0, - "length": 9, - "options": [ - { - "text": "developing", - "freq": 77, - "score": 0.8888889 - }, - { - "text": "deloping", - "freq": 1, - "score": 0.875 - }, - { - "text": "deploying", - "freq": 2, - "score": 0.7777778 - } - ] - }, - { - "text": "distibutd", - "offset": 10, - "length": 9, - "options": [ - { - "text": "distributed", - "freq": 217, - "score": 0.7777778 - }, - { - "text": "disributed", - "freq": 1, - "score": 0.7777778 - }, - { - "text": "distribute", - "freq": 1, - "score": 0.7777778 - } - ] - }, - { - "text": "saerch", - "offset": 20, - "length": 6, - "options": [ - { - "text": "search", - "freq": 1038, - "score": 0.8333333 - }, - { - "text": "smerch", - "freq": 3, - "score": 0.8333333 - }, - { - "text": "serch", - "freq": 2, - "score": 0.8 - } - ] - }, - { - "text": "engies", - "offset": 27, - "length": 6, - "options": [ - { - "text": "engines", - "freq": 568, - "score": 0.8333333 - }, - { - "text": "engles", - "freq": 3, - "score": 0.8333333 - }, - { - "text": "eggies", - "freq": 1, - "score": 0.8333333 - } - ] - } - ] - } - ... -} --------------------------------------------------- - include::suggesters/term-suggest.asciidoc[] include::suggesters/phrase-suggest.asciidoc[] @@ -274,5 +151,3 @@ include::suggesters/phrase-suggest.asciidoc[] include::suggesters/completion-suggest.asciidoc[] include::suggesters/context-suggest.asciidoc[] - - diff --git a/docs/reference/search/suggesters/phrase-suggest.asciidoc b/docs/reference/search/suggesters/phrase-suggest.asciidoc index 7ba1c93540b6f..487075a5677f8 100644 --- a/docs/reference/search/suggesters/phrase-suggest.asciidoc +++ b/docs/reference/search/suggesters/phrase-suggest.asciidoc @@ -17,36 +17,94 @@ co-occurrence and frequencies. ==== API Example -The `phrase` request is defined along side the query part in the json -request: +In general the `phrase` suggester requires special mapping up front to work. 
+The `phrase` suggester examples on this page need the following mapping to +work. The `reverse` analyzer is used only in the last example. [source,js] -------------------------------------------------- -curl -XPOST 'localhost:9200/_search' -d '{ - "suggest" : { - "text" : "Xor the Got-Jewel", - "simple_phrase" : { - "phrase" : { - "analyzer" : "body", - "field" : "bigram", - "size" : 1, - "real_word_error_likelihood" : 0.95, - "max_errors" : 0.5, - "gram_size" : 2, - "direct_generator" : [ { - "field" : "body", - "suggest_mode" : "always", - "min_word_length" : 1 - } ], - "highlight": { - "pre_tag": "", - "post_tag": "" +POST test +{ + "settings": { + "index": { + "number_of_shards": 1, + "analysis": { + "analyzer": { + "trigram": { + "type": "custom", + "tokenizer": "standard", + "filter": ["standard", "shingle"] + }, + "reverse": { + "type": "custom", + "tokenizer": "standard", + "filter": ["standard", "reverse"] + } + }, + "filter": { + "shingle": { + "type": "shingle", + "min_shingle_size": 2, + "max_shingle_size": 3 + } } } } + }, + "mappings": { + "test": { + "properties": { + "title": { + "type": "text", + "fields": { + "trigram": { + "type": "text", + "analyzer": "trigram" + }, + "reverse": { + "type": "text", + "analyzer": "reverse" + } + } + } + } + } + } +} +POST test/test +{"title": "noble warriors"} +POST test/test +{"title": "nobel prize"} +POST _refresh +-------------------------------------------------- +// TESTSETUP + +Once you have the analyzers and mappings set up you can use the `phrase` +suggester in the same spot you'd use the `term` suggester: + +[source,js] +-------------------------------------------------- +POST _suggest +{ + "text": "noble prize", + "simple_phrase": { + "phrase": { + "field": "title.trigram", + "size": 1, + "gram_size": 3, + "direct_generator": [ { + "field": "title.trigram", + "suggest_mode": "always" + } ], + "highlight": { + "pre_tag": "", + "post_tag": "" + } + } } -}' +} 
-------------------------------------------------- +// CONSOLE The response contains suggestions scored by the most likely spell correction first. In this case we received the expected correction @@ -57,37 +115,23 @@ can contain misspellings (See parameter descriptions below). [source,js] -------------------------------------------------- - { - "took" : 5, - "timed_out" : false, - "_shards" : { - "total" : 5, - "successful" : 5, - "failed" : 0 - }, - "hits" : { - "total" : 2938, - "max_score" : 0.0, - "hits" : [ ] - }, - "suggest" : { - "simple_phrase" : [ { - "text" : "Xor the Got-Jewel", +{ + "_shards": ... + "simple_phrase" : [ + { + "text" : "noble prize", "offset" : 0, - "length" : 17, + "length" : 11, "options" : [ { - "text" : "xorr the god jewel", - "highlighted": "xorr the god jewel", - "score" : 0.17877324 - }, { - "text" : "xor the god jewel", - "highlighted": "xor the god jewel", - "score" : 0.14231323 - } ] - } ] - } + "text" : "nobel prize", + "highlighted": "nobel prize", + "score" : 0.40765354 + }] + } + ] } -------------------------------------------------- +// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/] ==== Basic Phrase suggest API parameters @@ -178,34 +222,34 @@ can contain misspellings (See parameter descriptions below). 
[source,js] -------------------------------------------------- -curl -XPOST 'localhost:9200/_search' -d { - "suggest" : { - "text" : "Xor the Got-Jewel", - "simple_phrase" : { - "phrase" : { - "field" : "bigram", - "size" : 1, - "direct_generator" : [ { - "field" : "body", - "suggest_mode" : "always", - "min_word_length" : 1 - } ], - "collate": { - "query": { <1> - "inline" : { - "match": { - "{{field_name}}" : "{{suggestion}}" <2> - } - } - }, - "params": {"field_name" : "title"}, <3> - "prune": true <4> - } - } - } - } - } +POST _suggest +{ + "text" : "noble prize", + "simple_phrase" : { + "phrase" : { + "field" : "title.trigram", + "size" : 1, + "direct_generator" : [ { + "field" : "title.trigram", + "suggest_mode" : "always", + "min_word_length" : 1 + } ], + "collate": { + "query": { <1> + "inline" : { + "match": { + "{{field_name}}" : "{{suggestion}}" <2> + } + } + }, + "params": {"field_name" : "title"}, <3> + "prune": true <4> + } + } + } +} -------------------------------------------------- +// CONSOLE <1> This query will be run once for every suggestion. <2> The `{{suggestion}}` variable will be replaced by the text of each suggestion. @@ -342,33 +386,27 @@ accept ordinary analyzer names. 
[source,js] -------------------------------------------------- -curl -s -XPOST 'localhost:9200/_search' -d { - "suggest" : { - "text" : "Xor the Got-Jewel", - "simple_phrase" : { - "phrase" : { - "analyzer" : "body", - "field" : "bigram", - "size" : 4, - "real_word_error_likelihood" : 0.95, - "confidence" : 2.0, - "gram_size" : 2, - "direct_generator" : [ { - "field" : "body", - "suggest_mode" : "always", - "min_word_length" : 1 - }, { - "field" : "reverse", - "suggest_mode" : "always", - "min_word_length" : 1, - "pre_filter" : "reverse", - "post_filter" : "reverse" - } ] - } +POST _suggest +{ + "text" : "obel prize", + "simple_phrase" : { + "phrase" : { + "field" : "title.trigram", + "size" : 1, + "direct_generator" : [ { + "field" : "title.trigram", + "suggest_mode" : "always" + }, { + "field" : "title.reverse", + "suggest_mode" : "always", + "pre_filter" : "reverse", + "post_filter" : "reverse" + } ] } } } -------------------------------------------------- +// CONSOLE `pre_filter` and `post_filter` can also be used to inject synonyms after candidates are generated. For instance for the query `captain usq` we diff --git a/docs/reference/search/uri-request.asciidoc b/docs/reference/search/uri-request.asciidoc index ba36992f6fbb9..95ce6a8ff6a29 100644 --- a/docs/reference/search/uri-request.asciidoc +++ b/docs/reference/search/uri-request.asciidoc @@ -21,19 +21,19 @@ And here is a sample response: "timed_out": false, "took": 62, "_shards":{ - "total" : 5, - "successful" : 5, + "total" : 1, + "successful" : 1, "failed" : 0 }, "hits":{ "total" : 1, - "max_score": 0.2876821, + "max_score": 1.3862944, "hits" : [ { "_index" : "twitter", "_type" : "tweet", "_id" : "0", - "_score": 0.2876821, + "_score": 1.3862944, "_source" : { "user" : "kimchy", "date" : "2009-11-15T14:12:12",