jtibshirani
diff --git a/‎docs/reference/query-dsl/combined-fields-query.asciidoc
Lines changed: 185 additions & 0 deletions b/‎docs/reference/query-dsl/combined-fields-query.asciidoc
Lines changed: 185 additions & 0 deletions
diff --git a/‎docs/reference/query-dsl/full-text-queries.asciidoc
Lines changed: 10 additions & 9 deletions b/‎docs/reference/query-dsl/full-text-queries.asciidoc
Lines changed: 10 additions & 9 deletions
diff --git a/‎docs/reference/query-dsl/multi-match-query.asciidoc
Lines changed: 10 additions & 1 deletion b/‎docs/reference/query-dsl/multi-match-query.asciidoc
Lines changed: 10 additions & 1 deletion
diff --git a/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml
Lines changed: 19 additions & 3 deletions b/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml
Lines changed: 19 additions & 3 deletions
diff --git a/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/360_combined_fields.yml
Lines changed: 42 additions & 0 deletions b/‎rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/360_combined_fields.yml
Lines changed: 42 additions & 0 deletions
diff --git a/‎server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java
Lines changed: 3 additions & 3 deletions b/‎server/src/internalClusterTest/java/org/elasticsearch/index/search/MatchPhraseQueryIT.java
Lines changed: 3 additions & 3 deletions
@@ -0,0 +1,185 @@
+[[query-dsl-combined-fields-query]]
+=== Combined fields
+++++
+<titleabbrev>Combined fields</titleabbrev>
+++++
+
+The `combined_fields` query supports searching multiple text fields as if their
+contents had been indexed into one combined field. It takes a term-centric
+view of the query: first it analyzes the query string into individual terms,
+then looks for each term in any of the fields. This query is particularly
+useful when a match could span multiple text fields, for example the `title`,
+`abstract` and `body` of an article:
+
+[source,console]
+--------------------------------------------------
+GET /_search
+{
+  "query": {
+    "combined_fields" : {
+      "query":      "database systems",
+      "fields":     [ "title", "abstract", "body"],
+      "operator":   "and"
+    }
+  }
+}
+--------------------------------------------------
+
+The `combined_fields` query takes a principled approach to scoring based on the
+simple BM25F formula described in
+http://www.staff.city.ac.uk/~sb317/papers/foundations_bm25_review.pdf[The Probabilistic Relevance Framework: BM25 and Beyond].
+When scoring matches, the query combines term and collection statistics across
+fields. This allows it to score each match as if the specified fields had been
+indexed into a single combined field. (Note that this is a best attempt --
+`combined_fields` makes some approximations and scores will not obey this
+model perfectly.)
+
+[WARNING]
+.Field number limit
+===================================================
+There is a limit on the number of fields that can be queried at once. It is
+defined by the `indices.query.bool.max_clause_count` <<search-settings>>
+which defaults to 1024.
+===================================================
+
+==== Per-field boosting
+
+Individual fields can be boosted with the caret (`^`) notation:
+
+[source,console]
+--------------------------------------------------
+GET /_search
+{
+  "query": {
+    "combined_fields" : {
+      "query" : "distributed consensus",
+      "fields" : [ "title^2", "body" ]
+    }
+  }
+}
+--------------------------------------------------
+
+Field boosts are interpreted according to the combined field model. For example,
+if the `title` field has a boost of 2, the score is calculated as if each term
+in the title appeared twice in the synthetic combined field.
+
+NOTE: The `combined_fields` query requires that field boosts are greater than
+or equal to 1.0. Field boosts are allowed to be fractional.
+
+[[combined-field-top-level-params]]
+==== Top-level parameters for `combined_fields`
+
+`fields`::
+(Required, array of strings) List of fields to search. Field wildcard patterns
+are allowed. Only <<text,`text`>> fields are supported, and they must all have
+the same search <<analyzer,`analyzer`>>.
+
+`query`::
++
+--
+(Required, string) Text to search for in the provided `fields`.
+
+The `combined_fields` query <<analysis,analyzes>> the provided text before
+performing a search.
+--
+
+`auto_generate_synonyms_phrase_query`::
++
+--
+(Optional, Boolean) If `true`, <<query-dsl-match-query-phrase,match phrase>>
+queries are automatically created for multi-term synonyms. Defaults to `true`.
+
+See <<query-dsl-match-query-synonyms,Use synonyms with match query>> for an
+example.
+--
+
+`operator`::
++
+--
+(Optional, string) Boolean logic used to interpret text in the `query` value.
+Valid values are:
+
+`or` (Default)::
+For example, a `query` value of `database systems` is interpreted as `database
+OR systems`.
+
+`and`::
+For example, a `query` value of `database systems` is interpreted as `database
+AND systems`.
+--
+
+`minimum_should_match`::
++
+--
+(Optional, string) Minimum number of clauses that must match for a document to
+be returned. See the <<query-dsl-minimum-should-match, `minimum_should_match`
+parameter>> for valid values and more information.
+--
+
+`zero_terms_query`::
++
+--
+(Optional, string) Indicates whether no documents are returned if the `analyzer`
+removes all tokens, such as when using a `stop` filter. Valid values are:
+
+`none` (Default)::
+No documents are returned if the `analyzer` removes all tokens.
+
+`all`::
+Returns all documents, similar to a <<query-dsl-match-all-query,`match_all`>>
+query.
+
+See <<query-dsl-match-query-zero>> for an example.
+--
+
+==== Comparison to `multi_match` query
+
+The `combined_fields` query provides a principled way of matching and scoring
+across multiple <<text, `text`>> fields. To support this, it requires that all
+fields have the same search <<analyzer,`analyzer`>>.
+
+If you want a single query that handles fields of different types like
+keywords or numbers, then the <<query-dsl-multi-match-query,`multi_match`>>
+query may be a better fit. It supports both text and non-text fields, and
+accepts text fields that do not share the same analyzer.
+
+The main `multi_match` modes `best_fields` and `most_fields` take a
+field-centric view of the query. In contrast, `combined_fields` is
+term-centric: `operator` and `minimum_should_match` are applied per-term,
+instead of per-field. Concretely, a query like
+
+[source,console]
+--------------------------------------------------
+GET /_search
+{
+  "query": {
+    "combined_fields" : {
+      "query":      "database systems",
+      "fields":     [ "title", "abstract"],
+      "operator":   "and"
+    }
+  }
+}
+--------------------------------------------------
+
+is executed as
+
+    +(combined("database", fields:["title", "abstract"]))
+    +(combined("systems", fields:["title", "abstract"]))
+
+In other words, each term must be present in at least one field for a
+document to match.
+
+The `cross_fields` `multi_match` mode also takes a term-centric approach and
+applies `operator` and `minimum_should_match` per-term. The main advantage of
+`combined_fields` over `cross_fields` is its robust and interpretable approach
+to scoring based on the BM25F algorithm.
+
+[NOTE]
+.Custom similarities
+===================================================
+The `combined_fields` query currently only supports the `BM25` similarity
+(which is the default unless a <<index-modules-similarity, custom similarity>>
+is configured). <<similarity, Per-field similarities>> are also not allowed.
+Using `combined_fields` in either of these cases will result in an error.
+===================================================
@@ -1,9 +1,9 @@
 [[full-text-queries]]
 == Full text queries
 
-The full text queries enable you to search <<analysis,analyzed text fields>> such as the 
-body of an email. The query string is processed using the same analyzer that was applied to 
-the field during indexing. 
+The full text queries enable you to search <<analysis,analyzed text fields>> such as the
+body of an email. The query string is processed using the same analyzer that was applied to
+the field during indexing.
 
 The queries in this group are:
 
@@ -21,16 +21,15 @@ the last term, which is matched as a `prefix` query
 
 <<query-dsl-match-query-phrase,`match_phrase` query>>::
 Like the `match` query but used for matching exact phrases or word proximity matches.
-    
+
 <<query-dsl-match-query-phrase-prefix,`match_phrase_prefix` query>>::
 Like the `match_phrase` query, but does a wildcard search on the final word.
-  
+
 <<query-dsl-multi-match-query,`multi_match` query>>::
 The multi-field version of the `match` query.
 
-<<query-dsl-common-terms-query,`common` terms query>>::
-
-    A more specialized query which gives more preference to uncommon words.
+<<query-dsl-combined-fields-query,`combined_fields` query>>::
+Matches over multiple fields as if they had been indexed into one combined field.
 
 <<query-dsl-query-string-query,`query_string` query>>::
 Supports the compact Lucene <<query-string-syntax,query string syntax>>,
@@ -52,10 +51,12 @@ include::match-phrase-query.asciidoc[]
 
 include::match-phrase-prefix-query.asciidoc[]
 
+include::combined-fields-query.asciidoc[]
+
 include::multi-match-query.asciidoc[]
 
 include::common-terms-query.asciidoc[]
 
 include::query-string-query.asciidoc[]
 
-include::simple-query-string-query.asciidoc[]
+include::simple-query-string-query.asciidoc[]
@@ -192,7 +192,10 @@ This query is executed as:
 In other words, *all terms* must be present *in a single field* for a document
 to match.
 
-See <<type-cross-fields>> for a better solution.
+The <<query-dsl-combined-fields-query, `combined_fields`>> query offers a
+term-centric approach that handles `operator` and `minimum_should_match` on a
+per-term basis. The other multi-match mode <<type-cross-fields>> also
+addresses this issue.
 
 ===================================================
 
@@ -388,6 +391,12 @@ Also, accepts `analyzer`, `boost`, `operator`, `minimum_should_match`,
 `lenient`, `zero_terms_query` and `cutoff_frequency`, as explained in
 <<query-dsl-match-query, match query>>.
 
+WARNING: The `cross_fields` type blends field statistics in a way that does
+not always produce well-formed scores (for example, scores can become
+negative). As an alternative, you can consider the
+<<query-dsl-combined-fields-query,`combined_fields`>> query, which is also
+term-centric but combines field statistics in a more robust way.
+
 [[cross-field-analysis]]
 ===== `cross_fields` and analysis
 
 
@@ -24,11 +24,27 @@ setup:
       indices.refresh: {}
 
 ---
-"Basic":
+"Basic multi_match query":
   - do:
       search:
-        rest_total_hits_as_int: true
-        body: { "query" : {"multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } }, "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
+        body: {
+          "query" : { "multi_match" : { "query" : "quick brown fox", "fields" : [ "text*"] } },
+          "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
+
+  - match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
+  - match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
+  - match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
+
+---
+"Basic combined_fields query":
+  - skip:
+      version: " - 7.99.99"
+      reason: "combined fields query is not yet backported"
+  - do:
+      search:
+        body: {
+          "query" : { "combined_fields" : { "query" : "quick brown fox", "fields" : [ "text*"] } },
+          "highlight" : { "type" : "unified", "fields" : { "*" : {} } } }
 
   - match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
   - match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
 
@@ -0,0 +1,42 @@
+setup:
+  - do:
+      indices.create:
+        index:  test
+        body:
+          mappings:
+            properties:
+              title:
+                type: text
+              abstract:
+                type: text
+              body:
+                type: text
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          title: "Time, Clocks and the Ordering of Events in a Distributed System"
+          abstract: "The concept of one event happening before another..."
+          body: "The concept of time is fundamental to our way of thinking..."
+        refresh: true
+
+---
+"Test combined_fields query":
+  - skip:
+      version: " - 7.99.99"
+      reason: "combined fields query is not yet backported"
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            combined_fields:
+              query: "time event"
+              fields: ["abstract", "body"]
+              operator: "and"
+
+  - match: { hits.total.value: 1 }
+  - match: { hits.hits.0._id: "1" }
+
@@ -13,7 +13,7 @@
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
-import org.elasticsearch.index.search.MatchQueryParser.ZeroTermsQuery;
+import org.elasticsearch.index.query.ZeroTermsQueryOption;
 import org.elasticsearch.test.ESIntegTestCase;
 import org.junit.Before;
 
@@ -47,11 +47,11 @@ public void testZeroTermsQuery() throws ExecutionException, InterruptedException
         MatchPhraseQueryBuilder baseQuery = matchPhraseQuery("name", "the who")
             .analyzer("standard_stopwords");
 
-        MatchPhraseQueryBuilder matchNoneQuery = baseQuery.zeroTermsQuery(ZeroTermsQuery.NONE);
+        MatchPhraseQueryBuilder matchNoneQuery = baseQuery.zeroTermsQuery(ZeroTermsQueryOption.NONE);
         SearchResponse matchNoneResponse = client().prepareSearch(INDEX).setQuery(matchNoneQuery).get();
         assertHitCount(matchNoneResponse, 0L);
 
-        MatchPhraseQueryBuilder matchAllQuery = baseQuery.zeroTermsQuery(ZeroTermsQuery.ALL);
+        MatchPhraseQueryBuilder matchAllQuery = baseQuery.zeroTermsQuery(ZeroTermsQueryOption.ALL);
         SearchResponse matchAllResponse = client().prepareSearch(INDEX).setQuery(matchAllQuery).get();
         assertHitCount(matchAllResponse, 2L);
     }