
Commit fdb1bfd (2 parents: e6d9997 + 4624ba5)

Merge remote-tracking branch 'elastic/master' into feature-aware-check

* elastic/master:
  [Tests] Muting RatedRequestsTests#testXContentParsingIsNotLenient
  TEST: Retry synced-flush if ongoing ops on primary (elastic#30978)
  Fix docs build.
  Only auto-update license signature if all nodes ready (elastic#30859)
  Add BlobContainer.writeBlobAtomic() (elastic#30902)
  Add a doc value format to binary fields. (elastic#30860)
  Take into account the return value of TcpTransport.readMessageLength(...) in Netty4SizeHeaderFrameDecoder
  Move caching of the size of a directory to `StoreDirectory`. (elastic#30581)
  Clarify docs about boolean operator precedence. (elastic#30808)
  Docs: remove notes on sparsity. (elastic#30905)
  Fix MatchPhrasePrefixQueryBuilderTests#testPhraseOnFieldWithNoTerms
  run overflow forecast a 2nd time as regression test for elastic/ml-cpp#110 (elastic#30969)
  Improve documentation of dynamic mappings. (elastic#30952)
  Decouple MultiValueMode. (elastic#31075)
  Docs: Clarify constraints on scripted similarities. (elastic#31076)

File tree

62 files changed: +1018 -546 lines changed


buildSrc/src/main/resources/checkstyle_suppressions.xml

-2
@@ -505,8 +505,6 @@
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]settings[/\\]ClusterSettingsIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]shards[/\\]ClusterSearchShardsIT.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]cluster[/\\]structure[/\\]RoutingIteratorTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]blobstore[/\\]FsBlobStoreContainerTests.java" checks="LineLength" />
-  <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]blobstore[/\\]FsBlobStoreTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]breaker[/\\]MemoryCircuitBreakerTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]geo[/\\]ShapeBuilderTests.java" checks="LineLength" />
   <suppress files="server[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]common[/\\]hash[/\\]MessageDigestsTests.java" checks="LineLength" />

docs/reference/how-to/general.asciidoc

-91
@@ -40,94 +40,3 @@ better. For instance if a user searches for two words `foo` and `bar`, a match
 across different chapters is probably very poor, while a match within the same
 paragraph is likely good.
 
-[float]
-[[sparsity]]
-=== Avoid sparsity
-
-The data-structures behind Lucene, which Elasticsearch relies on in order to
-index and store data, work best with dense data, ie. when all documents have the
-same fields. This is especially true for fields that have norms enabled (which
-is the case for `text` fields by default) or doc values enabled (which is the
-case for numerics, `date`, `ip` and `keyword` by default).
-
-The reason is that Lucene internally identifies documents with so-called doc
-ids, which are integers between 0 and the total number of documents in the
-index. These doc ids are used for communication between the internal APIs of
-Lucene: for instance searching on a term with a `match` query produces an
-iterator of doc ids, and these doc ids are then used to retrieve the value of
-the `norm` in order to compute a score for these documents. The way this `norm`
-lookup is implemented currently is by reserving one byte for each document.
-The `norm` value for a given doc id can then be retrieved by reading the
-byte at index `doc_id`. While this is very efficient and helps Lucene quickly
-have access to the `norm` values of every document, this has the drawback that
-documents that do not have a value will also require one byte of storage.
-
-In practice, this means that if an index has `M` documents, norms will require
-`M` bytes of storage *per field*, even for fields that only appear in a small
-fraction of the documents of the index. Although slightly more complex with doc
-values due to the fact that doc values have multiple ways that they can be
-encoded depending on the type of field and on the actual data that the field
-stores, the problem is very similar. In case you wonder: `fielddata`, which was
-used in Elasticsearch pre-2.0 before being replaced with doc values, also
-suffered from this issue, except that the impact was only on the memory
-footprint since `fielddata` was not explicitly materialized on disk.
-
-Note that even though the most notable impact of sparsity is on storage
-requirements, it also has an impact on indexing speed and search speed since
-these bytes for documents that do not have a field still need to be written
-at index time and skipped over at search time.
-
-It is totally fine to have a minority of sparse fields in an index. But beware
-that if sparsity becomes the rule rather than the exception, then the index
-will not be as efficient as it could be.
-
-This section mostly focused on `norms` and `doc values` because those are the
-two features that are most affected by sparsity. Sparsity also affect the
-efficiency of the inverted index (used to index `text`/`keyword` fields) and
-dimensional points (used to index `geo_point` and numerics) but to a lesser
-extent.
-
-Here are some recommendations that can help avoid sparsity:
-
-[float]
-==== Avoid putting unrelated data in the same index
-
-You should avoid putting documents that have totally different structures into
-the same index in order to avoid sparsity. It is often better to put these
-documents into different indices, you could also consider giving fewer shards
-to these smaller indices since they will contain fewer documents overall.
-
-Note that this advice does not apply in the case that you need to use
-parent/child relations between your documents since this feature is only
-supported on documents that live in the same index.
-
-[float]
-==== Normalize document structures
-
-Even if you really need to put different kinds of documents in the same index,
-maybe there are opportunities to reduce sparsity. For instance if all documents
-in the index have a timestamp field but some call it `timestamp` and others
-call it `creation_date`, it would help to rename it so that all documents have
-the same field name for the same data.
-
-[float]
-==== Avoid types
-
-Types might sound like a good way to store multiple tenants in a single index.
-They are not: given that types store everything in a single index, having
-multiple types that have different fields in a single index will also cause
-problems due to sparsity as described above. If your types do not have very
-similar mappings, you might want to consider moving them to a dedicated index.
-
-[float]
-==== Disable `norms` and `doc_values` on sparse fields
-
-If none of the above recommendations apply in your case, you might want to
-check whether you actually need `norms` and `doc_values` on your sparse fields.
-`norms` can be disabled if producing scores is not necessary on a field, this is
-typically true for fields that are only used for filtering. `doc_values` can be
-disabled on fields that are neither used for sorting nor for aggregations.
-Beware that this decision should not be made lightly since these parameters
-cannot be changed on a live index, so you would have to reindex if you realize
-that you need `norms` or `doc_values`.
-

docs/reference/index-modules/similarity.asciidoc

+12-2
@@ -341,7 +341,18 @@ Which yields:
 // TESTRESPONSE[s/"took": 12/"took" : $body.took/]
 // TESTRESPONSE[s/OzrdjxNtQGaqs4DmioFw9A/$body.hits.hits.0._node/]
 
-You might have noticed that a significant part of the script depends on
+WARNING: While scripted similarities provide a lot of flexibility, there is
+a set of rules that they need to satisfy. Failing to do so could make
+Elasticsearch silently return wrong top hits or fail with internal errors at
+search time:
+
+ - Returned scores must be positive.
+ - All other variables remaining equal, scores must not decrease when
+   `doc.freq` increases.
+ - All other variables remaining equal, scores must not increase when
+   `doc.length` increases.
+
+You might have noticed that a significant part of the above script depends on
 statistics that are the same for every document. It is possible to make the
 above slightly more efficient by providing an `weight_script` which will
 compute the document-independent part of the score and will be available
@@ -506,7 +517,6 @@ GET /index/_search?explain=true
 
 ////////////////////
 
-
 Type name: `scripted`
 
 [float]
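The three constraints in the added WARNING can be sanity-checked against a TF-IDF-style score function. The sketch below is a plain-Java stand-in, not the docs' Painless script; the class name, method signature, and exact formula are illustrative assumptions:

```java
// Illustrative score function obeying the scripted-similarity rules:
// positive, non-decreasing in doc.freq, non-increasing in doc.length.
final class ScriptedSimilaritySketch {
    static double score(double freq, long docCount, long docFreq, double docLength) {
        double idf = Math.log((docCount + 1.0) / (docFreq + 1.0)) + 1.0; // always > 0
        double tf = Math.sqrt(freq);              // grows with doc.freq
        double norm = 1.0 / Math.sqrt(docLength); // shrinks as doc.length grows
        return idf * tf * norm;                   // product of positives is positive
    }
}
```

A score that, say, subtracted a length penalty could go negative and violate the first rule, which is why a multiplicative norm is the safer shape.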

docs/reference/mapping/dynamic/field-mapping.asciidoc

+1-1
@@ -135,6 +135,6 @@ PUT my_index/_doc/1
 }
 --------------------------------------------------
 // CONSOLE
-<1> The `my_float` field is added as a <<number,`double`>> field.
+<1> The `my_float` field is added as a <<number,`float`>> field.
 <2> The `my_integer` field is added as a <<number,`long`>> field.

docs/reference/mapping/dynamic/templates.asciidoc

+16-5
@@ -46,11 +46,22 @@ name as an existing template, it will replace the old version.
 [[match-mapping-type]]
 ==== `match_mapping_type`
 
-The `match_mapping_type` matches on the datatype detected by
-<<dynamic-field-mapping,dynamic field mapping>>, in other words, the datatype
-that Elasticsearch thinks the field should have. Only the following datatypes
-can be automatically detected: `boolean`, `date`, `double`, `long`, `object`,
-`string`. It also accepts `*` to match all datatypes.
+The `match_mapping_type` is the datatype detected by the json parser. Since
+JSON doesn't allow to distinguish a `long` from an `integer` or a `double` from
+a `float`, it will always choose the wider datatype, ie. `long` for integers
+and `double` for floating-point numbers.
+
+The following datatypes may be automatically detected:
+
+- `boolean` when `true` or `false` are encountered.
+- `date` when <<date-detection,date detection>> is enabled and a string is
+  found that matches any of the configured date formats.
+- `double` for numbers with a decimal part.
+- `long` for numbers without a decimal part.
+- `object` for objects, also called hashes.
+- `string` for character strings.
+
+`*` may also be used in order to match all datatypes.
 
 For example, if we wanted to map all integer fields as `integer` instead of
 `long`, and all `string` fields as both `text` and `keyword`, we

docs/reference/query-dsl/query-string-syntax.asciidoc

+4-21
@@ -233,26 +233,10 @@ states that:
 * `news` must not be present
 * `quick` and `brown` are optional -- their presence increases the relevance
 
-The familiar operators `AND`, `OR` and `NOT` (also written `&&`, `||` and `!`)
-are also supported. However, the effects of these operators can be more
-complicated than is obvious at first glance. `NOT` takes precedence over
-`AND`, which takes precedence over `OR`. While the `+` and `-` only affect
-the term to the right of the operator, `AND` and `OR` can affect the terms to
-the left and right.
-
-****
-Rewriting the above query using `AND`, `OR` and `NOT` demonstrates the
-complexity:
-
-`quick OR brown AND fox AND NOT news`::
-
-This is incorrect, because `brown` is now a required term.
-
-`(quick OR brown) AND fox AND NOT news`::
-
-This is incorrect because at least one of `quick` or `brown` is now required
-and the search for those terms would be scored differently from the original
-query.
+The familiar boolean operators `AND`, `OR` and `NOT` (also written `&&`, `||`
+and `!`) are also supported but beware that they do not honor the usual
+precedence rules, so parentheses should be used whenever multiple operators are
+used together. For instance the previous query could be rewritten as:
 
 `((quick AND fox) OR (brown AND fox) OR fox) AND NOT news`::
 
@@ -270,7 +254,6 @@ would look like this:
 }
 }
 
-****
 
 ===== Grouping
 

modules/aggs-matrix-stats/src/main/java/org/elasticsearch/search/aggregations/support/MultiValuesSource.java

+1-1
@@ -47,7 +47,7 @@ public NumericDoubleValues getField(final int ordinal, LeafReaderContext ctx) th
         if (ordinal > names.length) {
             throw new IndexOutOfBoundsException("ValuesSource array index " + ordinal + " out of bounds");
         }
-        return multiValueMode.select(values[ordinal].doubleValues(ctx), Double.NEGATIVE_INFINITY);
+        return multiValueMode.select(values[ordinal].doubleValues(ctx));
     }
 }
 

modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateMethodValueSource.java

+1-1
@@ -54,7 +54,7 @@ class DateMethodValueSource extends FieldDataValueSource {
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
         final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
             @Override
             public double doubleVal(int docId) throws IOException {

modules/lang-expression/src/main/java/org/elasticsearch/script/expression/DateObjectValueSource.java

+1-1
@@ -56,7 +56,7 @@ class DateObjectValueSource extends FieldDataValueSource {
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
         MutableDateTime joda = new MutableDateTime(0, DateTimeZone.UTC);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
             @Override
             public double doubleVal(int docId) throws IOException {

modules/lang-expression/src/main/java/org/elasticsearch/script/expression/FieldDataValueSource.java

+1-1
@@ -68,7 +68,7 @@ public int hashCode() {
     @SuppressWarnings("rawtypes") // ValueSource uses a rawtype
     public FunctionValues getValues(Map context, LeafReaderContext leaf) throws IOException {
         AtomicNumericFieldData leafData = (AtomicNumericFieldData) fieldData.load(leaf);
-        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues(), 0d);
+        NumericDoubleValues docValues = multiValueMode.select(leafData.getDoubleValues());
         return new DoubleDocValues(this) {
             @Override
             public double doubleVal(int doc) throws IOException {

modules/rank-eval/src/test/java/org/elasticsearch/index/rankeval/RatedRequestsTests.java

+1
@@ -131,6 +131,7 @@ public void testXContentRoundtrip() throws IOException {
         }
     }
 
+    @AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/31104")
     public void testXContentParsingIsNotLenient() throws IOException {
         RatedRequest testItem = createTestItem(randomBoolean());
         XContentType xContentType = randomFrom(XContentType.values());

modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4SizeHeaderFrameDecoder.java

+11-9
@@ -37,17 +37,19 @@ final class Netty4SizeHeaderFrameDecoder extends ByteToMessageDecoder {
     protected void decode(ChannelHandlerContext ctx, ByteBuf in, List<Object> out) throws Exception {
         try {
             BytesReference networkBytes = Netty4Utils.toBytesReference(in);
-            int messageLength = TcpTransport.readMessageLength(networkBytes) + HEADER_SIZE;
-            // If the message length is -1, we have not read a complete header. If the message length is
-            // greater than the network bytes available, we have not read a complete frame.
-            if (messageLength != -1 && messageLength <= networkBytes.length()) {
-                final ByteBuf message = in.skipBytes(HEADER_SIZE);
-                // 6 bytes would mean it is a ping. And we should ignore.
-                if (messageLength != 6) {
-                    out.add(message);
+            int messageLength = TcpTransport.readMessageLength(networkBytes);
+            // If the message length is -1, we have not read a complete header.
+            if (messageLength != -1) {
+                int messageLengthWithHeader = messageLength + HEADER_SIZE;
+                // If the message length is greater than the network bytes available, we have not read a complete frame.
+                if (messageLengthWithHeader <= networkBytes.length()) {
+                    final ByteBuf message = in.skipBytes(HEADER_SIZE);
+                    // 6 bytes would mean it is a ping. And we should ignore.
+                    if (messageLengthWithHeader != 6) {
+                        out.add(message);
+                    }
                 }
             }
-
         } catch (IllegalArgumentException ex) {
             throw new TooLongFrameException(ex);
         }
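The bug this change fixes is easy to miss: the old code added `HEADER_SIZE` before comparing against `-1`, so the incomplete-header sentinel could never match. A self-contained sketch of the corrected ordering follows; the class name, the byte layout, and the toy `readMessageLength` are illustrative assumptions (the real method also validates the header and works on Netty `ByteBuf`s rather than byte arrays):

```java
// Sketch of the fixed length handling: test the -1 sentinel FIRST,
// then add the header size.
final class FrameLengthCheck {
    // Assumption for this sketch: a 6-byte header ('E', 'S', then a
    // 4-byte big-endian payload length), matching HEADER_SIZE above.
    static final int HEADER_SIZE = 6;

    // Toy stand-in for TcpTransport.readMessageLength:
    // -1 when the header is incomplete, else the payload length.
    static int readMessageLength(byte[] buffer) {
        if (buffer.length < HEADER_SIZE) {
            return -1;
        }
        return ((buffer[2] & 0xFF) << 24) | ((buffer[3] & 0xFF) << 16)
                | ((buffer[4] & 0xFF) << 8) | (buffer[5] & 0xFF);
    }

    // True when a complete, non-ping frame is buffered.
    static boolean completeNonPingFrame(byte[] buffer) {
        int messageLength = readMessageLength(buffer);
        if (messageLength == -1) {
            return false; // incomplete header: must check before adding HEADER_SIZE
        }
        int messageLengthWithHeader = messageLength + HEADER_SIZE;
        // a 6-byte frame (empty payload) is a ping and is ignored
        return messageLengthWithHeader <= buffer.length && messageLengthWithHeader != 6;
    }
}
```

In the old version, `readMessageLength(...) + HEADER_SIZE` turned the `-1` sentinel into `5`, so the `!= -1` guard silently never fired.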

server/src/main/java/org/elasticsearch/common/blobstore/BlobContainer.java

+23
@@ -74,6 +74,29 @@
      */
     void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException;
 
+    /**
+     * Reads blob content from the input stream and writes it to the container in a new blob with the given name,
+     * using an atomic write operation if the implementation supports it. When the BlobContainer implementation
+     * does not provide a specific implementation of writeBlobAtomic(String, InputStream, long), then
+     * the {@link #writeBlob(String, InputStream, long)} method is used.
+     *
+     * This method assumes the container does not already contain a blob of the same blobName. If a blob by the
+     * same name already exists, the operation will fail and an {@link IOException} will be thrown.
+     *
+     * @param blobName
+     *          The name of the blob to write the contents of the input stream to.
+     * @param inputStream
+     *          The input stream from which to retrieve the bytes to write to the blob.
+     * @param blobSize
+     *          The size of the blob to be written, in bytes. It is implementation dependent whether
+     *          this value is used in writing the blob to the repository.
+     * @throws FileAlreadyExistsException if a blob by the same name already exists
+     * @throws IOException if the input stream could not be read, or the target blob could not be written to.
+     */
+    default void writeBlobAtomic(final String blobName, final InputStream inputStream, final long blobSize) throws IOException {
+        writeBlob(blobName, inputStream, blobSize);
+    }
+
     /**
      * Deletes a blob with giving name, if the blob exists. If the blob does not exist,
      * this method throws a NoSuchFileException.
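The new method relies on Java 8 interface default methods: every existing `BlobContainer` implementation keeps compiling, and only stores that can actually write atomically need to override. A minimal sketch of the pattern (`SimpleBlobContainer` and `InMemoryBlobContainer` are hypothetical names for illustration, not Elasticsearch types):

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

// Mirrors the shape of the new API: a default method provides a
// non-atomic fallback that delegates to the plain write.
interface SimpleBlobContainer {
    void writeBlob(String blobName, InputStream in, long blobSize) throws IOException;

    // Containers that support atomic writes override this; others inherit
    // the delegation below and keep compiling unchanged.
    default void writeBlobAtomic(String blobName, InputStream in, long blobSize) throws IOException {
        writeBlob(blobName, in, blobSize);
    }
}

// A trivial implementation that does NOT override writeBlobAtomic,
// so atomic writes fall through to writeBlob.
class InMemoryBlobContainer implements SimpleBlobContainer {
    final Map<String, byte[]> blobs = new HashMap<>();

    @Override
    public void writeBlob(String blobName, InputStream in, long blobSize) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        int b;
        while ((b = in.read()) != -1) {
            out.write(b);
        }
        blobs.put(blobName, out.toByteArray());
    }
}
```

The FsBlobContainer change later in this commit is exactly such an override.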

server/src/main/java/org/elasticsearch/common/blobstore/fs/FsBlobContainer.java

+46-2
@@ -19,11 +19,12 @@
 
 package org.elasticsearch.common.blobstore.fs;
 
-import org.elasticsearch.core.internal.io.IOUtils;
+import org.elasticsearch.common.UUIDs;
 import org.elasticsearch.common.blobstore.BlobMetaData;
 import org.elasticsearch.common.blobstore.BlobPath;
 import org.elasticsearch.common.blobstore.support.AbstractBlobContainer;
 import org.elasticsearch.common.blobstore.support.PlainBlobMetaData;
+import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.core.internal.io.Streams;
 
 import java.io.BufferedInputStream;
@@ -56,8 +57,9 @@
  */
 public class FsBlobContainer extends AbstractBlobContainer {
 
-    protected final FsBlobStore blobStore;
+    private static final String TEMP_FILE_PREFIX = "pending-";
 
+    protected final FsBlobStore blobStore;
     protected final Path path;
 
     public FsBlobContainer(FsBlobStore blobStore, BlobPath blobPath, Path path) {
@@ -131,6 +133,48 @@ public void writeBlob(String blobName, InputStream inputStream, long blobSize) t
         IOUtils.fsync(path, true);
     }
 
+    @Override
+    public void writeBlobAtomic(final String blobName, final InputStream inputStream, final long blobSize) throws IOException {
+        final String tempBlob = tempBlobName(blobName);
+        final Path tempBlobPath = path.resolve(tempBlob);
+        try {
+            try (OutputStream outputStream = Files.newOutputStream(tempBlobPath, StandardOpenOption.CREATE_NEW)) {
+                Streams.copy(inputStream, outputStream);
+            }
+            IOUtils.fsync(tempBlobPath, false);
+
+            final Path blobPath = path.resolve(blobName);
+            // If the target file exists then Files.move() behaviour is implementation specific
+            // the existing file might be replaced or this method fails by throwing an IOException.
+            if (Files.exists(blobPath)) {
+                throw new FileAlreadyExistsException("blob [" + blobPath + "] already exists, cannot overwrite");
+            }
+            Files.move(tempBlobPath, blobPath, StandardCopyOption.ATOMIC_MOVE);
+        } catch (IOException ex) {
+            try {
+                deleteBlobIgnoringIfNotExists(tempBlob);
+            } catch (IOException e) {
+                ex.addSuppressed(e);
+            }
+            throw ex;
+        } finally {
+            IOUtils.fsync(path, true);
+        }
+    }
+
+    public static String tempBlobName(final String blobName) {
+        return "pending-" + blobName + "-" + UUIDs.randomBase64UUID();
+    }
+
+    /**
+     * Returns true if the blob is a leftover temporary blob.
+     *
+     * The temporary blobs might be left after failed atomic write operation.
+     */
+    public static boolean isTempBlobName(final String blobName) {
+        return blobName.startsWith(TEMP_FILE_PREFIX);
+    }
+
     @Override
     public void move(String source, String target) throws IOException {
         Path sourcePath = path.resolve(source);
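The filesystem implementation above follows a common write-to-temp-then-rename recipe: stream into a uniquely named `pending-` file, fsync it, then move it into place with `ATOMIC_MOVE` so readers never observe a half-written blob. A condensed sketch using only `java.nio.file` (the `AtomicWriteSketch` class is illustrative and omits the directory fsync and the custom delete helper the real code uses):

```java
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.UUID;

final class AtomicWriteSketch {
    // Writes the stream to dir/blobName via a temp file and an atomic rename.
    static void writeAtomic(Path dir, String blobName, InputStream in) throws IOException {
        // unique temp name, mirroring FsBlobContainer.tempBlobName()
        Path temp = dir.resolve("pending-" + blobName + "-" + UUID.randomUUID());
        try {
            Files.copy(in, temp);
            Path target = dir.resolve(blobName);
            // Files.move() behaviour on an existing target is implementation
            // specific, so fail explicitly instead of relying on it
            if (Files.exists(target)) {
                throw new FileAlreadyExistsException(target.toString());
            }
            Files.move(temp, target, StandardCopyOption.ATOMIC_MOVE);
        } catch (IOException ex) {
            Files.deleteIfExists(temp); // clean up the leftover "pending-" blob
            throw ex;
        }
    }
}
```

The fixed `TEMP_FILE_PREFIX` is what makes `isTempBlobName()` able to recognize and clean up blobs left behind by a crash mid-write.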

server/src/main/java/org/elasticsearch/common/util/SingleObjectCache.java

+5
@@ -64,6 +64,11 @@ public T getOrRefresh() {
         return cached;
     }
 
+    /** Return the potentially stale cached entry. */
+    protected final T getNoRefresh() {
+        return cached;
+    }
+
     /**
      * Returns a new instance to cache
      */
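For context, `SingleObjectCache` caches a single expensive-to-compute value: `getOrRefresh()` rebuilds it when stale, and the new `getNoRefresh()` exposes the possibly stale value without triggering a rebuild, which is useful when an approximate answer is acceptable. A simplified sketch of that shape (the real class uses a time-based refresh interval rather than the explicit stale flag assumed here):

```java
// TinyCache is a hypothetical simplification of SingleObjectCache.
abstract class TinyCache<T> {
    private volatile T cached;
    private volatile boolean stale = true;

    // Computes a fresh value; in the real class this is cache.refresh().
    protected abstract T refresh();

    // Rebuilds the entry when stale, then returns it.
    final T getOrRefresh() {
        if (stale) {
            cached = refresh();
            stale = false;
        }
        return cached;
    }

    // Return the potentially stale cached entry (null before the first refresh).
    protected final T getNoRefresh() {
        return cached;
    }

    final void markStale() {
        stale = true;
    }
}
```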
