Merge pull request elastic#29 from jimczi/challenges/index_sorting

jimczi · web-flow · commit 6ca49ceb9205 · 2017-05-02T10:38:46.000+02:00
Add index sorting challenges to nightly. Closes elastic#27
diff --git a/external/pages/adhoc/geonames/index.html b/external/pages/adhoc/geonames/index.html
@@ -103,6 +103,10 @@ <h3>Overview</h3>
                 <li><strong>Append Fast</strong>: Indexes the whole document corpus using a setup that will lead to a larger indexing
                     throughput than the default settings. Document ids are unique so all index operations are append only.
                 </li>
+                <li><strong>Append Fast Sorted</strong>: Indexes the whole document corpus in an index sorted by country_code field in ascending order
+                  and using a setup that will lead to a larger indexing throughput than the default settings.
+                  Document ids are unique so all index operations are append only.
+                </li>
                 <li><strong>Id Conflicts</strong>: Indexes the whole document corpus using a setup that will lead to a larger indexing
                     throughput than the default settings. Rally will produce duplicate ids in 25% of all documents (not configurable) so we
                     can simulate a scenario with appends most of the time and some updates in between.
diff --git a/external/pages/adhoc/nyc_taxis/index.html b/external/pages/adhoc/nyc_taxis/index.html
@@ -94,10 +94,12 @@ <h3>Overview</h3>
                 it to evaluate the performance of Elasticsearch for structured data. We run the following variation (which we call
                 "challenge" in Rally):</p>
             <ul>
-                <li><strong>Append</strong>: Indexes the whole document corpus using Elasticsearch default settings. We only adjust the
-                    number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns
-                    green. Document ids are unique so all index operations are append only. After that a couple of queries are run in
-                    parallel by multiple clients.
+                <li><strong>Append</strong>: Indexes the whole document corpus using a setup that will lead to a larger indexing throughput than the default settings
+                   and produce a smaller index (higher compression rate). Document ids are unique so all index operations are append only.
+                </li>
+                <li><strong>Append Sorted</strong>: Indexes the whole document corpus in an index sorted by pickup_datetime field in descending order (most recent first)
+                   and using a setup that will lead to a larger indexing throughput than the default settings and produce a smaller index (higher compression rate).
+                   Document ids are unique so all index operations are append only.
                 </li>
             </ul>
             <p>The benchmarks are run either for an out of the box configuration of Elasticsearch but with a larger heap of 4GB. For more
diff --git a/external/pages/adhoc/pmc/index.html b/external/pages/adhoc/pmc/index.html
@@ -102,6 +102,9 @@ <h3>Overview</h3>
                 <li><strong>Append Fast</strong>: Indexes the whole document corpus using a setup that will lead to a larger indexing
                     throughput than the default settings. Document ids are unique so all index operations are append only.
                 </li>
+                <li><strong>Append Fast Sorted</strong>: Indexes the whole document corpus in an index sorted by timestamp field in descending order (most recent first)
+                   and using a setup that will lead to a larger indexing throughput than the default settings. Document ids are unique so all index operations are append only.
+                </li>
                 <li><strong>Id Conflicts</strong>: Indexes the whole document corpus using a setup that will lead to a larger indexing
                     throughput than the default settings. Rally will produce duplicate ids in 25% of all documents (not configurable) so we
                     can simulate a scenario with appends most of the time and some updates in between.
diff --git a/night_rally.py b/night_rally.py
@@ -20,6 +20,7 @@
     ["append-no-conflicts", "defaults"],
     ["append-no-conflicts-index-only", "4gheap"],
     ["append-fast-no-conflicts", "4gheap"],
+    ["append-fast-sorted-no-conflicts", "4gheap"],
     ["append-fast-with-conflicts", "4gheap"],
     ["append-no-conflicts-index-only-1-replica", "two_nodes"],
     ["append-no-conflicts-index-only", "verbose_iw"]
@@ -41,18 +42,21 @@
     ["append-no-conflicts-index-only", "defaults"],
     ["append-no-conflicts", "4gheap"],
     ["append-fast-no-conflicts", "4gheap"],
+    ["append-fast-sorted-no-conflicts", "4gheap"],
     ["append-fast-with-conflicts", "4gheap"],
     ["append-no-conflicts-index-only-1-replica", "two_nodes"]
 ]
 
 tracks["nyc_taxis"] = [
-    ["append-no-conflicts", "4gheap"]
+    ["append-no-conflicts", "4gheap"],
+    ["append-sorted-no-conflicts-index-only", "4gheap"]
 ]
 
 tracks["logging"] = [
     ["append-no-conflicts-index-only", "defaults"],
     ["append-no-conflicts", "4gheap"],
-    ["append-fast-no-conflicts", "4gheap"]
+    ["append-fast-no-conflicts", "4gheap"],
+    ["append-fast-sorted-no-conflicts", "4gheap"]
 ]
 
 # default challenge / car per track
@@ -124,7 +128,7 @@ def ensure_dir(directory):
 def sanitize(text):
     """
     Sanitizes the input text so it is safe to use as an environment name in Rally.
-    
+
     :param text: A text to sanitize
     """
     return text.lower().replace(" ", "-").replace(".", "_")