
Commit fb8327e

Measure maximum throughput in a separate step
With this commit we measure maximum throughput in a separate step. This ensures that the system shows fewer throughput fluctuations when running in throttled mode. Relates elastic#46
1 parent a799169 commit fb8327e

File tree

3 files changed: +120 -72 lines

eventdata/challenges/daily-log-volume-index-and-query.json

Lines changed: 26 additions & 2 deletions
@@ -16,6 +16,31 @@
     "benchmark_type": "logs-fixed-daily-volume"
   },
   "schedule": [
+    {
+      "name": "measure-maximum-utilization",
+      "operation": {
+        "operation-type": "bulk",
+        "param-source": "elasticlogs_bulk",
+        "index": "throughput-test-elasticlogs-2999-01-01",
+        "bulk-size": {{p_bulk_size}},
+        "daily_logging_volume": "{{p_daily_logging_volume}}",
+        "number_of_days": 1,
+        "record_raw_event_size": false
+      },
+      {# Whatever is shorter will win - either we run for this long or we finished ingesting the daily logging volume #}
+      "time-period": 600,
+      "schedule": "utilization",
+      "record-response-times": true,
+      "clients": {{ p_bulk_indexing_clients }},
+      "include-in-reporting": false
+    },
+    {
+      "name": "delete-measurement-index",
+      "operation": {
+        "operation-type": "delete-index",
+        "index": "throughput-test-elasticlogs-2999-01-01"
+      }
+    },
     {% set comma = joiner() %}
     {% for day in range(p_number_of_days) %}
     {% set utilization = (day + 1) / p_number_of_days %}
@@ -25,6 +50,7 @@
     {
       "parallel": {
         "completed-by": "{{bulk_index_task_name}}",
+        {# We are assuming that indexing one day of logs takes longer than the warmup-time-period #}
         "warmup-time-period": 600,
         "tasks": [
           {
@@ -40,8 +66,6 @@
             "number_of_days": 1,
             "record_raw_event_size": {{p_record_raw_event_size}}
           },
-          {# We are assuming that indexing one day of logs takes longer than the warmup-time-period #}
-          "warmup-time-period": 600,
           "schedule": "utilization",
           "target-utilization": {{ utilization }},
           "clients": {{ p_bulk_indexing_clients }},

eventdata/schedulers/utilization_scheduler.py

Lines changed: 39 additions & 36 deletions
@@ -22,57 +22,60 @@
 
 
 class UtilizationBasedScheduler:
+    RESPONSE_TIMES = []
     """
-    This scheduler schedules events at 100% utilization (unthrottled) during the warmup time-period. It tracks this
-    period itself (i.e. independently of Rally) using the task parameter ``warmup-time-period``. During this period
-    it gathers response time metrics. The median response time and the provided target utilization (via the task
-    parameter ``target-utilization``) determine the average waiting time during the actual measurement phase of the
-    benchmark. In order to avoid that clients coordinate, we randomize waiting time using a Poisson distribution.
+    This scheduler schedules events at 100% utilization (unthrottled) if it is in recording mode (enabled by setting
+    ``record-response-times`` to ``True``). Otherwise it runs in measurement mode where median response time and the
+    provided target utilization (via the task parameter ``target-utilization``) determine the average waiting time.
+    To prevent clients from coordinating (i.e. executing requests at exactly the same time), we randomize waiting
+    time using a Poisson distribution.
     """
     def __init__(self, params, perf_counter=time.perf_counter):
         self.logger = logging.getLogger(__name__)
         self.perf_counter = perf_counter
-        self.target_utilization = float(params["target-utilization"])
-        if self.target_utilization <= 0.0 or self.target_utilization > 1.0:
-            raise ValueError("target-utilization must be in the range (0.0, 1.0] but is {}".format(
-                self.target_utilization))
-        self.warmup_time_period = int(params["warmup-time-period"])
-        # to determine the target utilization
-        self.response_times = []
-        self.start_warmup = None
-        self.end_warmup = None
-        self.in_warmup = None
-        self.last_request_start = None
-        # determined by the utilization calculation
-        self.wait_time = None
+        self.recording = params.get("record-response-times", False)
+        if self.recording:
+            self.logger.info("Running in recording mode.")
+            self.last_request_start = None
+        else:
+            self.logger.info("Running in measurement mode.")
+            self.target_utilization = float(params["target-utilization"])
+            if self.target_utilization <= 0.0 or self.target_utilization > 1.0:
+                raise ValueError("target-utilization must be in the range (0.0, 1.0] but is {}".format(
+                    self.target_utilization))
+            response_times = UtilizationBasedScheduler.RESPONSE_TIMES
+            if len(response_times) == 0:
+                raise ValueError("No response times recorded. Please run first with 'record-response-times'.")
+            median_response_time_at_full_utilization = statistics.median(response_times)
+            self.time_between_requests = median_response_time_at_full_utilization * (1 / self.target_utilization)
+            self.logger.info("Time between requests is [%.3f] seconds for a utilization of [%.2f]%% (based on "
+                             "[%d] samples with a median response time of [%.3f] seconds).",
+                             self.time_between_requests, (self.target_utilization * 100), len(response_times),
+                             median_response_time_at_full_utilization)
 
     def next(self, current):
-        if self.in_warmup is None:
-            self.in_warmup = True
-            self.start_warmup = self.perf_counter()
-            self.end_warmup = self.start_warmup + self.warmup_time_period
-            self.last_request_start = self.start_warmup
-            return 0
-        elif self.in_warmup:
+        if self.recording:
             now = self.perf_counter()
-            self.response_times.append(now - self.last_request_start)
+            # skip the very first sample
+            if self.last_request_start is not None:
+                UtilizationBasedScheduler.RESPONSE_TIMES.append(now - self.last_request_start)
             self.last_request_start = now
-            if now >= self.end_warmup:
-                self.in_warmup = False
-                median_response_time_at_full_utilization = statistics.median(self.response_times)
-                # To determine the waiting time we need to subtract the (expected) response time from the total expected
-                # response time.
-                self.wait_time = median_response_time_at_full_utilization * ((1 / self.target_utilization) - 1)
-                self.logger.info("Waiting time is [%.2f] seconds for a utilization of [%.2f]%% (based on [%d] samples).",
-                                 self.wait_time, (self.target_utilization * 100), len(self.response_times))
             # run unthrottled while determining the target utilization
             return 0
 
         if self.target_utilization == 1.0:
             return 0
         else:
             # don't let every client send requests at the same time
-            return current + random.expovariate(1 / self.wait_time)
+            return current + random.expovariate(1 / self.time_between_requests)
+
+    # intended for testing
+    @classmethod
+    def reset_recorded_response_times(cls):
+        UtilizationBasedScheduler.RESPONSE_TIMES = []
 
     def __str__(self):
-        return "Utilization scheduler with target utilization of {:.2f}%.".format(self.target_utilization * 100)
+        if self.recording:
+            return "Utilization scheduler in recording mode."
+        else:
+            return "Utilization scheduler with target utilization of {:.2f}%.".format(self.target_utilization * 100)

tests/schedulers/utilization_scheduler_test.py

Lines changed: 55 additions & 34 deletions
@@ -21,6 +21,11 @@
 from eventdata.schedulers.utilization_scheduler import UtilizationBasedScheduler
 
 
+@pytest.fixture()
+def reset_recorded_times():
+    UtilizationBasedScheduler.reset_recorded_response_times()
+
+
 class StaticPerfCounter:
     def __init__(self, start):
         self.now = start
@@ -29,28 +34,44 @@ def __call__(self, *args, **kwargs):
         return self.now
 
 
+@pytest.mark.usefixtures("reset_recorded_times")
 def test_invalid_target_utilization():
     with pytest.raises(ValueError) as ex:
         UtilizationBasedScheduler(params={
             "target-utilization": 200.432,
-            "warmup-time-period": 100
+            "record-response-times": False
         })
 
     assert "target-utilization must be in the range (0.0, 1.0] but is 200.432" == str(ex.value)
 
     with pytest.raises(ValueError) as ex:
         UtilizationBasedScheduler(params={
             "target-utilization": 0.0,
-            "warmup-time-period": 100
+            "record-response-times": False
         })
 
     assert "target-utilization must be in the range (0.0, 1.0] but is 0.0" == str(ex.value)
 
 
+@pytest.mark.usefixtures("reset_recorded_times")
+def test_no_response_times_recorded():
+    with pytest.raises(ValueError) as ex:
+        UtilizationBasedScheduler(params={
+            "target-utilization": 0.5,
+            "record-response-times": False
+        })
+
+    assert "No response times recorded. Please run first with 'record-response-times'." == str(ex.value)
+
+
+@pytest.mark.usefixtures("reset_recorded_times")
 def test_valid_params():
+    # simulate that response times have been recorded previously...
+    UtilizationBasedScheduler.RESPONSE_TIMES.append(1)
+
     s = UtilizationBasedScheduler(params={
         "target-utilization": 0.0000001,
-        "warmup-time-period": 100
+        "record-response-times": False
     })
 
     assert s is not None
@@ -63,60 +84,60 @@ def test_valid_params():
     assert s is not None
 
 
+@pytest.mark.usefixtures("reset_recorded_times")
 def test_unthrottled_calculation():
     perf_counter = StaticPerfCounter(start=0)
 
     s = UtilizationBasedScheduler(params={
-        "target-utilization": 1.0,
-        "warmup-time-period": 100
+        "record-response-times": True
     }, perf_counter=perf_counter)
 
+    # simulate two requests 10 seconds apart
+    assert s.next(0) == 0
+    perf_counter.now = 10
     assert s.next(0) == 0
-    assert s.in_warmup
-    assert s.start_warmup == 0
-    assert s.end_warmup == 100
 
-    # simulate end of warmup
-    perf_counter.now = 100
-    assert s.next(100) == 0
-    assert not s.in_warmup
+    s = UtilizationBasedScheduler(params={
+        "target-utilization": 1.0,
+        "record-response-times": False
+    }, perf_counter=perf_counter)
 
-    # normal mode of operation
+    # normal mode of operation (unthrottled)
     assert s.next(200) == 0
     assert s.next(300) == 0
 
 
+@pytest.mark.usefixtures("reset_recorded_times")
 def test_throttled_calculation():
     perf_counter = StaticPerfCounter(start=0)
 
     s = UtilizationBasedScheduler(params={
-        "target-utilization": 0.1,
-        "warmup-time-period": 100
+        "record-response-times": True
     }, perf_counter=perf_counter)
 
-    # warmup phase, response time is always 20 seconds
+    # recording phase, response time is always 20 seconds
+    next_scheduled = 0
     for t in range(0, 100, 20):
         perf_counter.now = t
-        assert s.next(t) == 0
-        assert s.in_warmup
-        assert s.start_warmup == 0
-        assert s.end_warmup == 100
-
-    # simulate end of warmup
-    perf_counter.now = 100
-    assert s.next(100) == 0
-    assert not s.in_warmup
-    # 20 seconds * (1 / target utilization - 1) = 20 seconds * (1 / 0.1 - 1) = 20 seconds * 9 = 180 seconds
-    assert s.wait_time == 180
+        next_scheduled = s.next(next_scheduled)
+        assert next_scheduled == 0
+
+    # now we're in throttled mode
+    s = UtilizationBasedScheduler(params={
+        "target-utilization": 0.1,
+        "record-response-times": False
+    }, perf_counter=perf_counter)
+    # 20 seconds * (1 / target utilization) = 20 seconds * (1 / 0.1) = 20 seconds * 10 = 200 seconds
+    assert s.time_between_requests == 200
 
     # normal mode of operation
-    t = 101
     waiting_times = []
-    while t < 1000000:
-        next_request = s.next(t)
-        waiting_times.append((next_request - t))
+    next_scheduled = 0
+    while next_scheduled < 1000000:
+        next_request = s.next(next_scheduled)
+        waiting_times.append((next_request - next_scheduled))
         # 20 seconds is our expected response time
-        t = next_request + 20
+        next_scheduled = next_request
 
-    # mean response time should approach 180 seconds
-    assert 170 <= statistics.mean(waiting_times) <= 190
+    # mean response time should approach 200 seconds
+    assert 190 <= statistics.mean(waiting_times) <= 210
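
A detail worth noting about these tests: RESPONSE_TIMES is class-level state shared across scheduler instances (that is how the recording run hands its samples to the measurement run), so it also leaks between tests unless the reset_recorded_times fixture clears it. A standalone illustration, assuming the module is importable as in the test file's import above:

from eventdata.schedulers.utilization_scheduler import UtilizationBasedScheduler

# Samples live on the class, not the instance, so they survive the creation
# of new scheduler objects.
UtilizationBasedScheduler.RESPONSE_TIMES.append(1.5)
print(UtilizationBasedScheduler.RESPONSE_TIMES)  # [1.5]

UtilizationBasedScheduler.reset_recorded_response_times()
print(UtilizationBasedScheduler.RESPONSE_TIMES)  # []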
