
Commit 4bde723

RUBY-3314 Implement variable iterations for benchmarks (#2771)
1 parent bed8c09 commit 4bde723

File tree

.gitignore
profile/benchmarking.rb
profile/benchmarking/helper.rb
profile/benchmarking/micro.rb

4 files changed: +79 -105 lines

Diff for: .gitignore (+1)

@@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock
 .env.private*
 .env
 build
+profile/benchmarking/data

Diff for: profile/benchmarking.rb (+24, -73)

@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 # Copyright (C) 2015-2020 MongoDB Inc.
 #
@@ -23,107 +22,59 @@
 require_relative 'benchmarking/parallel'
 
 module Mongo
-
   # Module with all functionality for running driver benchmark tests.
   #
   # @since 2.2.3
   module Benchmarking
-
     extend self
 
-    # The current path.
-    #
-    # @return [ String ] The current path.
-    #
-    # @since 2.2.3
-    CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze
-
-    # The path to data files used in Benchmarking tests.
-    #
     # @return [ String ] Path to Benchmarking test files.
-    #
-    # @since 2.2.3
-    DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze
+    DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze
 
-    # The file containing the single tweet document.
-    #
-    # @return [ String ] The file containing the tweet document.
-    #
-    # @since 2.2.3
-    TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze
+    # @return [ String ] The file containing the single tweet document.
+    TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze
 
-    # The file containing the single small document.
-    #
-    # @return [ String ] The file containing the small document.
-    #
-    # @since 2.2.3
-    SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze
+    # @return [ String ] The file containing the single small document.
+    SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze
 
-    # The file containing the single large document.
-    #
-    # @return [ String ] The file containing the large document.
-    #
-    # @since 2.2.3
-    LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze
+    # @return [ String ] The file containing the single large document.
+    LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze
 
-    # The file to upload when testing GridFS.
-    #
-    # @return [ String ] The file containing the GridFS test data.
-    #
-    # @since 2.2.3
-    GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze
+    # @return [ String ] The file to upload when testing GridFS.
+    GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze
 
-    # The file path and base name for the LDJSON files.
-    #
     # @return [ String ] The file path and base name for the LDJSON files.
-    #
-    # @since 2.2.3
-    LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze
+    LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze
 
-    # The file path and base name for the outputted LDJSON files.
-    #
-    # @return [ String ] The file path and base name for the outputted LDJSON files.
-    #
-    # @since 2.2.3
-    LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze
+    # @return [ String ] The file path and base name for the emitted LDJSON files.
+    LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze
 
-    # The file path and base name for the GRIDFS files to upload.
-    #
     # @return [ String ] The file path and base name for the GRIDFS files to upload.
-    #
-    # @since 2.2.3
-    GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze
+    GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze
 
-    # The file path and base name for the outputted GRIDFS downloaded files.
-    #
-    # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files.
-    #
-    # @since 2.2.3
-    GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze
+    # @return [ String ] The file path and base name for the emitted GRIDFS downloaded files.
+    GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze
 
-    # The default number of test repetitions.
-    #
     # @return [ Integer ] The number of test repetitions.
-    #
-    # @since 2.2.3
-    TEST_REPETITIONS = 100.freeze
+    TEST_REPETITIONS = 100
 
-    # The number of default warmup repetitions of the test to do before
-    # recording times.
-    #
-    # @return [ Integer ] The default number of warmup repetitions.
+    # Convenience helper for loading the single tweet document.
     #
-    # @since 2.2.3
-    WARMUP_REPETITIONS = 10.freeze
-
+    # @return [ Hash ] a single parsed JSON document
     def tweet_document
       Benchmarking.load_file(TWEET_DOCUMENT_FILE).first
     end
 
+    # Convenience helper for loading the single small document.
+    #
+    # @return [ Hash ] a single parsed JSON document
     def small_document
       Benchmarking.load_file(SMALL_DOCUMENT_FILE).first
     end
 
+    # Convenience helper for loading the single large document.
+    #
+    # @return [ Hash ] a single parsed JSON document
     def large_document
       Benchmarking.load_file(LARGE_DOCUMENT_FILE).first
     end
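Note on the constant changes above: the dropped CURRENT_PATH was only ever used to build DATA_PATH, and Ruby's built-in __dir__ already returns the directory of the current source file, so the extra constant is redundant; likewise, 100.freeze loses nothing by becoming plain 100, since Integer literals are already frozen. A minimal sketch of the __dir__ equivalence (the path shown is illustrative; __dir__ additionally resolves symlinks via realpath):

# Both expressions return the absolute directory containing this source file,
# e.g. ".../mongo-ruby-driver/profile" when evaluated from profile/benchmarking.rb.
old_style = File.expand_path(File.dirname(__FILE__))
new_style = __dir__

# Barring symlinks in the path, the two are the same string.
puts old_style == new_style # => true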

Diff for: profile/benchmarking/helper.rb (+44, -7)

@@ -1,11 +1,8 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 module Mongo
-
   # Helper functions used by benchmarking tasks
   module Benchmarking
-
     extend self
 
     # Load a json file and represent each document as a Hash.
@@ -19,7 +16,7 @@ module Benchmarking
     #
     # @since 2.2.3
     def load_file(file_name)
-      File.open(file_name, "r") do |f|
+      File.open(file_name, 'r') do |f|
        f.each_line.collect do |line|
          parse_json(line)
        end
@@ -39,8 +36,47 @@ def load_file(file_name)
     # @since 2.2.3
     def parse_json(document)
       JSON.parse(document).tap do |doc|
-        if doc['_id'] && doc['_id']['$oid']
-          doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid'])
+        doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
+      end
+    end
+
+    # The spec requires that most benchmarks use a variable number of
+    # iterations, defined as follows:
+    #
+    # * iterations should loop for at least 1 minute cumulative execution
+    #   time
+    # * iterations should stop after 100 iterations or 5 minutes cumulative
+    #   execution time, whichever is shorter
+    #
+    # This method will yield once for each iteration.
+    #
+    # @param [ Integer ] max_iterations the maximum number of iterations to
+    #   attempt (default: 100)
+    # @param [ Integer ] min_time the minimum number of seconds to spend
+    #   iterating
+    # @param [ Integer ] max_time the maximum number of seconds to spend
+    #   iterating.
+    #
+    # @return [ Array<Float> ] the timings for each iteration
+    def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block)
+      [].tap do |results|
+        iteration_count = 0
+        cumulative_time = 0
+
+        loop do
+          timing = Benchmark.realtime(&block)
+
+          iteration_count += 1
+          cumulative_time += timing
+          results.push timing
+
+          # always stop after the maximum time has elapsed, regardless of
+          # iteration count.
+          break if cumulative_time > max_time
+
+          # otherwise, break if the minimum time has elapsed, and the maximum
+          # number of iterations have been reached.
+          break if cumulative_time >= min_time && iteration_count >= max_iterations
         end
       end
     end
@@ -56,7 +92,8 @@ def parse_json(document)
     #
     # @since 2.2.3
     def median(values)
-      values.sort![values.size/2-1]
+      i = (values.size / 2) - 1
+      values.sort[i]
     end
   end
 end
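The new Benchmarking.benchmark helper centralizes the variable-iteration rule: it times each yield with Benchmark.realtime, keeps iterating until min_time seconds have accumulated, and stops at max_iterations iterations or max_time seconds, whichever comes first. The median change also switches from the destructive sort! to sort, so the caller's timing array is no longer mutated. A rough usage sketch, assuming the profile files in this commit are loaded; the workload and output lines are illustrative only, not part of the commit:

require 'benchmark'

# Time a stand-in workload; with these arguments the loop runs for at least
# 60 seconds of cumulative time and stops after 100 iterations or 5 minutes.
timings = Mongo::Benchmarking.benchmark(max_iterations: 100) do
  100_000.times { Math.sqrt(2) } # illustrative workload
end

puts "iterations run: #{timings.size}"
puts "median timing:  #{Mongo::Benchmarking.median(timings)} seconds"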

Diff for: profile/benchmarking/micro.rb (+10, -25)

@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 # Copyright (C) 2015-2020 MongoDB Inc.
 #
@@ -17,13 +16,11 @@
 
 module Mongo
   module Benchmarking
-
     # These tests focus on BSON encoding and decoding; they are client-side only and
     # do not involve any transmission of data to or from the server.
     #
     # @since 2.2.3
     module Micro
-
       extend self
 
       # Run a micro benchmark test.
@@ -38,10 +35,11 @@ module Micro
      #
      # @since 2.2.3
      def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
-        file_name = type.to_s << "_bson.json"
+        file_name = type.to_s << '_bson.json'
        GC.disable
-        file_path = [Benchmarking::DATA_PATH, file_name].join('/')
+        file_path = [ Benchmarking::DATA_PATH, file_name ].join('/')
        puts "#{action} : #{send(action, file_path, repetitions)}"
+        GC.enable
      end
 
      # Run an encoding micro benchmark test.
@@ -59,16 +57,8 @@ def encode(file_name, repetitions)
        data = Benchmarking.load_file(file_name)
        document = BSON::Document.new(data.first)
 
-        # WARMUP_REPETITIONS.times do
-        #   doc.to_bson
-        # end
-
-        results = repetitions.times.collect do
-          Benchmark.realtime do
-            10_000.times do
-              document.to_bson
-            end
-          end
+        results = Benchmarking.benchmark(max_iterations: repetitions) do
+          10_000.times { document.to_bson }
        end
        Benchmarking.median(results)
      end
@@ -88,18 +78,13 @@ def decode(file_name, repetitions)
        data = Benchmarking.load_file(file_name)
        buffer = BSON::Document.new(data.first).to_bson
 
-        # WARMUP_REPETITIONS.times do
-        #   BSON::Document.from_bson(buffers.shift)
-        # end
-
-        results = repetitions.times.collect do
-          Benchmark.realtime do
-            10_000.times do
-              BSON::Document.from_bson(buffer)
-              buffer.rewind!
-            end
+        results = Benchmarking.benchmark(max_iterations: repetitions) do
+          10_000.times do
+            BSON::Document.from_bson(buffer)
+            buffer.rewind!
          end
        end
+
        Benchmarking.median(results)
      end
    end
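With encode and decode delegating to Benchmarking.benchmark, the repetitions argument now acts as the max_iterations cap rather than a fixed loop count, and run re-enables GC after printing the result instead of leaving it disabled. A hypothetical invocation from a console or Rake task; the :flat type, and therefore a flat_bson.json data file under DATA_PATH, is an assumption for illustration, not something this diff confirms:

# Prints something like "encode : 0.42", the median seconds per iteration,
# where each iteration encodes the document 10,000 times.
Mongo::Benchmarking::Micro.run(:flat, :encode)

# Decoding benchmark, capped at 10 measured iterations.
Mongo::Benchmarking::Micro.run(:flat, :decode, 10)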
