RUBY-3313 Record benchmark percentiles (mongodb#2772)

jamis · comandeo-mongo · commit 6e88e3e97cad · 2024-01-24T17:43:04.000+01:00
* RUBY-3314 Implement variable iterations for benchmarks

* report percentiles along with the median

* rename Benchmarking::Micro to Benchmarking::BSON

* refactoring to appease rubocop
diff --git a/.rubocop.yml b/.rubocop.yml
@@ -86,6 +86,9 @@ Style/Documentation:
   Exclude:
     - 'spec/**/*'
 
+Style/FormatStringToken:
+  Enabled: false
+
 Style/ModuleFunction:
   EnforcedStyle: extend_self
 
diff --git a/Rakefile b/Rakefile
@@ -135,24 +135,32 @@ require_relative "profile/benchmarking"
 
 # Some require data files, available from the drivers team. See the comments above each task for details."
 namespace :benchmark do
-  desc "Run the driver benchmark tests."
-
-  namespace :micro do
-    desc "Run the common driver micro benchmarking tests"
+  desc "Run the bson benchmarking tests"
+  task :bson do
+    puts "BSON BENCHMARK"
+    Mongo::Benchmarking.report({
+      bson: Mongo::Benchmarking::BSON.run_all({
+        flat: %i[ encode decode ],
+        deep: %i[ encode decode ],
+        full: %i[ encode decode ],
+      })
+    })
+  end
 
+  namespace :bson do
     namespace :flat do
       desc "Benchmarking for flat bson documents."
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: FLAT:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:flat, :encode)
+        puts "BSON BENCHMARK :: FLAT :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: FLAT:: DECODE"
-        Mongo::Benchmarking::Micro.run(:flat, :decode)
+        puts "BSON BENCHMARK :: FLAT :: DECODE"
+        Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } })
       end
     end
 
@@ -161,14 +169,14 @@ namespace :benchmark do
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: DEEP:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:deep, :encode)
+        puts "BSON BENCHMARK :: DEEP :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: DEEP:: DECODE"
-        Mongo::Benchmarking::Micro.run(:deep, :decode)
+        puts "BSON BENCHMARK :: DEEP :: DECODE"
+        Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } })
       end
     end
 
@@ -177,14 +185,14 @@ namespace :benchmark do
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: FULL:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:full, :encode)
+        puts "BSON BENCHMARK :: FULL :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: FULL:: DECODE"
-        Mongo::Benchmarking::Micro.run(:full, :decode)
+        puts "BSON BENCHMARK :: FULL :: DECODE"
+        Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } })
       end
     end
   end
diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb
@@ -16,7 +16,7 @@
 
 require 'benchmark'
 require_relative 'benchmarking/helper'
-require_relative 'benchmarking/micro'
+require_relative 'benchmarking/bson'
 require_relative 'benchmarking/single_doc'
 require_relative 'benchmarking/multi_doc'
 require_relative 'benchmarking/parallel'
diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb
@@ -18,74 +18,82 @@ module Mongo
   module Benchmarking
     # These tests focus on BSON encoding and decoding; they are client-side only and
     # do not involve any transmission of data to or from the server.
-    #
-    # @since 2.2.3
-    module Micro
+    module BSON
       extend self
 
-      # Run a micro benchmark test.
+      # Runs all of the benchmarks specified by the given mapping.
+      #
+      # @example Run a collection of benchmarks.
+      #   Benchmarking::BSON.run_all(
+      #     flat: %i[ encode decode ],
+      #     deep: %i[ encode decode ],
+      #     full: %i[ encode decode ]
+      #   )
+      #
+      # @return [ Hash ] a hash of the results for each benchmark
+      def run_all(map)
+        {}.tap do |results|
+          map.each do |type, actions|
+            results[type] = {}
+
+            actions.each do |action|
+              results[type][action] = run(type, action)
+            end
+          end
+        end
+      end
+
+      # Run a BSON benchmark test.
       #
       # @example Run a test.
-      #   Benchmarking::Micro.run(:flat)
+      #   Benchmarking::BSON.run(:flat, :encode)
       #
       # @param [ Symbol ] type The type of test to run.
-      # @param [ Integer ] repetitions The number of test repetitions.
-      #
-      # @return [ Numeric ] The test results.
+      # @param [ :encode | :decode ] action The action to perform.
       #
-      # @since 2.2.3
-      def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
-        file_name = type.to_s << '_bson.json'
-        GC.disable
-        file_path = [ Benchmarking::DATA_PATH, file_name ].join('/')
-        puts "#{action} : #{send(action, file_path, repetitions)}"
-        GC.enable
+      # @return [ Array<Numeric> ] The test results for each iteration
+      def run(type, action)
+        file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
+        Benchmarking.without_gc { send(action, file_path) }
       end
 
-      # Run an encoding micro benchmark test.
+      # Run an encoding BSON benchmark test.
       #
       # @example Run an encoding test.
-      #   Benchmarking::Micro.encode(file_name)
+      #   Benchmarking::BSON.encode(file_name)
       #
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
       #
-      # @return [ Numeric ] The median of the results.
-      #
-      # @since 2.2.3
-      def encode(file_name, repetitions)
+      # @return [ Array<Numeric> ] The list of the results for each iteration
+      def encode(file_name)
         data = Benchmarking.load_file(file_name)
-        document = BSON::Document.new(data.first)
+        document = ::BSON::Document.new(data.first)
 
-        results = Benchmarking.benchmark(max_iterations: repetitions) do
+        Benchmarking.benchmark do
           10_000.times { document.to_bson }
         end
-        Benchmarking.median(results)
       end
 
-      # Run a decoding micro benchmark test.
+      # Run a decoding BSON benchmark test.
       #
       # @example Run an decoding test.
-      #   Benchmarking::Micro.decode(file_name)
+      #   Benchmarking::BSON.decode(file_name)
       #
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
       #
-      # @return [ Numeric ] The median of the results.
-      #
-      # @since 2.2.3
-      def decode(file_name, repetitions)
+      # @return [ Array<Numeric> ] The list of the results for each iteration
+      def decode(file_name)
         data = Benchmarking.load_file(file_name)
-        buffer = BSON::Document.new(data.first).to_bson
+        buffer = ::BSON::Document.new(data.first).to_bson
 
-        results = Benchmarking.benchmark(max_iterations: repetitions) do
+        Benchmarking.benchmark do
           10_000.times do
-            BSON::Document.from_bson(buffer)
+            ::BSON::Document.from_bson(buffer)
             buffer.rewind!
           end
         end
-
-        Benchmarking.median(results)
       end
     end
   end
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
@@ -36,7 +36,7 @@ def load_file(file_name)
     # @since 2.2.3
     def parse_json(document)
       JSON.parse(document).tap do |doc|
-        doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
+        doc['_id'] = ::BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
       end
     end
 
@@ -81,19 +81,83 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
       end
     end
 
+    # Formats and displays a report of the given results.
+    #
+    # @param [ Hash ] results the results of a benchmarking run.
+    # @param [ Integer ] indent how much the report should be indented.
+    # @param [ Array<Numeric> ] percentiles the percentile values to report
+    def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
+      results.each do |key, value|
+        puts format('%*s%s:', indent, '', key)
+        if value.is_a?(Hash)
+          report(value, indent: indent + 2, percentiles: percentiles)
+        else
+          report_result(value, indent, percentiles)
+        end
+      end
+    end
+
+    # A utility class for returning the list item at a given percentile
+    # value.
+    class Percentiles
+      # @return [ Array<Numeric> ] the sorted list of numbers to consider
+      attr_reader :list
+
+      # Create a new Percentiles object that encapsulates the given list of
+      # numbers.
+      #
+      # @param [ Array<Numeric> ] list the list of numbers to consider
+      def initialize(list)
+        @list = list.sort
+      end
+
+      # Finds and returns the element in the list that represents the given
+      # percentile value.
+      #
+      # @param [ Numeric ] percentile a number in the range [1,100]
+      #
+      # @return [ Numeric ] the element of the list for the given percentile.
+      def [](percentile)
+        i = (list.size * percentile / 100.0).ceil - 1
+        list[i]
+      end
+    end
+
     # Get the median of values in a list.
     #
     # @example Get the median.
     #   Benchmarking.median(values)
     #
-    # @param [ Array ] The values to get the median of.
+    # @param [ Array ] values The values to get the median of.
     #
     # @return [ Numeric ] The median of the list.
-    #
-    # @since 2.2.3
     def median(values)
       i = (values.size / 2) - 1
       values.sort[i]
     end
+
+    # Runs a given block with GC disabled.
+    def without_gc
+      GC.disable
+      yield
+    ensure
+      GC.enable
+    end
+
+    private
+
+    # Formats and displays the results of a single benchmark run.
+    #
+    # @param [ Array<Numeric> ] results the results to report
+    # @param [ Integer ] indent how much the report should be indented
+    # @param [ Array<Numeric> ] percentiles the percentiles to report
+    def report_result(results, indent, percentiles)
+      ps = Percentiles.new(results)
+      puts format('%*smedian: %g', indent + 2, '', ps[50])
+      puts format('%*spercentiles:', indent + 2, '')
+      percentiles.each do |pct|
+        puts format('%*s%g: %g', indent + 4, '', pct, ps[pct])
+      end
+    end
   end
 end