From e56f3c3c0d8966bc5769f048d1097427fc0064b1 Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Mon, 14 Aug 2023 12:28:26 -0600
Subject: [PATCH 1/8] RUBY-3314 Implement variable iterations for benchmarks

---
 .gitignore                     |  1 +
 profile/benchmarking.rb        | 97 +++++++++-------------------------
 profile/benchmarking/helper.rb | 51 +++++++++++++++---
 profile/benchmarking/micro.rb  | 35 ++++--------
 4 files changed, 79 insertions(+), 105 deletions(-)

diff --git a/.gitignore b/.gitignore
index a60fb2cf76..7af488f10d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock
 .env.private*
 .env
 build
+profile/benchmarking/data
diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb
index a054b3906b..43797982b2 100644
--- a/profile/benchmarking.rb
+++ b/profile/benchmarking.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 # Copyright (C) 2015-2020 MongoDB Inc.
 #
@@ -23,107 +22,59 @@
 require_relative 'benchmarking/parallel'
 
 module Mongo
-
   # Module with all functionality for running driver benchmark tests.
   #
   # @since 2.2.3
   module Benchmarking
-
     extend self
 
-    # The current path.
-    #
-    # @return [ String ] The current path.
-    #
-    # @since 2.2.3
-    CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze
-
-    # The path to data files used in Benchmarking tests.
-    #
     # @return [ String ] Path to Benchmarking test files.
-    #
-    # @since 2.2.3
-    DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze
+    DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze
 
-    # The file containing the single tweet document.
-    #
-    # @return [ String ] The file containing the tweet document.
-    #
-    # @since 2.2.3
-    TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze
+    # @return [ String ] The file containing the single tweet document.
+    TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze
 
-    # The file containing the single small document.
-    #
-    # @return [ String ] The file containing the small document.
-    #
-    # @since 2.2.3
-    SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze
+    # @return [ String ] The file containing the single small document.
+    SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze
 
-    # The file containing the single large document.
-    #
-    # @return [ String ] The file containing the large document.
-    #
-    # @since 2.2.3
-    LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze
+    # @return [ String ] The file containing the single large document.
+    LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze
 
-    # The file to upload when testing GridFS.
-    #
-    # @return [ String ] The file containing the GridFS test data.
-    #
-    # @since 2.2.3
-    GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze
+    # @return [ String ] The file to upload when testing GridFS.
+    GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze
 
-    # The file path and base name for the LDJSON files.
-    #
     # @return [ String ] The file path and base name for the LDJSON files.
-    #
-    # @since 2.2.3
-    LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze
+    LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze
 
-    # The file path and base name for the outputted LDJSON files.
-    #
-    # @return [ String ] The file path and base name for the outputted LDJSON files.
-    #
-    # @since 2.2.3
-    LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze
+    # @return [ String ] The file path and base name for the emitted LDJSON files.
+    LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze
 
-    # The file path and base name for the GRIDFS files to upload.
-    #
     # @return [ String ] The file path and base name for the GRIDFS files to upload.
-    #
-    # @since 2.2.3
-    GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze
+    GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze
 
-    # The file path and base name for the outputted GRIDFS downloaded files.
-    #
-    # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files.
-    #
-    # @since 2.2.3
-    GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze
+    # @return [ String ] The file path and base name for the emitted GRIDFS downloaded files.
+    GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze
 
-    # The default number of test repetitions.
-    #
     # @return [ Integer ] The number of test repetitions.
-    #
-    # @since 2.2.3
-    TEST_REPETITIONS = 100.freeze
+    TEST_REPETITIONS = 100
 
-    # The number of default warmup repetitions of the test to do before
-    # recording times.
-    #
-    # @return [ Integer ] The default number of warmup repetitions.
+    # Convenience helper for loading the single tweet document.
     #
-    # @since 2.2.3
-    WARMUP_REPETITIONS = 10.freeze
-
+    # @return [ Hash ] a single parsed JSON document
     def tweet_document
       Benchmarking.load_file(TWEET_DOCUMENT_FILE).first
     end
 
+    # Convenience helper for loading the single small document.
+    #
+    # @return [ Hash ] a single parsed JSON document
     def small_document
       Benchmarking.load_file(SMALL_DOCUMENT_FILE).first
     end
 
+    # Convenience helper for loading the single large document.
+    #
+    # @return [ Hash ] a single parsed JSON document
     def large_document
       Benchmarking.load_file(LARGE_DOCUMENT_FILE).first
     end
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index 11d9f670ab..654eddbfd9 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -1,11 +1,8 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 module Mongo
-
   # Helper functions used by benchmarking tasks
   module Benchmarking
-
     extend self
 
     # Load a json file and represent each document as a Hash.
@@ -19,7 +16,7 @@ module Benchmarking
     #
     # @since 2.2.3
     def load_file(file_name)
-      File.open(file_name, "r") do |f|
+      File.open(file_name, 'r') do |f|
         f.each_line.collect do |line|
           parse_json(line)
         end
@@ -39,8 +36,47 @@ def load_file(file_name)
     # @since 2.2.3
     def parse_json(document)
       JSON.parse(document).tap do |doc|
-        if doc['_id'] && doc['_id']['$oid']
-          doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid'])
+        doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
+      end
+    end
+
+    # The spec requires that most benchmarks use a variable number of
+    # iterations, defined as follows:
+    #
+    # * iterations should loop for at least 1 minute cumulative execution
+    #   time
+    # * iterations should stop after 100 iterations or 5 minutes cumulative
+    #   execution time, whichever is shorter
+    #
+    # This method will yield once for each iteration.
+    #
+    # @param [ Integer ] max_iterations the maximum number of iterations to
+    #   attempt (default: 100)
+    # @param [ Integer ] min_time the minimum number of seconds to spend
+    #   iterating
+    # @param [ Integer ] max_time the maximum number of seconds to spend
+    #   iterating.
+    #
+    # @return [ Array<Float> ] the timings for each iteration
+    def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block)
+      [].tap do |results|
+        iteration_count = 0
+        cumulative_time = 0
+
+        loop do
+          timing = Benchmark.realtime(&block)
+
+          iteration_count += 1
+          cumulative_time += timing
+          results.push timing
+
+          # always stop after the maximum time has elapsed, regardless of
+          # iteration count.
+          break if cumulative_time > max_time
+
+          # otherwise, break if the minimum time has elapsed, and the maximum
+          # number of iterations have been reached.
+          break if cumulative_time >= min_time && iteration_count >= max_iterations
         end
       end
     end
@@ -56,7 +92,8 @@ def parse_json(document)
     #
     # @since 2.2.3
     def median(values)
-      values.sort![values.size/2-1]
+      i = (values.size / 2) - 1
+      values.sort[i]
     end
   end
 end
diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb
index 2f560bb310..99edf2ae10 100644
--- a/profile/benchmarking/micro.rb
+++ b/profile/benchmarking/micro.rb
@@ -1,5 +1,4 @@
 # frozen_string_literal: true
-# rubocop:todo all
 
 # Copyright (C) 2015-2020 MongoDB Inc.
 #
@@ -17,13 +16,11 @@
 
 module Mongo
   module Benchmarking
-
     # These tests focus on BSON encoding and decoding; they are client-side only and
     # do not involve any transmission of data to or from the server.
     #
     # @since 2.2.3
     module Micro
-
       extend self
 
       # Run a micro benchmark test.
@@ -38,10 +35,11 @@ module Micro
       #
       # @since 2.2.3
       def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
-        file_name = type.to_s << "_bson.json"
+        file_name = type.to_s << '_bson.json'
         GC.disable
-        file_path = [Benchmarking::DATA_PATH, file_name].join('/')
+        file_path = [ Benchmarking::DATA_PATH, file_name ].join('/')
         puts "#{action} : #{send(action, file_path, repetitions)}"
+        GC.enable
       end
 
       # Run an encoding micro benchmark test.
@@ -59,16 +57,8 @@ def encode(file_name, repetitions)
         data = Benchmarking.load_file(file_name)
         document = BSON::Document.new(data.first)
 
-        # WARMUP_REPETITIONS.times do
-        #   doc.to_bson
-        # end
-
-        results = repetitions.times.collect do
-          Benchmark.realtime do
-            10_000.times do
-              document.to_bson
-            end
-          end
+        results = Benchmarking.benchmark(max_iterations: repetitions) do
+          10_000.times { document.to_bson }
         end
         Benchmarking.median(results)
       end
@@ -88,18 +78,13 @@ def decode(file_name, repetitions)
         data = Benchmarking.load_file(file_name)
         buffer = BSON::Document.new(data.first).to_bson
 
-        # WARMUP_REPETITIONS.times do
-        #   BSON::Document.from_bson(buffers.shift)
-        # end
-
-        results = repetitions.times.collect do
-          Benchmark.realtime do
-            10_000.times do
-              BSON::Document.from_bson(buffer)
-              buffer.rewind!
-            end
+        results = Benchmarking.benchmark(max_iterations: repetitions) do
+          10_000.times do
+            BSON::Document.from_bson(buffer)
+            buffer.rewind!
           end
         end
+
         Benchmarking.median(results)
       end
     end

From 897fc1c58c1188da0d19746767b28f7381a7406b Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Mon, 14 Aug 2023 13:52:28 -0600
Subject: [PATCH 2/8] report percentiles along with the median

---
 Rakefile                       | 10 +++---
 profile/benchmarking/helper.rb | 60 ++++++++++++++++++++++++++++++++--
 profile/benchmarking/micro.rb  | 58 ++++++++++++++++++--------------
 3 files changed, 94 insertions(+), 34 deletions(-)

diff --git a/Rakefile b/Rakefile
index f64a790c12..e954ca3f9c 100644
--- a/Rakefile
+++ b/Rakefile
@@ -137,22 +137,20 @@ require_relative "profile/benchmarking"
 namespace :benchmark do
   desc "Run the driver benchmark tests."
 
-  namespace :micro do
-    desc "Run the common driver micro benchmarking tests"
+  namespace :bson do
+    desc "Run the bson benchmarking tests"
 
     namespace :flat do
       desc "Benchmarking for flat bson documents."
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: FLAT:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:flat, :encode)
+        Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: FLAT:: DECODE"
-        Mongo::Benchmarking::Micro.run(:flat, :decode)
+        Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } })
       end
     end
 
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index 654eddbfd9..73b4d8bb84 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -81,19 +81,73 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
       end
     end
 
+    # Formats and displays a report of the given results.
+    #
+    # @param [ Hash ] results the results of a benchmarking run.
+    # @param [ Integer ] indent how much the report should be indented.
+    # @param [ Array<Number> ] percentiles the percentile values to report
+    def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
+      indentation = ' ' * indent
+      results.each do |key, value|
+        puts "#{indentation}#{key}:"
+        if value.is_a?(Hash)
+          report(value, indent: indent + 2, percentiles: percentiles)
+        else
+          ps = Percentiles.new(value)
+          puts "#{indentation}  median: %g" % [ ps[50] ]
+          puts "#{indentation}  percentiles:"
+          percentiles.each do |pct|
+            puts "#{indentation}    %g: %g" % [ pct, ps[pct] ]
+          end
+        end
+      end
+    end
+
+    # A utility class for returning the list item at a given percentile
+    # value.
+    class Percentiles
+      # @return [ Array<Number> ] the sorted list of numbers to consider
+      attr_reader :list
+
+      # Create a new Percentiles object that encapsulates the given list of
+      # numbers.
+      #
+      # @param [ Array<Number> ] list the list of numbers to considier
+      def initialize(list)
+        @list = list.sort
+      end
+
+      # Finds and returns the element in the list that represents the given
+      # percentile value.
+      #
+      # @param [ Number ] percentile a number in the range [1,100]
+      #
+      # @return [ Number ] the element of the list for the given percentile.
+      def [](percentile)
+        i = (list.size * percentile / 100.0).ceil - 1
+        list[i]
+      end
+    end
+
     # Get the median of values in a list.
     #
     # @example Get the median.
     #   Benchmarking.median(values)
     #
-    # @param [ Array ] The values to get the median of.
+    # @param [ Array ] values The values to get the median of.
     #
     # @return [ Numeric ] The median of the list.
-    #
-    # @since 2.2.3
     def median(values)
       i = (values.size / 2) - 1
       values.sort[i]
     end
+
+    # Runs a given block with GC disabled.
+    def without_gc
+      GC.disable
+      yield
+    ensure
+      GC.enable
+    end
   end
 end
diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb
index 99edf2ae10..80899983c9 100644
--- a/profile/benchmarking/micro.rb
+++ b/profile/benchmarking/micro.rb
@@ -18,28 +18,43 @@ module Mongo
   module Benchmarking
     # These tests focus on BSON encoding and decoding; they are client-side only and
     # do not involve any transmission of data to or from the server.
-    #
-    # @since 2.2.3
     module Micro
       extend self
 
+      # Runs all of the benchmarks specified by the given mapping.
+      #
+      # @example Run a collection of benchmarks.
+      #   Benchmarking::Micro.run_all(
+      #     flat: %i[ encode decode ],
+      #     deep: %i[ encode decode ],
+      #     full: %i[ encode decode ]
+      #   )
+      #
+      # @return [ Hash ] a hash of the results for each benchmark
+      def run_all(map)
+        {}.tap do |results|
+          map.each do |type, actions|
+            results[type] = {}
+
+            actions.each do |action|
+              results[type][action] = run(type, action)
+            end
+          end
+        end
+      end
+
       # Run a micro benchmark test.
       #
       # @example Run a test.
       #   Benchmarking::Micro.run(:flat)
       #
       # @param [ Symbol ] type The type of test to run.
-      # @param [ Integer ] repetitions The number of test repetitions.
-      #
-      # @return [ Numeric ] The test results.
+      # @param [ :encode | :decode ] action The action to perform.
       #
-      # @since 2.2.3
-      def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
-        file_name = type.to_s << '_bson.json'
-        GC.disable
-        file_path = [ Benchmarking::DATA_PATH, file_name ].join('/')
-        puts "#{action} : #{send(action, file_path, repetitions)}"
-        GC.enable
+      # @return [ Array<Number> ] The test results for each iteration
+      def run(type, action)
+        file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
+        Benchmarking.without_gc { send(action, file_path) }
       end
 
       # Run an encoding micro benchmark test.
@@ -50,17 +65,14 @@ def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
       #
-      # @return [ Numeric ] The median of the results.
-      #
-      # @since 2.2.3
-      def encode(file_name, repetitions)
+      # @return [ Array<Numeric> ] The list of the results for each iteration
+      def encode(file_name)
         data = Benchmarking.load_file(file_name)
         document = BSON::Document.new(data.first)
 
-        results = Benchmarking.benchmark(max_iterations: repetitions) do
+        Benchmarking.benchmark do
           10_000.times { document.to_bson }
         end
-        Benchmarking.median(results)
       end
 
       # Run a decoding micro benchmark test.
@@ -71,21 +83,17 @@ def encode(file_name, repetitions)
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
       #
-      # @return [ Numeric ] The median of the results.
-      #
-      # @since 2.2.3
-      def decode(file_name, repetitions)
+      # @return [ Array<Numeric> ] The list of the results for each iteration
+      def decode(file_name)
         data = Benchmarking.load_file(file_name)
         buffer = BSON::Document.new(data.first).to_bson
 
-        results = Benchmarking.benchmark(max_iterations: repetitions) do
+        Benchmarking.benchmark do
           10_000.times do
             BSON::Document.from_bson(buffer)
             buffer.rewind!
           end
         end
-
-        Benchmarking.median(results)
       end
     end
   end

From 072b56a3cbc122eaa223e50ca226b9198ca1639c Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Mon, 14 Aug 2023 15:10:30 -0600
Subject: [PATCH 3/8] rename Benchmarking::Micro to Benchmarking::BSON

---
 Rakefile                                   | 36 ++++++++++++++--------
 profile/benchmarking.rb                    |  2 +-
 profile/benchmarking/{micro.rb => bson.rb} | 22 ++++++-------
 profile/benchmarking/helper.rb             |  2 +-
 4 files changed, 36 insertions(+), 26 deletions(-)
 rename profile/benchmarking/{micro.rb => bson.rb} (85%)

diff --git a/Rakefile b/Rakefile
index e954ca3f9c..49733ed4af 100644
--- a/Rakefile
+++ b/Rakefile
@@ -135,22 +135,32 @@ require_relative "profile/benchmarking"
 
 # Some require data files, available from the drivers team. See the comments above each task for details."
 namespace :benchmark do
-  desc "Run the driver benchmark tests."
+  desc "Run the bson benchmarking tests"
+  task :bson do
+    puts "BSON BENCHMARK"
+    Mongo::Benchmarking.report({
+      bson: Mongo::Benchmarking::BSON.run_all({
+        flat: %i[ encode decode ],
+        deep: %i[ encode decode ],
+        full: %i[ encode decode ],
+      })
+    })
+  end
 
   namespace :bson do
-    desc "Run the bson benchmarking tests"
-
     namespace :flat do
       desc "Benchmarking for flat bson documents."
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :encode do
-        Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } })
+        puts "BSON BENCHMARK :: FLAT :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
       task :decode do
-        Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } })
+        puts "BSON BENCHMARK :: FLAT :: DECODE"
+        Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } })
       end
     end
 
@@ -159,14 +169,14 @@ namespace :benchmark do
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: DEEP:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:deep, :encode)
+        puts "BSON BENCHMARK :: DEEP :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: DEEP:: DECODE"
-        Mongo::Benchmarking::Micro.run(:deep, :decode)
+        puts "BSON BENCHMARK :: DEEP :: DECODE"
+        Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } })
       end
     end
 
@@ -175,14 +185,14 @@ namespace :benchmark do
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
       task :encode do
-        puts "MICRO BENCHMARK:: FULL:: ENCODE"
-        Mongo::Benchmarking::Micro.run(:full, :encode)
+        puts "BSON BENCHMARK :: FULL :: ENCODE"
+        Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } })
       end
 
       # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
       task :decode do
-        puts "MICRO BENCHMARK:: FULL:: DECODE"
-        Mongo::Benchmarking::Micro.run(:full, :decode)
+        puts "BSON BENCHMARK :: FULL :: DECODE"
+        Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } })
       end
     end
   end
diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb
index 43797982b2..08f6b20423 100644
--- a/profile/benchmarking.rb
+++ b/profile/benchmarking.rb
@@ -16,7 +16,7 @@
 
 require 'benchmark'
 require_relative 'benchmarking/helper'
-require_relative 'benchmarking/micro'
+require_relative 'benchmarking/bson'
 require_relative 'benchmarking/single_doc'
 require_relative 'benchmarking/multi_doc'
 require_relative 'benchmarking/parallel'
diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/bson.rb
similarity index 85%
rename from profile/benchmarking/micro.rb
rename to profile/benchmarking/bson.rb
index 80899983c9..0a3667b261 100644
--- a/profile/benchmarking/micro.rb
+++ b/profile/benchmarking/bson.rb
@@ -18,13 +18,13 @@ module Mongo
   module Benchmarking
     # These tests focus on BSON encoding and decoding; they are client-side only and
     # do not involve any transmission of data to or from the server.
-    module Micro
+    module BSON
       extend self
 
       # Runs all of the benchmarks specified by the given mapping.
       #
       # @example Run a collection of benchmarks.
-      #   Benchmarking::Micro.run_all(
+      #   Benchmarking::BSON.run_all(
       #     flat: %i[ encode decode ],
       #     deep: %i[ encode decode ],
       #     full: %i[ encode decode ]
@@ -43,10 +43,10 @@ def run_all(map)
         end
       end
 
-      # Run a micro benchmark test.
+      # Run a BSON benchmark test.
       #
       # @example Run a test.
-      #   Benchmarking::Micro.run(:flat)
+      #   Benchmarking::BSON.run(:flat, :encode)
       #
       # @param [ Symbol ] type The type of test to run.
       # @param [ :encode | :decode ] action The action to perform.
@@ -57,10 +57,10 @@ def run(type, action)
         Benchmarking.without_gc { send(action, file_path) }
       end
 
-      # Run an encoding micro benchmark test.
+      # Run an encoding BSON benchmark test.
       #
       # @example Run an encoding test.
-      #   Benchmarking::Micro.encode(file_name)
+      #   Benchmarking::BSON.encode(file_name)
       #
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
@@ -68,17 +68,17 @@ def run(type, action)
       # @return [ Array<Numeric> ] The list of the results for each iteration
       def encode(file_name)
         data = Benchmarking.load_file(file_name)
-        document = BSON::Document.new(data.first)
+        document = ::BSON::Document.new(data.first)
 
         Benchmarking.benchmark do
           10_000.times { document.to_bson }
         end
       end
 
-      # Run a decoding micro benchmark test.
+      # Run a decoding BSON benchmark test.
       #
       # @example Run an decoding test.
-      #   Benchmarking::Micro.decode(file_name)
+      #   Benchmarking::BSON.decode(file_name)
       #
       # @param [ String ] file_name The name of the file with data for the test.
       # @param [ Integer ] repetitions The number of test repetitions.
@@ -86,11 +86,11 @@ def encode(file_name)
       # @return [ Array<Numeric> ] The list of the results for each iteration
       def decode(file_name)
         data = Benchmarking.load_file(file_name)
-        buffer = BSON::Document.new(data.first).to_bson
+        buffer = ::BSON::Document.new(data.first).to_bson
 
         Benchmarking.benchmark do
           10_000.times do
-            BSON::Document.from_bson(buffer)
+            ::BSON::Document.from_bson(buffer)
             buffer.rewind!
           end
         end
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index 73b4d8bb84..e66b292c5b 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -36,7 +36,7 @@ def load_file(file_name)
     # @since 2.2.3
     def parse_json(document)
       JSON.parse(document).tap do |doc|
-        doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
+        doc['_id'] = ::BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
       end
     end
 

From 126e87d13dc2434d1b9c38535d1c13ba221da5bc Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Mon, 14 Aug 2023 16:47:19 -0600
Subject: [PATCH 4/8] refactoring to appease rubocop

---
 .rubocop.yml                   |  3 +++
 profile/benchmarking/helper.rb | 28 +++++++++++++++++++---------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/.rubocop.yml b/.rubocop.yml
index 2b7a7f90f7..6cb04cd597 100644
--- a/.rubocop.yml
+++ b/.rubocop.yml
@@ -83,6 +83,9 @@ Style/Documentation:
   Exclude:
     - 'spec/**/*'
 
+Style/FormatStringToken:
+  Enabled: false
+
 Style/ModuleFunction:
   EnforcedStyle: extend_self
 
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index e66b292c5b..309932964c 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -85,20 +85,14 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
     #
     # @param [ Hash ] results the results of a benchmarking run.
     # @param [ Integer ] indent how much the report should be indented.
-    # @param [ Array<Number> ] percentiles the percentile values to report
+    # @param [ Array<Numeric> ] percentiles the percentile values to report
     def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
-      indentation = ' ' * indent
       results.each do |key, value|
-        puts "#{indentation}#{key}:"
+        puts format('%*s%s:', indent, '', key)
         if value.is_a?(Hash)
           report(value, indent: indent + 2, percentiles: percentiles)
         else
-          ps = Percentiles.new(value)
-          puts "#{indentation}  median: %g" % [ ps[50] ]
-          puts "#{indentation}  percentiles:"
-          percentiles.each do |pct|
-            puts "#{indentation}    %g: %g" % [ pct, ps[pct] ]
-          end
+          report_result(value, indent, percentiles)
         end
       end
     end
@@ -149,5 +143,21 @@ def without_gc
     ensure
       GC.enable
     end
+
+    private
+
+    # Formats and displays the results of a single benchmark run.
+    #
+    # @param [ Array<Numeric> ] results the results to report
+    # @param [ Integer ] indent how much the report should be indented
+    # @param [ Array<Numeric> ] percentiles the percentiles to report
+    def report_result(results, indent, percentiles)
+      ps = Percentiles.new(results)
+      puts format('%*smedian: %g', indent + 2, '', ps[50])
+      puts format('%*spercentiles:', indent + 2, '')
+      percentiles.each do |pct|
+        puts format('%*s%g: %g', indent + 4, '', pct, ps[pct])
+      end
+    end
   end
 end

From e86addc24eee503e43cc5b96da4660d8526de290 Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Tue, 15 Aug 2023 09:41:11 -0600
Subject: [PATCH 5/8] RUBY-3315 benchmark scoring

---
 profile/benchmarking/bson.rb        | 57 +++++++++++++++++++++++++++--
 profile/benchmarking/helper.rb      | 49 ++-----------------------
 profile/benchmarking/percentiles.rb | 31 ++++++++++++++++
 profile/benchmarking/summary.rb     | 56 ++++++++++++++++++++++++++++
 4 files changed, 145 insertions(+), 48 deletions(-)
 create mode 100644 profile/benchmarking/percentiles.rb
 create mode 100644 profile/benchmarking/summary.rb

diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb
index 0a3667b261..88cb979ab2 100644
--- a/profile/benchmarking/bson.rb
+++ b/profile/benchmarking/bson.rb
@@ -14,6 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+require_relative 'percentiles'
+require_relative 'summary'
+
 module Mongo
   module Benchmarking
     # These tests focus on BSON encoding and decoding; they are client-side only and
@@ -43,6 +46,20 @@ def run_all(map)
         end
       end
 
+      # As defined by the spec, the score for a given benchmark is the
+      # size of the task (in MB) divided by the median wall clock time.
+      #
+      # @param [ Symbol ] type the type of the task
+      # @param [ Mongo::Benchmarking::Percentiles ] percentiles the Percentiles
+      #   object to query for the median time.
+      # @param [ Numeric ] scale the number of times the operation is performed
+      #   per iteration, used to scale the task size.
+      #
+      # @return [ Numeric ] the score for the given task.
+      def score_for(type, percentiles, scale: 10_000)
+        task_size(type, scale) / percentiles[50]
+      end
+
       # Run a BSON benchmark test.
       #
       # @example Run a test.
@@ -51,10 +68,14 @@ def run_all(map)
       # @param [ Symbol ] type The type of test to run.
       # @param [ :encode | :decode ] action The action to perform.
       #
-      # @return [ Array<Number> ] The test results for each iteration
+      # @return [ Hash<:timings,:percentiles,:score> ] The test results for
+      #    the requested benchmark.
       def run(type, action)
-        file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
-        Benchmarking.without_gc { send(action, file_path) }
+        timings = Benchmarking.without_gc { send(action, file_for(type)) }
+        percentiles = Percentiles.new(timings)
+        score = score_for(type, percentiles)
+
+        Summary.new(timings, percentiles, score)
       end
 
       # Run an encoding BSON benchmark test.
@@ -95,6 +116,36 @@ def decode(file_name)
           end
         end
       end
+
+      private
+
+      # The path to the source file for the given task type.
+      #
+      # @param [ Symbol ] type the task type
+      #
+      # @return [ String ] the path to the source file.
+      def file_for(type)
+        File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
+      end
+
+      # As defined by the spec, the size of a BSON task is the size of the
+      # file, multiplied by the scale (the number of times the file is processed
+      # per iteration), divided by a million.
+      #
+      # "the dataset size for a task is the size of the single-document source
+      # file...times 10,000 operations"
+      #
+      # "Each task will have defined for it an associated size in
+      # megabytes (MB)"
+      #
+      # @param [ Symbol ] type the type of the task
+      # @param [ Numeric ] scale the number of times the operation is performed
+      #   per iteration (e.g. 10,000)
+      #
+      # @return [ Numeric ] the size of the task, reported in MB
+      def task_size(type, scale)
+        File.size(file_for(type)) * scale / 1_000_000.0
+      end
     end
   end
 end
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index 309932964c..b7a09d6e91 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -89,40 +89,15 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
     def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
       results.each do |key, value|
         puts format('%*s%s:', indent, '', key)
-        if value.is_a?(Hash)
-          report(value, indent: indent + 2, percentiles: percentiles)
+
+        if value.respond_to?(:summary)
+          puts value.summary(indent + 2, percentiles)
         else
-          report_result(value, indent, percentiles)
+          report(value, indent: indent + 2, percentiles: percentiles)
         end
       end
     end
 
-    # A utility class for returning the list item at a given percentile
-    # value.
-    class Percentiles
-      # @return [ Array<Number> ] the sorted list of numbers to consider
-      attr_reader :list
-
-      # Create a new Percentiles object that encapsulates the given list of
-      # numbers.
-      #
-      # @param [ Array<Number> ] list the list of numbers to considier
-      def initialize(list)
-        @list = list.sort
-      end
-
-      # Finds and returns the element in the list that represents the given
-      # percentile value.
-      #
-      # @param [ Number ] percentile a number in the range [1,100]
-      #
-      # @return [ Number ] the element of the list for the given percentile.
-      def [](percentile)
-        i = (list.size * percentile / 100.0).ceil - 1
-        list[i]
-      end
-    end
-
     # Get the median of values in a list.
     #
     # @example Get the median.
@@ -143,21 +118,5 @@ def without_gc
     ensure
       GC.enable
     end
-
-    private
-
-    # Formats and displays the results of a single benchmark run.
-    #
-    # @param [ Array<Numeric> ] results the results to report
-    # @param [ Integer ] indent how much the report should be indented
-    # @param [ Array<Numeric> ] percentiles the percentiles to report
-    def report_result(results, indent, percentiles)
-      ps = Percentiles.new(results)
-      puts format('%*smedian: %g', indent + 2, '', ps[50])
-      puts format('%*spercentiles:', indent + 2, '')
-      percentiles.each do |pct|
-        puts format('%*s%g: %g', indent + 4, '', pct, ps[pct])
-      end
-    end
   end
 end
diff --git a/profile/benchmarking/percentiles.rb b/profile/benchmarking/percentiles.rb
new file mode 100644
index 0000000000..aeebe9d1d9
--- /dev/null
+++ b/profile/benchmarking/percentiles.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Mongo
+  module Benchmarking
+    # A utility class for returning the list item at a given percentile
+    # value.
+    class Percentiles
+      # @return [ Array<Number> ] the sorted list of numbers to consider
+      attr_reader :list
+
+      # Create a new Percentiles object that encapsulates the given list of
+      # numbers.
+      #
+      # @param [ Array<Number> ] list the list of numbers to consider
+      def initialize(list)
+        @list = list.sort
+      end
+
+      # Finds and returns the element in the list that represents the given
+      # percentile value.
+      #
+      # @param [ Number ] percentile a number in the range [1,100]
+      #
+      # @return [ Number ] the element of the list for the given percentile.
+      def [](percentile)
+        i = (list.size * percentile / 100.0).ceil - 1
+        list[i]
+      end
+    end
+  end
+end
diff --git a/profile/benchmarking/summary.rb b/profile/benchmarking/summary.rb
new file mode 100644
index 0000000000..93fddf5435
--- /dev/null
+++ b/profile/benchmarking/summary.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Mongo
+  module Benchmarking
+    # A utility class for encapsulating the summary information for a
+    # benchmark, including behaviors for reporting on the summary.
+    class Summary
+      # @return [ Array<Numeric> ] the timings of each iteration in the
+      #   benchmark
+      attr_reader :timings
+
+      # @return [ Percentiles ] the percentiles object for querying the
+      #   timing at a given percentile value.
+      attr_reader :percentiles
+
+      # @return [ Numeric ] the composite score for the benchmark
+      attr_reader :score
+
+      # Construct a new Summary object with the given timings, percentiles,
+      # and score.
+      #
+      # @param [ Array<Numeric> ] timings the timings of each iteration in the
+      #   benchmark
+      # @param [ Percentiles ] percentiles the percentiles object for querying
+      #   the timing at a given percentile value
+      # @param [ Numeric ] score the composite score for the benchmark
+      def initialize(timings, percentiles, score)
+        @timings = timings
+        @percentiles = percentiles
+        @score = score
+      end
+
+      # @return [ Numeric ] the median timing for the benchmark.
+      def median
+        percentiles[50]
+      end
+
+      # Formats the results of a single benchmark run for display.
+      #
+      # @param [ Integer ] indent how much the report should be indented
+      # @param [ Array<Numeric> ] points the percentile points to report
+      #
+      # @return [ String ] a YAML-formatted summary
+      def summary(indent, points)
+        [].tap do |lines|
+          lines << format('%*sscore: %g', indent, '', score)
+          lines << format('%*smedian: %g', indent, '', median)
+          lines << format('%*spercentiles:', indent, '')
+          points.each do |pct|
+            lines << format('%*s%g: %g', indent + 2, '', pct, percentiles[pct])
+          end
+        end.join("\n")
+      end
+    end
+  end
+end

From 000765752eba5f01a36cfcbe0625838c556a3737 Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Tue, 15 Aug 2023 10:38:29 -0600
Subject: [PATCH 6/8] clean up the Rakefile

---
 Rakefile                                  | 163 +---------------------
 profile/benchmarking/rake/bson.rake       | 109 +++++++++++++++
 profile/benchmarking/rake/multi_doc.rake  |  33 +++++
 profile/benchmarking/rake/parallel.rake   |  35 +++++
 profile/benchmarking/rake/single_doc.rake |  26 ++++
 profile/benchmarking/rake/tasks.rake      |  11 ++
 6 files changed, 215 insertions(+), 162 deletions(-)
 create mode 100644 profile/benchmarking/rake/bson.rake
 create mode 100644 profile/benchmarking/rake/multi_doc.rake
 create mode 100644 profile/benchmarking/rake/parallel.rake
 create mode 100644 profile/benchmarking/rake/single_doc.rake
 create mode 100644 profile/benchmarking/rake/tasks.rake

diff --git a/Rakefile b/Rakefile
index 49733ed4af..4a4458070d 100644
--- a/Rakefile
+++ b/Rakefile
@@ -131,165 +131,4 @@ namespace :docs do
   end
 end
 
-require_relative "profile/benchmarking"
-
-# Some require data files, available from the drivers team. See the comments above each task for details."
-namespace :benchmark do
-  desc "Run the bson benchmarking tests"
-  task :bson do
-    puts "BSON BENCHMARK"
-    Mongo::Benchmarking.report({
-      bson: Mongo::Benchmarking::BSON.run_all({
-        flat: %i[ encode decode ],
-        deep: %i[ encode decode ],
-        full: %i[ encode decode ],
-      })
-    })
-  end
-
-  namespace :bson do
-    namespace :flat do
-      desc "Benchmarking for flat bson documents."
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
-      task :encode do
-        puts "BSON BENCHMARK :: FLAT :: ENCODE"
-        Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } })
-      end
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
-      task :decode do
-        puts "BSON BENCHMARK :: FLAT :: DECODE"
-        Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } })
-      end
-    end
-
-    namespace :deep do
-      desc "Benchmarking for deep bson documents."
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
-      task :encode do
-        puts "BSON BENCHMARK :: DEEP :: ENCODE"
-        Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } })
-      end
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
-      task :decode do
-        puts "BSON BENCHMARK :: DEEP :: DECODE"
-        Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } })
-      end
-    end
-
-    namespace :full do
-      desc "Benchmarking for full bson documents."
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
-      task :encode do
-        puts "BSON BENCHMARK :: FULL :: ENCODE"
-        Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } })
-      end
-
-      # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
-      task :decode do
-        puts "BSON BENCHMARK :: FULL :: DECODE"
-        Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } })
-      end
-    end
-  end
-
-  namespace :single_doc do
-    desc "Run the common driver single-document benchmarking tests"
-    task :command do
-      puts "SINGLE DOC BENCHMARK:: COMMAND"
-      Mongo::Benchmarking::SingleDoc.run(:command)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
-    task :find_one do
-      puts "SINGLE DOC BENCHMARK:: FIND ONE BY ID"
-      Mongo::Benchmarking::SingleDoc.run(:find_one)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
-    task :insert_one_small do
-      puts "SINGLE DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT"
-      Mongo::Benchmarking::SingleDoc.run(:insert_one_small)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
-    task :insert_one_large do
-      puts "SINGLE DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT"
-      Mongo::Benchmarking::SingleDoc.run(:insert_one_large)
-    end
-  end
-
-  namespace :multi_doc do
-    desc "Run the common driver multi-document benchmarking tests"
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
-    task :find_many do
-      puts "MULTI DOCUMENT BENCHMARK:: FIND MANY"
-      Mongo::Benchmarking::MultiDoc.run(:find_many)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
-    task :bulk_insert_small do
-      puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL"
-      Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
-    task :bulk_insert_large do
-      puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE"
-      Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
-    task :gridfs_upload do
-      puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD"
-      Mongo::Benchmarking::MultiDoc.run(:gridfs_upload)
-    end
-
-    # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
-    task :gridfs_download do
-      puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD"
-      Mongo::Benchmarking::MultiDoc.run(:gridfs_download)
-    end
-  end
-
-  namespace :parallel do
-    desc "Run the common driver paralell ETL benchmarking tests"
-
-    # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
-    # with the files used in this task.
-    task :import do
-      puts "PARALLEL ETL BENCHMARK:: IMPORT"
-      Mongo::Benchmarking::Parallel.run(:import)
-    end
-
-    # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
-    # with the files used in this task.
-    # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI"
-    # called 'output'.
-    task :export do
-      puts "PARALLEL ETL BENCHMARK:: EXPORT"
-      Mongo::Benchmarking::Parallel.run(:export)
-    end
-
-    # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
-    # with the files used in this task.
-    task :gridfs_upload do
-      puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD"
-      Mongo::Benchmarking::Parallel.run(:gridfs_upload)
-    end
-
-    # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
-    # with the files used in this task.
-    # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI"
-    # called 'output'.
-    task :gridfs_download do
-      puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD"
-      Mongo::Benchmarking::Parallel.run(:gridfs_download)
-    end
-  end
-end
+load 'profile/benchmarking/rake/tasks.rake'
diff --git a/profile/benchmarking/rake/bson.rake b/profile/benchmarking/rake/bson.rake
new file mode 100644
index 0000000000..2147eb2220
--- /dev/null
+++ b/profile/benchmarking/rake/bson.rake
@@ -0,0 +1,109 @@
+# frozen_string_literal: true
+
+desc 'Run the full BSON benchmarking suite'
+task :bson do
+  puts 'BSON BENCHMARK SUITE'
+  Mongo::Benchmarking.report({
+    bson: Mongo::Benchmarking::BSON.run_all({
+      flat: %i[ encode decode ],
+      deep: %i[ encode decode ],
+      full: %i[ encode decode ],
+    })
+  })
+end
+
+namespace :bson do
+  desc 'Learn how to run the BSON benchmarks'
+  task :help do
+    puts <<~HELP
+      The BSON micro benchmarks require a set of data files that are stored in
+      the specifications repository, here:
+
+        https://github.com/mongodb/specifications/tree/master/source/benchmarking/data
+
+      Download the `extended_bson.tgz` file and extract its contents. It should
+      contain a single folder (`extended_bson`) with several files in it. Move
+      those files to:
+
+        #{Mongo::Benchmarking::DATA_PATH}
+
+      Once there, you may run any of the BSON benchmarking tasks:
+
+        $ rake benchmark:bson:flat:encode
+
+      Tasks may be run in aggregate, as well, by specifying the namespace
+      directly:
+
+        $ rake benchmark:bson:flat # runs all flat BSON benchmarks
+        $ rake benchmark:bson:deep # runs all deep BSON benchmarks
+        $ rake benchmark:bson:full # runs all full BSON benchmarks
+        $ rake benchmark:bson      # runs all BSON benchmarks
+    HELP
+  end
+
+  desc 'Run the `flat` BSON benchmarking suite'
+  task :flat do
+    puts 'BSON BENCHMARK :: FLAT'
+    Mongo::Benchmarking.report({
+      bson: Mongo::Benchmarking::BSON.run_all({ flat: %i[ encode decode ] })
+    })
+  end
+
+  namespace :flat do
+    desc 'Run the `flat` encoding BSON benchmark'
+    task :encode do
+      puts 'BSON BENCHMARK :: FLAT :: ENCODE'
+      Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } })
+    end
+
+    desc 'Run the `flat` decoding BSON benchmark'
+    task :decode do
+      puts 'BSON BENCHMARK :: FLAT :: DECODE'
+      Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } })
+    end
+  end
+
+  desc 'Run the `deep` BSON benchmarking suite'
+  task :deep do
+    puts 'BSON BENCHMARK :: DEEP'
+    Mongo::Benchmarking.report({
+      bson: Mongo::Benchmarking::BSON.run_all({ deep: %i[ encode decode ] })
+    })
+  end
+
+  namespace :deep do
+    desc 'Run the `deep` encoding BSON benchmark'
+    task :encode do
+      puts 'BSON BENCHMARK :: DEEP :: ENCODE'
+      Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } })
+    end
+
+    desc 'Run the `deep` decoding BSON benchmark'
+    task :decode do
+      puts 'BSON BENCHMARK :: DEEP :: DECODE'
+      Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } })
+    end
+  end
+
+  desc 'Run the `full` BSON benchmarking suite'
+  task :full do
+    puts 'BSON BENCHMARK :: FULL'
+    Mongo::Benchmarking.report({
+      bson: Mongo::Benchmarking::BSON.run_all({ full: %i[ encode decode ] })
+    })
+  end
+
+  namespace :full do
+    desc 'Run the `full` encoding BSON benchmark'
+    task :encode do
+      puts 'BSON BENCHMARK :: FULL :: ENCODE'
+      Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } })
+    end
+
+    desc 'Run the `full` decoding BSON benchmark'
+    task :decode do
+      puts 'BSON BENCHMARK :: FULL :: DECODE'
+      Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } })
+    end
+  end
+end
diff --git a/profile/benchmarking/rake/multi_doc.rake b/profile/benchmarking/rake/multi_doc.rake
new file mode 100644
index 0000000000..49d9aab95f
--- /dev/null
+++ b/profile/benchmarking/rake/multi_doc.rake
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+namespace :multi_doc do
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
+  task :find_many do
+    puts 'MULTI DOCUMENT BENCHMARK :: FIND MANY'
+    Mongo::Benchmarking::MultiDoc.run(:find_many)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
+  task :bulk_insert_small do
+    puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT SMALL'
+    Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
+  task :bulk_insert_large do
+    puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT LARGE'
+    Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
+  task :gridfs_upload do
+    puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS UPLOAD'
+    Mongo::Benchmarking::MultiDoc.run(:gridfs_upload)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
+  task :gridfs_download do
+    puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS DOWNLOAD'
+    Mongo::Benchmarking::MultiDoc.run(:gridfs_download)
+  end
+end
diff --git a/profile/benchmarking/rake/parallel.rake b/profile/benchmarking/rake/parallel.rake
new file mode 100644
index 0000000000..c8fa0fd5a2
--- /dev/null
+++ b/profile/benchmarking/rake/parallel.rake
@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+
+namespace :parallel do
+  # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
+  # with the files used in this task.
+  task :import do
+    puts 'PARALLEL ETL BENCHMARK :: IMPORT'
+    Mongo::Benchmarking::Parallel.run(:import)
+  end
+
+  # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
+  # with the files used in this task.
+  # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI'
+  # called 'output'.
+  task :export do
+    puts 'PARALLEL ETL BENCHMARK :: EXPORT'
+    Mongo::Benchmarking::Parallel.run(:export)
+  end
+
+  # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
+  # with the files used in this task.
+  task :gridfs_upload do
+    puts 'PARALLEL ETL BENCHMARK :: GRIDFS UPLOAD'
+    Mongo::Benchmarking::Parallel.run(:gridfs_upload)
+  end
+
+  # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
+  # with the files used in this task.
+  # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI'
+  # called 'output'.
+  task :gridfs_download do
+    puts 'PARALLEL ETL BENCHMARK :: GRIDFS DOWNLOAD'
+    Mongo::Benchmarking::Parallel.run(:gridfs_download)
+  end
+end
diff --git a/profile/benchmarking/rake/single_doc.rake b/profile/benchmarking/rake/single_doc.rake
new file mode 100644
index 0000000000..9280779f0d
--- /dev/null
+++ b/profile/benchmarking/rake/single_doc.rake
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+namespace :single_doc do
+  task :command do
+    puts 'SINGLE DOC BENCHMARK :: COMMAND'
+    Mongo::Benchmarking::SingleDoc.run(:command)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
+  task :find_one do
+    puts 'SINGLE DOC BENCHMARK :: FIND ONE BY ID'
+    Mongo::Benchmarking::SingleDoc.run(:find_one)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
+  task :insert_one_small do
+    puts 'SINGLE DOC BENCHMARK :: INSERT ONE SMALL DOCUMENT'
+    Mongo::Benchmarking::SingleDoc.run(:insert_one_small)
+  end
+
+  # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
+  task :insert_one_large do
+    puts 'SINGLE DOC BENCHMARK :: INSERT ONE LARGE DOCUMENT'
+    Mongo::Benchmarking::SingleDoc.run(:insert_one_large)
+  end
+end
diff --git a/profile/benchmarking/rake/tasks.rake b/profile/benchmarking/rake/tasks.rake
new file mode 100644
index 0000000000..7feae8d2ab
--- /dev/null
+++ b/profile/benchmarking/rake/tasks.rake
@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+
+require_relative '../../benchmarking'
+
+# Some tasks require data files, available from the drivers team.
+# See the comments above each task for details.
+namespace :benchmark do
+  %w[ bson single_doc multi_doc parallel ].each do |group|
+    load File.join(__dir__, "#{group}.rake")
+  end
+end

From b9c312540329cb1df05fdd750a5e4235a2de3c65 Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Tue, 15 Aug 2023 11:10:47 -0600
Subject: [PATCH 7/8] progress indicator while benchmarks are running

---
 profile/benchmarking/bson.rb   |  2 +-
 profile/benchmarking/helper.rb | 46 ++++++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb
index 88cb979ab2..415e58114e 100644
--- a/profile/benchmarking/bson.rb
+++ b/profile/benchmarking/bson.rb
@@ -71,7 +71,7 @@ def score_for(type, percentiles, scale: 10_000)
       # @return [ Hash<:timings,:percentiles,:score> ] The test results for
       #    the requested benchmark.
       def run(type, action)
-        timings = Benchmarking.without_gc { send(action, file_for(type)) }
+        timings = send(action, file_for(type))
         percentiles = Percentiles.new(timings)
         score = score_for(type, percentiles)
 
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index b7a09d6e91..730e74ae34 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -58,13 +58,21 @@ def parse_json(document)
     #   iterating.
     #
     # @return [ Array<Float> ] the timings for each iteration
-    def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block)
+    def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS,
+                  min_time: 60,
+                  max_time: 5 * 60,
+                  progress: default_progress_callback,
+                  &block)
+      progress ||= -> (state) {} # fallback to a no-op callback
+      progress[:start]
+
       [].tap do |results|
         iteration_count = 0
         cumulative_time = 0
 
         loop do
-          timing = Benchmark.realtime(&block)
+          timing = without_gc { Benchmark.realtime(&block) }
+          progress[:step]
 
           iteration_count += 1
           cumulative_time += timing
@@ -78,6 +86,8 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
           # number of iterations have been reached.
           break if cumulative_time >= min_time && iteration_count >= max_iterations
         end
+
+        progress[:end]
       end
     end
 
@@ -118,5 +128,37 @@ def without_gc
     ensure
       GC.enable
     end
+
+    private
+
+    # Returns the proc object (or nil) corresponding to the "PROGRESS"
+    # environment variable.
+    #
+    # @return [ Proc | nil ] the callback proc to use (or nil if none should
+    #   be used)
+    def default_progress_callback
+      case ENV['PROGRESS']
+      when nil, '0', 'false', 'none'
+        nil
+      when '1', 'true', 'minimal'
+        method(:minimal_progress_callback).to_proc
+      else
+        raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}"
+      end
+    end
+
+    # A minimal progress callback implementation, printing '|' when a benchmark
+    # starts and '.' for each iteration.
+    #
+    # @param [ :start | :step | :end ] state the current progress state
+    def minimal_progress_callback(state)
+      case state
+      when :start then print '|'
+      when :step  then print '.'
+      when :end   then puts
+      end
+
+      $stdout.flush
+    end
   end
 end

From 3f07148c0baae773bdbf51a65c257ba6ee2182fc Mon Sep 17 00:00:00 2001
From: Jamis Buck <jamis.buck@mongodb.com>
Date: Tue, 15 Aug 2023 12:12:30 -0600
Subject: [PATCH 8/8] rubocop

---
 profile/benchmarking/helper.rb            |  6 +++---
 profile/benchmarking/rake/bson.rake       | 25 +++++++++++++++++------
 profile/benchmarking/rake/multi_doc.rake  |  1 +
 profile/benchmarking/rake/parallel.rake   |  1 +
 profile/benchmarking/rake/single_doc.rake |  1 +
 5 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
index 730e74ae34..70d4e34312 100644
--- a/profile/benchmarking/helper.rb
+++ b/profile/benchmarking/helper.rb
@@ -63,7 +63,7 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS,
                   max_time: 5 * 60,
                   progress: default_progress_callback,
                   &block)
-      progress ||= -> (state) {} # fallback to a no-op callback
+      progress ||= ->(state) {} # fallback to a no-op callback
       progress[:start]
 
       [].tap do |results|
@@ -138,9 +138,9 @@ def without_gc
     #   be used)
     def default_progress_callback
       case ENV['PROGRESS']
-      when nil, '0', 'false', 'none'
+      when '0', 'false', 'none'
         nil
-      when '1', 'true', 'minimal'
+      when nil, '1', 'true', 'minimal'
         method(:minimal_progress_callback).to_proc
       else
         raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}"
diff --git a/profile/benchmarking/rake/bson.rake b/profile/benchmarking/rake/bson.rake
index 2147eb2220..4d5bdb1c04 100644
--- a/profile/benchmarking/rake/bson.rake
+++ b/profile/benchmarking/rake/bson.rake
@@ -1,18 +1,29 @@
 # frozen_string_literal: true
 
+# rubocop:disable Layout/FirstHashElementIndentation
+
 desc 'Run the full BSON benchmarking suite'
 task :bson do
   puts 'BSON BENCHMARK SUITE'
   Mongo::Benchmarking.report({
-    bson: Mongo::Benchmarking::BSON.run_all({
+    bson: Mongo::Benchmarking::BSON.run_all(
       flat: %i[ encode decode ],
       deep: %i[ encode decode ],
-      full: %i[ encode decode ],
-    })
+      full: %i[ encode decode ]
+    )
   })
 end
 
-namespace :bson do
+namespace :bson do # rubocop:disable Metrics/BlockLength
+  # a convenience task for running all of the bson benchmark tasks; this is
+  # only useful for testing that they all work.
+  task test: %w[
+    bson
+    bson:flat bson:flat:encode bson:flat:decode
+    bson:deep bson:deep:encode bson:deep:decode
+    bson:full bson:full:encode bson:full:decode
+  ]
+
   desc 'Learn how to run the BSON benchmarks'
   task :help do
     puts <<~HELP
@@ -45,7 +56,7 @@ namespace :bson do
   task :flat do
     puts 'BSON BENCHMARK :: FLAT'
     Mongo::Benchmarking.report({
-      bson: Mongo::Benchmarking::BSON.run_all({ flat: %i[ encode decode ] })
+      bson: Mongo::Benchmarking::BSON.run_all(flat: %i[ encode decode ])
     })
   end
 
@@ -67,7 +78,7 @@ namespace :bson do
   task :deep do
     puts 'BSON BENCHMARK :: DEEP'
     Mongo::Benchmarking.report({
-      bson: Mongo::Benchmarking::BSON.run_all({ deep: %i[ encode decode ] })
+      bson: Mongo::Benchmarking::BSON.run_all(deep: %i[ encode decode ])
     })
   end
 
@@ -107,3 +118,5 @@ namespace :bson do
     end
   end
 end
+
+# rubocop:enable Layout/FirstHashElementIndentation
diff --git a/profile/benchmarking/rake/multi_doc.rake b/profile/benchmarking/rake/multi_doc.rake
index 49d9aab95f..86c190ef1f 100644
--- a/profile/benchmarking/rake/multi_doc.rake
+++ b/profile/benchmarking/rake/multi_doc.rake
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# rubocop:todo all
 
 namespace :multi_doc do
   # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
diff --git a/profile/benchmarking/rake/parallel.rake b/profile/benchmarking/rake/parallel.rake
index c8fa0fd5a2..98752e231e 100644
--- a/profile/benchmarking/rake/parallel.rake
+++ b/profile/benchmarking/rake/parallel.rake
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# rubocop:todo all
 
 namespace :parallel do
   # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
diff --git a/profile/benchmarking/rake/single_doc.rake b/profile/benchmarking/rake/single_doc.rake
index 9280779f0d..803e28d593 100644
--- a/profile/benchmarking/rake/single_doc.rake
+++ b/profile/benchmarking/rake/single_doc.rake
@@ -1,4 +1,5 @@
 # frozen_string_literal: true
+# rubocop:todo all
 
 namespace :single_doc do
   task :command do