From e56f3c3c0d8966bc5769f048d1097427fc0064b1 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 12:28:26 -0600 Subject: [PATCH 1/8] RUBY-3314 Implement variable iterations for benchmarks --- .gitignore | 1 + profile/benchmarking.rb | 97 +++++++++------------------------- profile/benchmarking/helper.rb | 51 +++++++++++++++--- profile/benchmarking/micro.rb | 35 ++++-------- 4 files changed, 79 insertions(+), 105 deletions(-) diff --git a/.gitignore b/.gitignore index a60fb2cf76..7af488f10d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock .env.private* .env build +profile/benchmarking/data diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index a054b3906b..43797982b2 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -23,107 +22,59 @@ require_relative 'benchmarking/parallel' module Mongo - # Module with all functionality for running driver benchmark tests. # # @since 2.2.3 module Benchmarking - extend self - # The current path. - # - # @return [ String ] The current path. - # - # @since 2.2.3 - CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze - - # The path to data files used in Benchmarking tests. - # # @return [ String ] Path to Benchmarking test files. - # - # @since 2.2.3 - DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze + DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze - # The file containing the single tweet document. - # - # @return [ String ] The file containing the tweet document. - # - # @since 2.2.3 - TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze + # @return [ String ] The file containing the single tweet document. + TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze - # The file containing the single small document. - # - # @return [ String ] The file containing the small document. 
- # - # @since 2.2.3 - SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single small document. + SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze - # The file containing the single large document. - # - # @return [ String ] The file containing the large document. - # - # @since 2.2.3 - LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single large document. + LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze - # The file to upload when testing GridFS. - # - # @return [ String ] The file containing the GridFS test data. - # - # @since 2.2.3 - GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze + # @return [ String ] The file to upload when testing GridFS. + GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze - # The file path and base name for the LDJSON files. - # # @return [ String ] The file path and base name for the LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze + LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze - # The file path and base name for the outputted LDJSON files. - # - # @return [ String ] The file path and base name for the outputted LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze + # @return [ String ] The file path and base name for the emitted LDJSON files. + LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze - # The file path and base name for the GRIDFS files to upload. - # # @return [ String ] The file path and base name for the GRIDFS files to upload. 
- # - # @since 2.2.3 - GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze + GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze - # The file path and base name for the outputted GRIDFS downloaded files. - # - # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files. - # - # @since 2.2.3 - GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze + # @return [ String ] The file path and base name for the emitted GRIDFS downloaded files. + GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze - # The default number of test repetitions. - # # @return [ Integer ] The number of test repetitions. - # - # @since 2.2.3 - TEST_REPETITIONS = 100.freeze + TEST_REPETITIONS = 100 - # The number of default warmup repetitions of the test to do before - # recording times. - # - # @return [ Integer ] The default number of warmup repetitions. + # Convenience helper for loading the single tweet document. # - # @since 2.2.3 - WARMUP_REPETITIONS = 10.freeze - + # @return [ Hash ] a single parsed JSON document def tweet_document Benchmarking.load_file(TWEET_DOCUMENT_FILE).first end + # Convenience helper for loading the single small document. + # + # @return [ Hash ] a single parsed JSON document def small_document Benchmarking.load_file(SMALL_DOCUMENT_FILE).first end + # Convenience helper for loading the single large document. 
+ # + # @return [ Hash ] a single parsed JSON document def large_document Benchmarking.load_file(LARGE_DOCUMENT_FILE).first end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 11d9f670ab..654eddbfd9 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true -# rubocop:todo all module Mongo - # Helper functions used by benchmarking tasks module Benchmarking - extend self # Load a json file and represent each document as a Hash. @@ -19,7 +16,7 @@ module Benchmarking # # @since 2.2.3 def load_file(file_name) - File.open(file_name, "r") do |f| + File.open(file_name, 'r') do |f| f.each_line.collect do |line| parse_json(line) end @@ -39,8 +36,47 @@ def load_file(file_name) # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| - if doc['_id'] && doc['_id']['$oid'] - doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) + doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] + end + end + + # The spec requires that most benchmarks use a variable number of + # iterations, defined as follows: + # + # * iterations should loop for at least 1 minute cumulative execution + # time + # * iterations should stop after 100 iterations or 5 minutes cumulative + # execution time, whichever is shorter + # + # This method will yield once for each iteration. + # + # @param [ Integer ] max_iterations the maximum number of iterations to + # attempt (default: 100) + # @param [ Integer ] min_time the minimum number of seconds to spend + # iterating + # @param [ Integer ] max_time the maximum number of seconds to spend + # iterating. 
+ # + # @return [ Array ] the timings for each iteration + def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block) + [].tap do |results| + iteration_count = 0 + cumulative_time = 0 + + loop do + timing = Benchmark.realtime(&block) + + iteration_count += 1 + cumulative_time += timing + results.push timing + + # always stop after the maximum time has elapsed, regardless of + # iteration count. + break if cumulative_time > max_time + + # otherwise, break if the minimum time has elapsed, and the maximum + # number of iterations have been reached. + break if cumulative_time >= min_time && iteration_count >= max_iterations end end end @@ -56,7 +92,8 @@ def parse_json(document) # # @since 2.2.3 def median(values) - values.sort![values.size/2-1] + i = (values.size / 2) - 1 + values.sort[i] end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 2f560bb310..99edf2ae10 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -17,13 +16,11 @@ module Mongo module Benchmarking - # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. # # @since 2.2.3 module Micro - extend self # Run a micro benchmark test. @@ -38,10 +35,11 @@ module Micro # # @since 2.2.3 def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) - file_name = type.to_s << "_bson.json" + file_name = type.to_s << '_bson.json' GC.disable - file_path = [Benchmarking::DATA_PATH, file_name].join('/') + file_path = [ Benchmarking::DATA_PATH, file_name ].join('/') puts "#{action} : #{send(action, file_path, repetitions)}" + GC.enable end # Run an encoding micro benchmark test. 
@@ -59,16 +57,8 @@ def encode(file_name, repetitions) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) - # WARMUP_REPETITIONS.times do - # doc.to_bson - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - document.to_bson - end - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times { document.to_bson } end Benchmarking.median(results) end @@ -88,18 +78,13 @@ def decode(file_name, repetitions) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - # WARMUP_REPETITIONS.times do - # BSON::Document.from_bson(buffers.shift) - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - BSON::Document.from_bson(buffer) - buffer.rewind! - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times do + BSON::Document.from_bson(buffer) + buffer.rewind! end end + Benchmarking.median(results) end end From 897fc1c58c1188da0d19746767b28f7381a7406b Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 13:52:28 -0600 Subject: [PATCH 2/8] report percentiles along with the median --- Rakefile | 10 +++--- profile/benchmarking/helper.rb | 60 ++++++++++++++++++++++++++++++++-- profile/benchmarking/micro.rb | 58 ++++++++++++++++++-------------- 3 files changed, 94 insertions(+), 34 deletions(-) diff --git a/Rakefile b/Rakefile index f64a790c12..e954ca3f9c 100644 --- a/Rakefile +++ b/Rakefile @@ -137,22 +137,20 @@ require_relative "profile/benchmarking" namespace :benchmark do desc "Run the driver benchmark tests." - namespace :micro do - desc "Run the common driver micro benchmarking tests" + namespace :bson do + desc "Run the bson benchmarking tests" namespace :flat do desc "Benchmarking for flat bson documents." # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. 
task :encode do - puts "MICRO BENCHMARK:: FLAT:: ENCODE" - Mongo::Benchmarking::Micro.run(:flat, :encode) + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :decode do - puts "MICRO BENCHMARK:: FLAT:: DECODE" - Mongo::Benchmarking::Micro.run(:flat, :decode) + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 654eddbfd9..73b4d8bb84 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -81,19 +81,73 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ end end + # Formats and displays a report of the given results. + # + # @param [ Hash ] results the results of a benchmarking run. + # @param [ Integer ] indent how much the report should be indented. + # @param [ Array ] percentiles the percentile values to report + def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) + indentation = ' ' * indent + results.each do |key, value| + puts "#{indentation}#{key}:" + if value.is_a?(Hash) + report(value, indent: indent + 2, percentiles: percentiles) + else + ps = Percentiles.new(value) + puts "#{indentation} median: %g" % [ ps[50] ] + puts "#{indentation} percentiles:" + percentiles.each do |pct| + puts "#{indentation} %g: %g" % [ pct, ps[pct] ] + end + end + end + end + + # A utility class for returning the list item at a given percentile + # value. + class Percentiles + # @return [ Array ] the sorted list of numbers to consider + attr_reader :list + + # Create a new Percentiles object that encapsulates the given list of + # numbers. 
+ # + # @param [ Array ] list the list of numbers to considier + def initialize(list) + @list = list.sort + end + + # Finds and returns the element in the list that represents the given + # percentile value. + # + # @param [ Number ] percentile a number in the range [1,100] + # + # @return [ Number ] the element of the list for the given percentile. + def [](percentile) + i = (list.size * percentile / 100.0).ceil - 1 + list[i] + end + end + # Get the median of values in a list. # # @example Get the median. # Benchmarking.median(values) # - # @param [ Array ] The values to get the median of. + # @param [ Array ] values The values to get the median of. # # @return [ Numeric ] The median of the list. - # - # @since 2.2.3 def median(values) i = (values.size / 2) - 1 values.sort[i] end + + # Runs a given block with GC disabled. + def without_gc + GC.disable + yield + ensure + GC.enable + end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 99edf2ae10..80899983c9 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -18,28 +18,43 @@ module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. - # - # @since 2.2.3 module Micro extend self + # Runs all of the benchmarks specified by the given mapping. + # + # @example Run a collection of benchmarks. + # Benchmarking::Micro.run_all( + # flat: %i[ encode decode ], + # deep: %i[ encode decode ], + # full: %i[ encode decode ] + # ) + # + # @return [ Hash ] a hash of the results for each benchmark + def run_all(map) + {}.tap do |results| + map.each do |type, actions| + results[type] = {} + + actions.each do |action| + results[type][action] = run(type, action) + end + end + end + end + # Run a micro benchmark test. # # @example Run a test. # Benchmarking::Micro.run(:flat) # # @param [ Symbol ] type The type of test to run. 
- # @param [ Integer ] repetitions The number of test repetitions. - # - # @return [ Numeric ] The test results. + # @param [ :encode | :decode ] action The action to perform. # - # @since 2.2.3 - def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) - file_name = type.to_s << '_bson.json' - GC.disable - file_path = [ Benchmarking::DATA_PATH, file_name ].join('/') - puts "#{action} : #{send(action, file_path, repetitions)}" - GC.enable + # @return [ Array ] The test results for each iteration + def run(type, action) + file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") + Benchmarking.without_gc { send(action, file_path) } end # Run an encoding micro benchmark test. @@ -50,17 +65,14 @@ def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Numeric ] The median of the results. - # - # @since 2.2.3 - def encode(file_name, repetitions) + # @return [ Array ] The list of the results for each iteration + def encode(file_name) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) - results = Benchmarking.benchmark(max_iterations: repetitions) do + Benchmarking.benchmark do 10_000.times { document.to_bson } end - Benchmarking.median(results) end # Run a decoding micro benchmark test. @@ -71,21 +83,17 @@ def encode(file_name, repetitions) # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. # - # @return [ Numeric ] The median of the results. 
- # - # @since 2.2.3 - def decode(file_name, repetitions) + # @return [ Array ] The list of the results for each iteration + def decode(file_name) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - results = Benchmarking.benchmark(max_iterations: repetitions) do + Benchmarking.benchmark do 10_000.times do BSON::Document.from_bson(buffer) buffer.rewind! end end - - Benchmarking.median(results) end end end From 072b56a3cbc122eaa223e50ca226b9198ca1639c Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 15:10:30 -0600 Subject: [PATCH 3/8] rename Benchmarking::Micro to Benchmarking::BSON --- Rakefile | 36 ++++++++++++++-------- profile/benchmarking.rb | 2 +- profile/benchmarking/{micro.rb => bson.rb} | 22 ++++++------- profile/benchmarking/helper.rb | 2 +- 4 files changed, 36 insertions(+), 26 deletions(-) rename profile/benchmarking/{micro.rb => bson.rb} (85%) diff --git a/Rakefile b/Rakefile index e954ca3f9c..49733ed4af 100644 --- a/Rakefile +++ b/Rakefile @@ -135,22 +135,32 @@ require_relative "profile/benchmarking" # Some require data files, available from the drivers team. See the comments above each task for details." namespace :benchmark do - desc "Run the driver benchmark tests." + desc "Run the bson benchmarking tests" + task :bson do + puts "BSON BENCHMARK" + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ + flat: %i[ encode decode ], + deep: %i[ encode decode ], + full: %i[ encode decode ], + }) + }) + end namespace :bson do - desc "Run the bson benchmarking tests" - namespace :flat do desc "Benchmarking for flat bson documents." # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. 
task :encode do - Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) + puts "BSON BENCHMARK :: FLAT :: ENCODE" + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. task :decode do - Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::Micro.run(:flat, :encode) } } }) + puts "BSON BENCHMARK :: FLAT :: DECODE" + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) end end @@ -159,14 +169,14 @@ namespace :benchmark do # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :encode do - puts "MICRO BENCHMARK:: DEEP:: ENCODE" - Mongo::Benchmarking::Micro.run(:deep, :encode) + puts "BSON BENCHMARK :: DEEP :: ENCODE" + Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. task :decode do - puts "MICRO BENCHMARK:: DEEP:: DECODE" - Mongo::Benchmarking::Micro.run(:deep, :decode) + puts "BSON BENCHMARK :: DEEP :: DECODE" + Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) end end @@ -175,14 +185,14 @@ namespace :benchmark do # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. task :encode do - puts "MICRO BENCHMARK:: FULL:: ENCODE" - Mongo::Benchmarking::Micro.run(:full, :encode) + puts "BSON BENCHMARK :: FULL :: ENCODE" + Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) end # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. 
task :decode do - puts "MICRO BENCHMARK:: FULL:: DECODE" - Mongo::Benchmarking::Micro.run(:full, :decode) + puts "BSON BENCHMARK :: FULL :: DECODE" + Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) end end end diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index 43797982b2..08f6b20423 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -16,7 +16,7 @@ require 'benchmark' require_relative 'benchmarking/helper' -require_relative 'benchmarking/micro' +require_relative 'benchmarking/bson' require_relative 'benchmarking/single_doc' require_relative 'benchmarking/multi_doc' require_relative 'benchmarking/parallel' diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/bson.rb similarity index 85% rename from profile/benchmarking/micro.rb rename to profile/benchmarking/bson.rb index 80899983c9..0a3667b261 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/bson.rb @@ -18,13 +18,13 @@ module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. - module Micro + module BSON extend self # Runs all of the benchmarks specified by the given mapping. # # @example Run a collection of benchmarks. - # Benchmarking::Micro.run_all( + # Benchmarking::BSON.run_all( # flat: %i[ encode decode ], # deep: %i[ encode decode ], # full: %i[ encode decode ] @@ -43,10 +43,10 @@ def run_all(map) end end - # Run a micro benchmark test. + # Run a BSON benchmark test. # # @example Run a test. - # Benchmarking::Micro.run(:flat) + # Benchmarking::BSON.run(:flat) # # @param [ Symbol ] type The type of test to run. # @param [ :encode | :decode ] action The action to perform. @@ -57,10 +57,10 @@ def run(type, action) Benchmarking.without_gc { send(action, file_path) } end - # Run an encoding micro benchmark test. + # Run an encoding BSON benchmark test. 
# # @example Run an encoding test. - # Benchmarking::Micro.encode(file_name) + # Benchmarking::BSON.encode(file_name) # # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. @@ -68,17 +68,17 @@ def run(type, action) # @return [ Array ] The list of the results for each iteration def encode(file_name) data = Benchmarking.load_file(file_name) - document = BSON::Document.new(data.first) + document = ::BSON::Document.new(data.first) Benchmarking.benchmark do 10_000.times { document.to_bson } end end - # Run a decoding micro benchmark test. + # Run a decoding BSON benchmark test. # # @example Run an decoding test. - # Benchmarking::Micro.decode(file_name) + # Benchmarking::BSON.decode(file_name) # # @param [ String ] file_name The name of the file with data for the test. # @param [ Integer ] repetitions The number of test repetitions. @@ -86,11 +86,11 @@ def encode(file_name) # @return [ Array ] The list of the results for each iteration def decode(file_name) data = Benchmarking.load_file(file_name) - buffer = BSON::Document.new(data.first).to_bson + buffer = ::BSON::Document.new(data.first).to_bson Benchmarking.benchmark do 10_000.times do - BSON::Document.from_bson(buffer) + ::BSON::Document.from_bson(buffer) buffer.rewind! 
end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 73b4d8bb84..e66b292c5b 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -36,7 +36,7 @@ def load_file(file_name) # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| - doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] + doc['_id'] = ::BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] end end From 126e87d13dc2434d1b9c38535d1c13ba221da5bc Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 16:47:19 -0600 Subject: [PATCH 4/8] refactoring to appease rubocop --- .rubocop.yml | 3 +++ profile/benchmarking/helper.rb | 28 +++++++++++++++++++--------- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 2b7a7f90f7..6cb04cd597 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -83,6 +83,9 @@ Style/Documentation: Exclude: - 'spec/**/*' +Style/FormatStringToken: + Enabled: false + Style/ModuleFunction: EnforcedStyle: extend_self diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index e66b292c5b..309932964c 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -85,20 +85,14 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ # # @param [ Hash ] results the results of a benchmarking run. # @param [ Integer ] indent how much the report should be indented. 
- # @param [ Array ] percentiles the percentile values to report + # @param [ Array ] percentiles the percentile values to report def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) - indentation = ' ' * indent results.each do |key, value| - puts "#{indentation}#{key}:" + puts format('%*s%s:', indent, '', key) if value.is_a?(Hash) report(value, indent: indent + 2, percentiles: percentiles) else - ps = Percentiles.new(value) - puts "#{indentation} median: %g" % [ ps[50] ] - puts "#{indentation} percentiles:" - percentiles.each do |pct| - puts "#{indentation} %g: %g" % [ pct, ps[pct] ] - end + report_result(value, indent, percentiles) end end end @@ -149,5 +143,21 @@ def without_gc ensure GC.enable end + + private + + # Formats and displays the results of a single benchmark run. + # + # @param [ Array ] results the results to report + # @param [ Integer ] indent how much the report should be indented + # @param [ Array ] percentiles the percentiles to report + def report_result(results, indent, percentiles) + ps = Percentiles.new(results) + puts format('%*smedian: %g', indent + 2, '', ps[50]) + puts format('%*spercentiles:', indent + 2, '') + percentiles.each do |pct| + puts format('%*s%g: %g', indent + 4, '', pct, ps[pct]) + end + end end end From e86addc24eee503e43cc5b96da4660d8526de290 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 15 Aug 2023 09:41:11 -0600 Subject: [PATCH 5/8] RUBY-3315 benchmark scoring --- profile/benchmarking/bson.rb | 57 +++++++++++++++++++++++++++-- profile/benchmarking/helper.rb | 49 ++----------------------- profile/benchmarking/percentiles.rb | 31 ++++++++++++++++ profile/benchmarking/summary.rb | 56 ++++++++++++++++++++++++++++ 4 files changed, 145 insertions(+), 48 deletions(-) create mode 100644 profile/benchmarking/percentiles.rb create mode 100644 profile/benchmarking/summary.rb diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb index 0a3667b261..88cb979ab2 100644 --- 
a/profile/benchmarking/bson.rb +++ b/profile/benchmarking/bson.rb @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +require_relative 'percentiles' +require_relative 'summary' + module Mongo module Benchmarking # These tests focus on BSON encoding and decoding; they are client-side only and @@ -43,6 +46,20 @@ def run_all(map) end end + # As defined by the spec, the score for a given benchmark is the + # size of the task (in MB) divided by the median wall clock time. + # + # @param [ Symbol ] type the type of the task + # @param [ Mongo::Benchmarking::Percentiles ] percentiles the Percentiles + # object to query for the median time. + # @param [ Numeric ] scale the number of times the operation is performed + # per iteration, used to scale the task size. + # + # @return [ Numeric ] the score for the given task. + def score_for(type, percentiles, scale: 10_000) + task_size(type, scale) / percentiles[50] + end + # Run a BSON benchmark test. # # @example Run a test. @@ -51,10 +68,14 @@ def run_all(map) # @param [ Symbol ] type The type of test to run. # @param [ :encode | :decode ] action The action to perform. # - # @return [ Array ] The test results for each iteration + # @return [ Hash<:timings,:percentiles,:score> ] The test results for + # the requested benchmark. def run(type, action) - file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") - Benchmarking.without_gc { send(action, file_path) } + timings = Benchmarking.without_gc { send(action, file_for(type)) } + percentiles = Percentiles.new(timings) + score = score_for(type, percentiles) + + Summary.new(timings, percentiles, score) end # Run an encoding BSON benchmark test. @@ -95,6 +116,36 @@ def decode(file_name) end end end + + private + + # The path to the source file for the given task type. + # + # @param [ Symbol ] type the task type + # + # @return [ String ] the path to the source file. 
+ def file_for(type) + File.join(Benchmarking::DATA_PATH, "#{type}_bson.json") + end + + # As defined by the spec, the size of a BSON task is the size of the + # file, multiplied by the scale (the number of times the file is processed + # per iteration), divided by a million. + # + # "the dataset size for a task is the size of the single-document source + # file...times 10,000 operations" + # + # "Each task will have defined for it an associated size in + # megabytes (MB)" + # + # @param [ Symbol ] type the type of the task + # @param [ Numeric ] scale the number of times the operation is performed + # per iteration (e.g. 10,000) + # + # @return [ Numeric ] the size of the task, reported in MB + def task_size(type, scale) + File.size(file_for(type)) * scale / 1_000_000.0 + end end end end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 309932964c..b7a09d6e91 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -89,40 +89,15 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ]) results.each do |key, value| puts format('%*s%s:', indent, '', key) - if value.is_a?(Hash) - report(value, indent: indent + 2, percentiles: percentiles) + + if value.respond_to?(:summary) + puts value.summary(indent + 2, percentiles) else - report_result(value, indent, percentiles) + report(value, indent: indent + 2, percentiles: percentiles) end end end - # A utility class for returning the list item at a given percentile - # value. - class Percentiles - # @return [ Array ] the sorted list of numbers to consider - attr_reader :list - - # Create a new Percentiles object that encapsulates the given list of - # numbers.
- # - # @param [ Array ] list the list of numbers to considier - def initialize(list) - @list = list.sort - end - - # Finds and returns the element in the list that represents the given - # percentile value. - # - # @param [ Number ] percentile a number in the range [1,100] - # - # @return [ Number ] the element of the list for the given percentile. - def [](percentile) - i = (list.size * percentile / 100.0).ceil - 1 - list[i] - end - end - # Get the median of values in a list. # # @example Get the median. @@ -143,21 +118,5 @@ def without_gc ensure GC.enable end - - private - - # Formats and displays the results of a single benchmark run. - # - # @param [ Array ] results the results to report - # @param [ Integer ] indent how much the report should be indented - # @param [ Array ] percentiles the percentiles to report - def report_result(results, indent, percentiles) - ps = Percentiles.new(results) - puts format('%*smedian: %g', indent + 2, '', ps[50]) - puts format('%*spercentiles:', indent + 2, '') - percentiles.each do |pct| - puts format('%*s%g: %g', indent + 4, '', pct, ps[pct]) - end - end end end diff --git a/profile/benchmarking/percentiles.rb b/profile/benchmarking/percentiles.rb new file mode 100644 index 0000000000..aeebe9d1d9 --- /dev/null +++ b/profile/benchmarking/percentiles.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Mongo + module Benchmarking + # A utility class for returning the list item at a given percentile + # value. + class Percentiles + # @return [ Array ] the sorted list of numbers to consider + attr_reader :list + + # Create a new Percentiles object that encapsulates the given list of + # numbers. + # + # @param [ Array ] list the list of numbers to consider + def initialize(list) + @list = list.sort + end + + # Finds and returns the element in the list that represents the given + # percentile value.
+ # + # @param [ Number ] percentile a number in the range [1,100] + # + # @return [ Number ] the element of the list for the given percentile. + def [](percentile) + i = (list.size * percentile / 100.0).ceil - 1 + list[i] + end + end + end +end diff --git a/profile/benchmarking/summary.rb b/profile/benchmarking/summary.rb new file mode 100644 index 0000000000..93fddf5435 --- /dev/null +++ b/profile/benchmarking/summary.rb @@ -0,0 +1,56 @@ +# frozen_string_literal: true + +module Mongo + module Benchmarking + # A utility class for encapsulating the summary information for a + # benchmark, including behaviors for reporting on the summary. + class Summary + # @return [ Array ] the timings of each iteration in the + # benchmark + attr_reader :timings + + # @return [ Percentiles ] the percentiles object for querying the + # timing at a given percentile value. + attr_reader :percentiles + + # @return [ Numeric ] the composite score for the benchmark + attr_reader :score + + # Construct a new Summary object with the given timings, percentiles, + # and score. + # + # @param [ Array ] timings the timings of each iteration in the + # benchmark + # @param [ Percentiles ] percentiles the percentiles object for querying + # the timing at a given percentile value + # @param [ Numeric ] score the composite score for the benchmark + def initialize(timings, percentiles, score) + @timings = timings + @percentiles = percentiles + @score = score + end + + # @return [ Numeric ] the median timing for the benchmark. + def median + percentiles[50] + end + + # Formats and displays the results of a single benchmark run. 
+ # + # @param [ Integer ] indent how much the report should be indented + # @param [ Array ] points the percentile points to report + # + # @return [ String ] a YAML-formatted summary + def summary(indent, points) + [].tap do |lines| + lines << format('%*sscore: %g', indent, '', score) + lines << format('%*smedian: %g', indent, '', median) + lines << format('%*spercentiles:', indent, '') + points.each do |pct| + lines << format('%*s%g: %g', indent + 2, '', pct, percentiles[pct]) + end + end.join("\n") + end + end + end +end From 000765752eba5f01a36cfcbe0625838c556a3737 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 15 Aug 2023 10:38:29 -0600 Subject: [PATCH 6/8] clean up the Rakefile --- Rakefile | 163 +--------------------- profile/benchmarking/rake/bson.rake | 109 +++++++++++++++ profile/benchmarking/rake/multi_doc.rake | 33 +++++ profile/benchmarking/rake/parallel.rake | 35 +++++ profile/benchmarking/rake/single_doc.rake | 26 ++++ profile/benchmarking/rake/tasks.rake | 11 ++ 6 files changed, 215 insertions(+), 162 deletions(-) create mode 100644 profile/benchmarking/rake/bson.rake create mode 100644 profile/benchmarking/rake/multi_doc.rake create mode 100644 profile/benchmarking/rake/parallel.rake create mode 100644 profile/benchmarking/rake/single_doc.rake create mode 100644 profile/benchmarking/rake/tasks.rake diff --git a/Rakefile b/Rakefile index 49733ed4af..4a4458070d 100644 --- a/Rakefile +++ b/Rakefile @@ -131,165 +131,4 @@ namespace :docs do end end -require_relative "profile/benchmarking" - -# Some require data files, available from the drivers team. See the comments above each task for details." 
-namespace :benchmark do - desc "Run the bson benchmarking tests" - task :bson do - puts "BSON BENCHMARK" - Mongo::Benchmarking.report({ - bson: Mongo::Benchmarking::BSON.run_all({ - flat: %i[ encode decode ], - deep: %i[ encode decode ], - full: %i[ encode decode ], - }) - }) - end - - namespace :bson do - namespace :flat do - desc "Benchmarking for flat bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. - task :encode do - puts "BSON BENCHMARK :: FLAT :: ENCODE" - Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json. - task :decode do - puts "BSON BENCHMARK :: FLAT :: DECODE" - Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) - end - end - - namespace :deep do - desc "Benchmarking for deep bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. - task :encode do - puts "BSON BENCHMARK :: DEEP :: ENCODE" - Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json. - task :decode do - puts "BSON BENCHMARK :: DEEP :: DECODE" - Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) - end - end - - namespace :full do - desc "Benchmarking for full bson documents." - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. - task :encode do - puts "BSON BENCHMARK :: FULL :: ENCODE" - Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json. 
- task :decode do - puts "BSON BENCHMARK :: FULL :: DECODE" - Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) - end - end - end - - namespace :single_doc do - desc "Run the common driver single-document benchmarking tests" - task :command do - puts "SINGLE DOC BENCHMARK:: COMMAND" - Mongo::Benchmarking::SingleDoc.run(:command) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. - task :find_one do - puts "SINGLE DOC BENCHMARK:: FIND ONE BY ID" - Mongo::Benchmarking::SingleDoc.run(:find_one) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. - task :insert_one_small do - puts "SINGLE DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT" - Mongo::Benchmarking::SingleDoc.run(:insert_one_small) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. - task :insert_one_large do - puts "SINGLE DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT" - Mongo::Benchmarking::SingleDoc.run(:insert_one_large) - end - end - - namespace :multi_doc do - desc "Run the common driver multi-document benchmarking tests" - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. - task :find_many do - puts "MULTI DOCUMENT BENCHMARK:: FIND MANY" - Mongo::Benchmarking::MultiDoc.run(:find_many) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. - task :bulk_insert_small do - puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL" - Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. - task :bulk_insert_large do - puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE" - Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. 
- task :gridfs_upload do - puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD" - Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) - end - - # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. - task :gridfs_download do - puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD" - Mongo::Benchmarking::MultiDoc.run(:gridfs_download) - end - end - - namespace :parallel do - desc "Run the common driver paralell ETL benchmarking tests" - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, - # with the files used in this task. - task :import do - puts "PARALLEL ETL BENCHMARK:: IMPORT" - Mongo::Benchmarking::Parallel.run(:import) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, - # with the files used in this task. - # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI" - # called 'output'. - task :export do - puts "PARALLEL ETL BENCHMARK:: EXPORT" - Mongo::Benchmarking::Parallel.run(:export) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, - # with the files used in this task. - task :gridfs_upload do - puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD" - Mongo::Benchmarking::Parallel.run(:gridfs_upload) - end - - # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, - # with the files used in this task. - # Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI" - # called 'output'. 
- task :gridfs_download do - puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD" - Mongo::Benchmarking::Parallel.run(:gridfs_download) - end - end -end +load 'profile/benchmarking/rake/tasks.rake' diff --git a/profile/benchmarking/rake/bson.rake b/profile/benchmarking/rake/bson.rake new file mode 100644 index 0000000000..2147eb2220 --- /dev/null +++ b/profile/benchmarking/rake/bson.rake @@ -0,0 +1,109 @@ +# frozen_string_literal: true + +desc 'Run the full BSON benchmarking suite' +task :bson do + puts 'BSON BENCHMARK SUITE' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ + flat: %i[ encode decode ], + deep: %i[ encode decode ], + full: %i[ encode decode ], + }) + }) +end + +namespace :bson do + desc 'Learn how to run the BSON benchmarks' + task :help do + puts <<~HELP + The BSON micro benchmarks require a set of data files that are stored in + the specifications repository, here: + + https://github.com/mongodb/specifications/tree/master/source/benchmarking/data + + Download the `extended_bson.tgz` file and extract its contents. It should + contain a single folder (`extended_bson`) with several files in it. 
Move + those files to: + + #{Mongo::Benchmarking::DATA_PATH} + + Once there, you may run any of the BSON benchmarking tasks: + + $ rake benchmark:bson:flat:encode + + Tasks may be run in aggregate, as well, by specifying the namespace + directly: + + $ rake benchmark:bson:flat # runs all flat BSON benchmarks + $ rake benchmark:bson:deep # runs all deep BSON benchmarks + $ rake benchmark:bson:full # runs all full BSON benchmarks + # rake benchmark:bson # runs all BSON benchmarks + HELP + end + + desc 'Run the `flat` BSON benchmarking suite' + task :flat do + puts 'BSON BENCHMARK :: FLAT' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ flat: %i[ encode decode ] }) + }) + end + + namespace :flat do + desc 'Run the `flat` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: FLAT :: ENCODE' + Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } }) + end + + desc 'Run the `flat` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: FLAT :: DECODE' + Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } }) + end + end + + desc 'Run the `deep` BSON benchmarking suite' + task :deep do + puts 'BSON BENCHMARK :: DEEP' + Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ deep: %i[ encode decode ] }) + }) + end + + namespace :deep do + desc 'Run the `deep` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: DEEP :: ENCODE' + Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } }) + end + + desc 'Run the `deep` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: DEEP :: DECODE' + Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } }) + end + end + + desc 'Run the `full` BSON benchmarking suite' + task :full do + puts 'BSON BENCHMARK :: FULL' + 
Mongo::Benchmarking.report({ + bson: Mongo::Benchmarking::BSON.run_all({ full: %i[ encode decode ] }) + }) + end + + namespace :full do + desc 'Run the `full` encoding BSON benchmark' + task :encode do + puts 'BSON BENCHMARK :: FULL :: ENCODE' + Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } }) + end + + desc 'Run the `full` decoding BSON benchmark' + task :decode do + puts 'BSON BENCHMARK :: FULL :: DECODE' + Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } }) + end + end +end diff --git a/profile/benchmarking/rake/multi_doc.rake b/profile/benchmarking/rake/multi_doc.rake new file mode 100644 index 0000000000..49d9aab95f --- /dev/null +++ b/profile/benchmarking/rake/multi_doc.rake @@ -0,0 +1,33 @@ +# frozen_string_literal: true + +namespace :multi_doc do + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. + task :find_many do + puts 'MULTI DOCUMENT BENCHMARK :: FIND MANY' + Mongo::Benchmarking::MultiDoc.run(:find_many) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. + task :bulk_insert_small do + puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT SMALL' + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. + task :bulk_insert_large do + puts 'MULTI DOCUMENT BENCHMARK :: BULK INSERT LARGE' + Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. + task :gridfs_upload do + puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS UPLOAD' + Mongo::Benchmarking::MultiDoc.run(:gridfs_upload) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE. 
+ task :gridfs_download do + puts 'MULTI DOCUMENT BENCHMARK :: GRIDFS DOWNLOAD' + Mongo::Benchmarking::MultiDoc.run(:gridfs_download) + end +end diff --git a/profile/benchmarking/rake/parallel.rake b/profile/benchmarking/rake/parallel.rake new file mode 100644 index 0000000000..c8fa0fd5a2 --- /dev/null +++ b/profile/benchmarking/rake/parallel.rake @@ -0,0 +1,35 @@ +# frozen_string_literal: true + +namespace :parallel do + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. + task :import do + puts 'PARALLEL ETL BENCHMARK :: IMPORT' + Mongo::Benchmarking::Parallel.run(:import) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, + # with the files used in this task. + # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI' + # called 'output'. + task :export do + puts 'PARALLEL ETL BENCHMARK :: EXPORT' + Mongo::Benchmarking::Parallel.run(:export) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. + task :gridfs_upload do + puts 'PARALLEL ETL BENCHMARK :: GRIDFS UPLOAD' + Mongo::Benchmarking::Parallel.run(:gridfs_upload) + end + + # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI, + # with the files used in this task. + # Requirement: Another directory in '#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI' + # called 'output'. 
+ task :gridfs_download do + puts 'PARALLEL ETL BENCHMARK :: GRIDFS DOWNLOAD' + Mongo::Benchmarking::Parallel.run(:gridfs_download) + end +end diff --git a/profile/benchmarking/rake/single_doc.rake b/profile/benchmarking/rake/single_doc.rake new file mode 100644 index 0000000000..9280779f0d --- /dev/null +++ b/profile/benchmarking/rake/single_doc.rake @@ -0,0 +1,26 @@ +# frozen_string_literal: true + +namespace :single_doc do + task :command do + puts 'SINGLE DOC BENCHMARK :: COMMAND' + Mongo::Benchmarking::SingleDoc.run(:command) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. + task :find_one do + puts 'SINGLE DOC BENCHMARK :: FIND ONE BY ID' + Mongo::Benchmarking::SingleDoc.run(:find_one) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json. + task :insert_one_small do + puts 'SINGLE DOC BENCHMARK :: INSERT ONE SMALL DOCUMENT' + Mongo::Benchmarking::SingleDoc.run(:insert_one_small) + end + + # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json. + task :insert_one_large do + puts 'SINGLE DOC BENCHMARK :: INSERT ONE LARGE DOCUMENT' + Mongo::Benchmarking::SingleDoc.run(:insert_one_large) + end +end diff --git a/profile/benchmarking/rake/tasks.rake b/profile/benchmarking/rake/tasks.rake new file mode 100644 index 0000000000..7feae8d2ab --- /dev/null +++ b/profile/benchmarking/rake/tasks.rake @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require_relative '../../benchmarking' + +# Some require data files, available from the drivers team. +# See the comments above each task for details. 
+namespace :benchmark do + %w[ bson single_doc multi_doc parallel ].each do |group| + load File.join(__dir__, "#{group}.rake") + end +end From b9c312540329cb1df05fdd750a5e4235a2de3c65 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 15 Aug 2023 11:10:47 -0600 Subject: [PATCH 7/8] progress indicator while benchmarks are running --- profile/benchmarking/bson.rb | 2 +- profile/benchmarking/helper.rb | 46 ++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb index 88cb979ab2..415e58114e 100644 --- a/profile/benchmarking/bson.rb +++ b/profile/benchmarking/bson.rb @@ -71,7 +71,7 @@ def score_for(type, percentiles, scale: 10_000) # @return [ Hash<:timings,:percentiles,:score> ] The test results for # the requested benchmark. def run(type, action) - timings = Benchmarking.without_gc { send(action, file_for(type)) } + timings = send(action, file_for(type)) percentiles = Percentiles.new(timings) score = score_for(type, percentiles) diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index b7a09d6e91..730e74ae34 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -58,13 +58,21 @@ def parse_json(document) # iterating. 
# # @return [ Array ] the timings for each iteration - def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block) + def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, + min_time: 60, + max_time: 5 * 60, + progress: default_progress_callback, + &block) + progress ||= -> (state) {} # fallback to a no-op callback + progress[:start] + [].tap do |results| iteration_count = 0 cumulative_time = 0 loop do - timing = Benchmark.realtime(&block) + timing = without_gc { Benchmark.realtime(&block) } + progress[:step] iteration_count += 1 cumulative_time += timing @@ -78,6 +86,8 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_ # number of iterations have been reached. break if cumulative_time >= min_time && iteration_count >= max_iterations end + + progress[:end] end end @@ -118,5 +128,37 @@ def without_gc ensure GC.enable end + + private + + # Returns the proc object (or nil) corresponding to the "PROGRESS" + # environment variable. + # + # @return [ Proc | nil ] the callback proc to use (or nil if none should + # be used) + def default_progress_callback + case ENV['PROGRESS'] + when nil, '0', 'false', 'none' + nil + when '1', 'true', 'minimal' + method(:minimal_progress_callback).to_proc + else + raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}" + end + end + + # A minimal progress callback implementation, printing '|' when a benchmark + # starts and '.' for each iteration. + # + # @param [ :start | :step | :end ] state the current progress state + def minimal_progress_callback(state) + case state + when :start then print '|' + when :step then print '.' 
+ when :end then puts + end + + $stdout.flush + end end end From 3f07148c0baae773bdbf51a65c257ba6ee2182fc Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Tue, 15 Aug 2023 12:12:30 -0600 Subject: [PATCH 8/8] rubocop --- profile/benchmarking/helper.rb | 6 +++--- profile/benchmarking/rake/bson.rake | 25 +++++++++++++++++------ profile/benchmarking/rake/multi_doc.rake | 1 + profile/benchmarking/rake/parallel.rake | 1 + profile/benchmarking/rake/single_doc.rake | 1 + 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 730e74ae34..70d4e34312 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -63,7 +63,7 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, max_time: 5 * 60, progress: default_progress_callback, &block) - progress ||= -> (state) {} # fallback to a no-op callback + progress ||= ->(state) {} # fallback to a no-op callback progress[:start] [].tap do |results| @@ -138,9 +138,9 @@ def without_gc # be used) def default_progress_callback case ENV['PROGRESS'] - when nil, '0', 'false', 'none' + when '0', 'false', 'none' nil - when '1', 'true', 'minimal' + when nil, '1', 'true', 'minimal' method(:minimal_progress_callback).to_proc else raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}" diff --git a/profile/benchmarking/rake/bson.rake b/profile/benchmarking/rake/bson.rake index 2147eb2220..4d5bdb1c04 100644 --- a/profile/benchmarking/rake/bson.rake +++ b/profile/benchmarking/rake/bson.rake @@ -1,18 +1,29 @@ # frozen_string_literal: true +# rubocop:disable Layout/FirstHashElementIndentation + desc 'Run the full BSON benchmarking suite' task :bson do puts 'BSON BENCHMARK SUITE' Mongo::Benchmarking.report({ - bson: Mongo::Benchmarking::BSON.run_all({ + bson: Mongo::Benchmarking::BSON.run_all( flat: %i[ encode decode ], deep: %i[ encode decode ], - full: %i[ encode decode ], - }) + full: %i[ encode decode ] + ) }) end 
-namespace :bson do +namespace :bson do # rubocop:disable Metrics/BlockLength + # a convenience task for running all of the bson benchmark tasks; this is + # only useful for testing that they all work. + task test: %w[ + bson + bson:flat bson:flat:encode bson:flat:decode + bson:deep bson:deep:encode bson:deep:decode + bson:full bson:full:encode bson:full:decode + ] + desc 'Learn how to run the BSON benchmarks' task :help do puts <<~HELP @@ -45,7 +56,7 @@ namespace :bson do task :flat do puts 'BSON BENCHMARK :: FLAT' Mongo::Benchmarking.report({ - bson: Mongo::Benchmarking::BSON.run_all({ flat: %i[ encode decode ] }) + bson: Mongo::Benchmarking::BSON.run_all(flat: %i[ encode decode ]) }) end @@ -67,7 +78,7 @@ namespace :bson do task :deep do puts 'BSON BENCHMARK :: DEEP' Mongo::Benchmarking.report({ - bson: Mongo::Benchmarking::BSON.run_all({ deep: %i[ encode decode ] }) + bson: Mongo::Benchmarking::BSON.run_all(deep: %i[ encode decode ]) }) end @@ -107,3 +118,5 @@ namespace :bson do end end end + +# rubocop:enable Layout/FirstHashElementIndentation diff --git a/profile/benchmarking/rake/multi_doc.rake b/profile/benchmarking/rake/multi_doc.rake index 49d9aab95f..86c190ef1f 100644 --- a/profile/benchmarking/rake/multi_doc.rake +++ b/profile/benchmarking/rake/multi_doc.rake @@ -1,4 +1,5 @@ # frozen_string_literal: true +# rubocop:todo all namespace :multi_doc do # Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json. 
diff --git a/profile/benchmarking/rake/parallel.rake b/profile/benchmarking/rake/parallel.rake index c8fa0fd5a2..98752e231e 100644 --- a/profile/benchmarking/rake/parallel.rake +++ b/profile/benchmarking/rake/parallel.rake @@ -1,4 +1,5 @@ # frozen_string_literal: true +# rubocop:todo all namespace :parallel do # Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI, diff --git a/profile/benchmarking/rake/single_doc.rake b/profile/benchmarking/rake/single_doc.rake index 9280779f0d..803e28d593 100644 --- a/profile/benchmarking/rake/single_doc.rake +++ b/profile/benchmarking/rake/single_doc.rake @@ -1,4 +1,5 @@ # frozen_string_literal: true +# rubocop:todo all namespace :single_doc do task :command do