From e56f3c3c0d8966bc5769f048d1097427fc0064b1 Mon Sep 17 00:00:00 2001 From: Jamis Buck Date: Mon, 14 Aug 2023 12:28:26 -0600 Subject: [PATCH] RUBY-3314 Implement variable iterations for benchmarks --- .gitignore | 1 + profile/benchmarking.rb | 97 +++++++++------------------------- profile/benchmarking/helper.rb | 51 +++++++++++++++--- profile/benchmarking/micro.rb | 35 ++++-------- 4 files changed, 79 insertions(+), 105 deletions(-) diff --git a/.gitignore b/.gitignore index a60fb2cf76..7af488f10d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock .env.private* .env build +profile/benchmarking/data diff --git a/profile/benchmarking.rb b/profile/benchmarking.rb index a054b3906b..43797982b2 100644 --- a/profile/benchmarking.rb +++ b/profile/benchmarking.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -23,107 +22,59 @@ require_relative 'benchmarking/parallel' module Mongo - # Module with all functionality for running driver benchmark tests. # # @since 2.2.3 module Benchmarking - extend self - # The current path. - # - # @return [ String ] The current path. - # - # @since 2.2.3 - CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze - - # The path to data files used in Benchmarking tests. - # # @return [ String ] Path to Benchmarking test files. - # - # @since 2.2.3 - DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze + DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze - # The file containing the single tweet document. - # - # @return [ String ] The file containing the tweet document. - # - # @since 2.2.3 - TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze + # @return [ String ] The file containing the single tweet document. + TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze - # The file containing the single small document. - # - # @return [ String ] The file containing the small document. - # - # @since 2.2.3 - SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single small document. + SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze - # The file containing the single large document. - # - # @return [ String ] The file containing the large document. - # - # @since 2.2.3 - LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze + # @return [ String ] The file containing the single large document. + LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze - # The file to upload when testing GridFS. - # - # @return [ String ] The file containing the GridFS test data. - # - # @since 2.2.3 - GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze + # @return [ String ] The file to upload when testing GridFS. + GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze - # The file path and base name for the LDJSON files. - # # @return [ String ] The file path and base name for the LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze + LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze - # The file path and base name for the outputted LDJSON files. - # - # @return [ String ] The file path and base name for the outputted LDJSON files. - # - # @since 2.2.3 - LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze + # @return [ String ] The file path and base name for the emitted LDJSON files. + LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze - # The file path and base name for the GRIDFS files to upload. - # # @return [ String ] The file path and base name for the GRIDFS files to upload. - # - # @since 2.2.3 - GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze + GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze - # The file path and base name for the outputted GRIDFS downloaded files. - # - # @return [ String ] The file path and base name for the outputted GRIDFS downloaded files. - # - # @since 2.2.3 - GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze + # @return [ String ] The file path and base name for the emitted GRIDFS downloaded files. + GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze - # The default number of test repetitions. - # # @return [ Integer ] The number of test repetitions. - # - # @since 2.2.3 - TEST_REPETITIONS = 100.freeze + TEST_REPETITIONS = 100 - # The number of default warmup repetitions of the test to do before - # recording times. - # - # @return [ Integer ] The default number of warmup repetitions. + # Convenience helper for loading the single tweet document. # - # @since 2.2.3 - WARMUP_REPETITIONS = 10.freeze - + # @return [ Hash ] a single parsed JSON document def tweet_document Benchmarking.load_file(TWEET_DOCUMENT_FILE).first end + # Convenience helper for loading the single small document. + # + # @return [ Hash ] a single parsed JSON document def small_document Benchmarking.load_file(SMALL_DOCUMENT_FILE).first end + # Convenience helper for loading the single large document. + # + # @return [ Hash ] a single parsed JSON document def large_document Benchmarking.load_file(LARGE_DOCUMENT_FILE).first end diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb index 11d9f670ab..654eddbfd9 100644 --- a/profile/benchmarking/helper.rb +++ b/profile/benchmarking/helper.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true -# rubocop:todo all module Mongo - # Helper functions used by benchmarking tasks module Benchmarking - extend self # Load a json file and represent each document as a Hash. @@ -19,7 +16,7 @@ module Benchmarking # # @since 2.2.3 def load_file(file_name) - File.open(file_name, "r") do |f| + File.open(file_name, 'r') do |f| f.each_line.collect do |line| parse_json(line) end @@ -39,8 +36,47 @@ def load_file(file_name) # @since 2.2.3 def parse_json(document) JSON.parse(document).tap do |doc| - if doc['_id'] && doc['_id']['$oid'] - doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) + doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid'] + end + end + + # The spec requires that most benchmarks use a variable number of + # iterations, defined as follows: + # + # * iterations should loop for at least 1 minute cumulative execution + # time + # * iterations should stop after 100 iterations or 5 minutes cumulative + # execution time, whichever is shorter + # + # This method will yield once for each iteration. + # + # @param [ Integer ] max_iterations the maximum number of iterations to + # attempt (default: 100) + # @param [ Integer ] min_time the minimum number of seconds to spend + # iterating + # @param [ Integer ] max_time the maximum number of seconds to spend + # iterating. + # + # @return [ Array ] the timings for each iteration + def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block) + [].tap do |results| + iteration_count = 0 + cumulative_time = 0 + + loop do + timing = Benchmark.realtime(&block) + + iteration_count += 1 + cumulative_time += timing + results.push timing + + # always stop after the maximum time has elapsed, regardless of + # iteration count. + break if cumulative_time > max_time + + # otherwise, break if the minimum time has elapsed, and the maximum + # number of iterations have been reached. + break if cumulative_time >= min_time && iteration_count >= max_iterations end end end @@ -56,7 +92,8 @@ def parse_json(document) # # @since 2.2.3 def median(values) - values.sort![values.size/2-1] + i = (values.size / 2) - 1 + values.sort[i] end end end diff --git a/profile/benchmarking/micro.rb b/profile/benchmarking/micro.rb index 2f560bb310..99edf2ae10 100644 --- a/profile/benchmarking/micro.rb +++ b/profile/benchmarking/micro.rb @@ -1,5 +1,4 @@ # frozen_string_literal: true -# rubocop:todo all # Copyright (C) 2015-2020 MongoDB Inc. # @@ -17,13 +16,11 @@ module Mongo module Benchmarking - # These tests focus on BSON encoding and decoding; they are client-side only and # do not involve any transmission of data to or from the server. # # @since 2.2.3 module Micro - extend self # Run a micro benchmark test. @@ -38,10 +35,11 @@ module Micro # # @since 2.2.3 def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS) - file_name = type.to_s << "_bson.json" + file_name = type.to_s << '_bson.json' GC.disable - file_path = [Benchmarking::DATA_PATH, file_name].join('/') + file_path = [ Benchmarking::DATA_PATH, file_name ].join('/') puts "#{action} : #{send(action, file_path, repetitions)}" + GC.enable end # Run an encoding micro benchmark test. @@ -59,16 +57,8 @@ def encode(file_name, repetitions) data = Benchmarking.load_file(file_name) document = BSON::Document.new(data.first) - # WARMUP_REPETITIONS.times do - # doc.to_bson - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - document.to_bson - end - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times { document.to_bson } end Benchmarking.median(results) end @@ -88,18 +78,13 @@ def decode(file_name, repetitions) data = Benchmarking.load_file(file_name) buffer = BSON::Document.new(data.first).to_bson - # WARMUP_REPETITIONS.times do - # BSON::Document.from_bson(buffers.shift) - # end - - results = repetitions.times.collect do - Benchmark.realtime do - 10_000.times do - BSON::Document.from_bson(buffer) - buffer.rewind! - end + results = Benchmarking.benchmark(max_iterations: repetitions) do + 10_000.times do + BSON::Document.from_bson(buffer) + buffer.rewind! end end + Benchmarking.median(results) end end