RUBY-3316 Quality of life updates for BSON benchmarks #2774

Merged
9 commits merged on Aug 28, 2023
163 changes: 1 addition & 162 deletions Rakefile
@@ -131,165 +131,4 @@ namespace :docs do
end
end

require_relative "profile/benchmarking"

# Some tasks require data files, available from the drivers team. See the comments above each task for details.
namespace :benchmark do
desc "Run the bson benchmarking tests"
task :bson do
puts "BSON BENCHMARK"
Mongo::Benchmarking.report({
bson: Mongo::Benchmarking::BSON.run_all({
flat: %i[ encode decode ],
deep: %i[ encode decode ],
full: %i[ encode decode ],
})
})
end

namespace :bson do
namespace :flat do
desc "Benchmarking for flat bson documents."

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
task :encode do
puts "BSON BENCHMARK :: FLAT :: ENCODE"
Mongo::Benchmarking.report({ bson: { flat: { encode: Mongo::Benchmarking::BSON.run(:flat, :encode) } } })
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called flat_bson.json.
task :decode do
puts "BSON BENCHMARK :: FLAT :: DECODE"
Mongo::Benchmarking.report({ bson: { flat: { decode: Mongo::Benchmarking::BSON.run(:flat, :decode) } } })
end
end

namespace :deep do
desc "Benchmarking for deep bson documents."

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
task :encode do
puts "BSON BENCHMARK :: DEEP :: ENCODE"
Mongo::Benchmarking.report({ bson: { deep: { encode: Mongo::Benchmarking::BSON.run(:deep, :encode) } } })
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called deep_bson.json.
task :decode do
puts "BSON BENCHMARK :: DEEP :: DECODE"
Mongo::Benchmarking.report({ bson: { deep: { decode: Mongo::Benchmarking::BSON.run(:deep, :decode) } } })
end
end

namespace :full do
desc "Benchmarking for full bson documents."

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
task :encode do
puts "BSON BENCHMARK :: FULL :: ENCODE"
Mongo::Benchmarking.report({ bson: { full: { encode: Mongo::Benchmarking::BSON.run(:full, :encode) } } })
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called full_bson.json.
task :decode do
puts "BSON BENCHMARK :: FULL :: DECODE"
Mongo::Benchmarking.report({ bson: { full: { decode: Mongo::Benchmarking::BSON.run(:full, :decode) } } })
end
end
end

namespace :single_doc do
desc "Run the common driver single-document benchmarking tests"
task :command do
puts "SINGLE DOC BENCHMARK:: COMMAND"
Mongo::Benchmarking::SingleDoc.run(:command)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
task :find_one do
puts "SINGLE DOC BENCHMARK:: FIND ONE BY ID"
Mongo::Benchmarking::SingleDoc.run(:find_one)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
task :insert_one_small do
puts "SINGLE DOC BENCHMARK:: INSERT ONE SMALL DOCUMENT"
Mongo::Benchmarking::SingleDoc.run(:insert_one_small)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
task :insert_one_large do
puts "SINGLE DOC BENCHMARK:: INSERT ONE LARGE DOCUMENT"
Mongo::Benchmarking::SingleDoc.run(:insert_one_large)
end
end

namespace :multi_doc do
desc "Run the common driver multi-document benchmarking tests"

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called TWEET.json.
task :find_many do
puts "MULTI DOCUMENT BENCHMARK:: FIND MANY"
Mongo::Benchmarking::MultiDoc.run(:find_many)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called SMALL_DOC.json.
task :bulk_insert_small do
puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT SMALL"
Mongo::Benchmarking::MultiDoc.run(:bulk_insert_small)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called LARGE_DOC.json.
task :bulk_insert_large do
puts "MULTI DOCUMENT BENCHMARK:: BULK INSERT LARGE"
Mongo::Benchmarking::MultiDoc.run(:bulk_insert_large)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
task :gridfs_upload do
puts "MULTI DOCUMENT BENCHMARK:: GRIDFS UPLOAD"
Mongo::Benchmarking::MultiDoc.run(:gridfs_upload)
end

# Requirement: A file in Mongo::Benchmarking::DATA_PATH, called GRIDFS_LARGE.
task :gridfs_download do
puts "MULTI DOCUMENT BENCHMARK:: GRIDFS DOWNLOAD"
Mongo::Benchmarking::MultiDoc.run(:gridfs_download)
end
end

namespace :parallel do
desc "Run the common driver paralell ETL benchmarking tests"

# Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
# with the files used in this task.
task :import do
puts "PARALLEL ETL BENCHMARK:: IMPORT"
Mongo::Benchmarking::Parallel.run(:import)
end

# Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called LDJSON_MULTI,
# with the files used in this task.
# Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/LDJSON_MULTI"
# called 'output'.
task :export do
puts "PARALLEL ETL BENCHMARK:: EXPORT"
Mongo::Benchmarking::Parallel.run(:export)
end

# Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
# with the files used in this task.
task :gridfs_upload do
puts "PARALLEL ETL BENCHMARK:: GRIDFS UPLOAD"
Mongo::Benchmarking::Parallel.run(:gridfs_upload)
end

# Requirement: A directory in Mongo::Benchmarking::DATA_PATH, called GRIDFS_MULTI,
# with the files used in this task.
# Requirement: Another directory in "#{Mongo::Benchmarking::DATA_PATH}/GRIDFS_MULTI"
# called 'output'.
task :gridfs_download do
puts "PARALLEL ETL BENCHMARK:: GRIDFS DOWNLOAD"
Mongo::Benchmarking::Parallel.run(:gridfs_download)
end
end
end
load 'profile/benchmarking/rake/tasks.rake'
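
The removed tasks are replaced by a single load of profile/benchmarking/rake/tasks.rake, which is not part of this diff. As a rough sketch only (assuming the extracted file re-registers the same benchmark:bson task names the Rakefile used to define), it could look something like this:

# Hypothetical sketch of profile/benchmarking/rake/tasks.rake; the real file
# is not shown in this diff.
namespace :benchmark do
  namespace :bson do
    %i[ flat deep full ].each do |type|
      namespace type do
        %i[ encode decode ].each do |action|
          desc "Benchmarking #{action} for #{type} bson documents."
          task action do
            puts "BSON BENCHMARK :: #{type.upcase} :: #{action.upcase}"
            Mongo::Benchmarking.report(
              { bson: { type => { action => Mongo::Benchmarking::BSON.run(type, action) } } }
            )
          end
        end
      end
    end
  end
end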
2 changes: 1 addition & 1 deletion profile/benchmarking/bson.rb
@@ -71,7 +71,7 @@ def score_for(type, percentiles, scale: 10_000)
# @return [ Hash<:timings,:percentiles,:score> ] The test results for
# the requested benchmark.
def run(type, action)
timings = Benchmarking.without_gc { send(action, file_for(type)) }
timings = send(action, file_for(type))
percentiles = Percentiles.new(timings)
score = score_for(type, percentiles)

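
The only change to BSON.run is dropping the Benchmarking.without_gc wrapper: GC suppression now lives in the shared benchmark helper and wraps each timed iteration instead of the whole run (see the helper.rb hunk below). A minimal sketch of the guard involved, with the method body inferred from the ensure/GC.enable tail that is visible in that hunk:

# Sketch only; the first lines of the method body are folded out of the diff
# below and are inferred here.
def without_gc
  GC.disable
  yield
ensure
  GC.enable
end

# Each iteration inside the benchmark helper is now timed as
#   timing = without_gc { Benchmark.realtime(&block) }
# so GC may run between iterations, but never inside a timed region.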
72 changes: 44 additions & 28 deletions profile/benchmarking/helper.rb
@@ -58,13 +58,21 @@ def parse_json(document)
# iterating.
#
# @return [ Array<Float> ] the timings for each iteration
def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block)
def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS,
min_time: 60,
max_time: 5 * 60,
progress: default_progress_callback,
&block)
progress ||= ->(state) {} # fallback to a no-op callback
progress[:start]

[].tap do |results|
iteration_count = 0
cumulative_time = 0

loop do
timing = Benchmark.realtime(&block)
timing = without_gc { Benchmark.realtime(&block) }
progress[:step]

iteration_count += 1
cumulative_time += timing
@@ -78,6 +86,8 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
# number of iterations have been reached.
break if cumulative_time >= min_time && iteration_count >= max_iterations
end

progress[:end]
end
end

@@ -98,32 +108,6 @@ def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
end
end

# A utility class for returning the list item at a given percentile
# value.
class Percentiles
# @return [ Array<Number> ] the sorted list of numbers to consider
attr_reader :list

# Create a new Percentiles object that encapsulates the given list of
# numbers.
#
# @param [ Array<Number> ] list the list of numbers to consider
def initialize(list)
@list = list.sort
end

# Finds and returns the element in the list that represents the given
# percentile value.
#
# @param [ Number ] percentile a number in the range [1,100]
#
# @return [ Number ] the element of the list for the given percentile.
def [](percentile)
i = (list.size * percentile / 100.0).ceil - 1
list[i]
end
end

# Get the median of values in a list.
#
# @example Get the median.
@@ -144,5 +128,37 @@ def without_gc
ensure
GC.enable
end

private

# Returns the proc object (or nil) corresponding to the "PROGRESS"
# environment variable.
#
# @return [ Proc | nil ] the callback proc to use (or nil if none should
# be used)
def default_progress_callback
case ENV['PROGRESS']
when '0', 'false', 'none'
nil
when nil, '1', 'true', 'minimal'
method(:minimal_progress_callback).to_proc
else
raise ArgumentError, "unsupported progress callback #{ENV['PROGRESS'].inspect}"
end
end

# A minimal progress callback implementation, printing '|' when a benchmark
# starts and '.' for each iteration.
#
# @param [ :start | :step | :end ] state the current progress state
def minimal_progress_callback(state)
case state
when :start then print '|'
when :step then print '.'
when :end then puts
end

$stdout.flush
end
end
end
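
The new progress: keyword and the PROGRESS environment variable only affect console output; the returned timings are unchanged. A usage sketch, assuming the helper is callable as a module method on Mongo::Benchmarking in the same way report already is, and with a hypothetical decode workload and data variable:

# PROGRESS unset (or '1', 'true', 'minimal'): prints '|' at the start,
# '.' per iteration, and a newline at the end.
# PROGRESS=0 / 'false' / 'none': no output. Any other value raises ArgumentError.
timings = Mongo::Benchmarking.benchmark(min_time: 60) do
  BSON::Document.from_bson(BSON::ByteBuffer.new(data)) # hypothetical workload
end

# A custom callback may also be passed explicitly; it is called with :start,
# :step, and :end.
spinner = ->(state) { print({ start: '[', step: '=', end: "]\n" }[state]) }
timings = Mongo::Benchmarking.benchmark(progress: spinner) do
  BSON::Document.from_bson(BSON::ByteBuffer.new(data))
end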