RUBY-3314 Implement variable iterations for benchmarks #2771

Merged 1 commit on Aug 17, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -26,3 +26,4 @@ gemfiles/*.gemfile.lock
.env.private*
.env
build
profile/benchmarking/data
97 changes: 24 additions & 73 deletions profile/benchmarking.rb
@@ -1,5 +1,4 @@
# frozen_string_literal: true
# rubocop:todo all

# Copyright (C) 2015-2020 MongoDB Inc.
#
@@ -23,107 +22,59 @@
require_relative 'benchmarking/parallel'

module Mongo

# Module with all functionality for running driver benchmark tests.
#
# @since 2.2.3
module Benchmarking

extend self

# The current path.
#
# @return [ String ] The current path.
#
# @since 2.2.3
CURRENT_PATH = File.expand_path(File.dirname(__FILE__)).freeze

# The path to data files used in Benchmarking tests.
#
# @return [ String ] Path to Benchmarking test files.
#
# @since 2.2.3
DATA_PATH = [CURRENT_PATH, 'benchmarking', 'data'].join('/').freeze
DATA_PATH = [ __dir__, 'benchmarking', 'data' ].join('/').freeze

# The file containing the single tweet document.
#
# @return [ String ] The file containing the tweet document.
#
# @since 2.2.3
TWEET_DOCUMENT_FILE = [DATA_PATH, 'TWEET.json'].join('/').freeze
# @return [ String ] The file containing the single tweet document.
TWEET_DOCUMENT_FILE = [ DATA_PATH, 'TWEET.json' ].join('/').freeze

# The file containing the single small document.
#
# @return [ String ] The file containing the small document.
#
# @since 2.2.3
SMALL_DOCUMENT_FILE = [DATA_PATH, 'SMALL_DOC.json'].join('/').freeze
# @return [ String ] The file containing the single small document.
SMALL_DOCUMENT_FILE = [ DATA_PATH, 'SMALL_DOC.json' ].join('/').freeze

# The file containing the single large document.
#
# @return [ String ] The file containing the large document.
#
# @since 2.2.3
LARGE_DOCUMENT_FILE = [DATA_PATH, 'LARGE_DOC.json'].join('/').freeze
# @return [ String ] The file containing the single large document.
LARGE_DOCUMENT_FILE = [ DATA_PATH, 'LARGE_DOC.json' ].join('/').freeze

# The file to upload when testing GridFS.
#
# @return [ String ] The file containing the GridFS test data.
#
# @since 2.2.3
GRIDFS_FILE = [DATA_PATH, 'GRIDFS_LARGE'].join('/').freeze
# @return [ String ] The file to upload when testing GridFS.
GRIDFS_FILE = [ DATA_PATH, 'GRIDFS_LARGE' ].join('/').freeze

# The file path and base name for the LDJSON files.
#
# @return [ String ] The file path and base name for the LDJSON files.
#
# @since 2.2.3
LDJSON_FILE_BASE = [DATA_PATH, 'LDJSON_MULTI', 'LDJSON'].join('/').freeze
LDJSON_FILE_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'LDJSON' ].join('/').freeze

# The file path and base name for the outputted LDJSON files.
#
# @return [ String ] The file path and base name for the outputted LDJSON files.
#
# @since 2.2.3
LDJSON_FILE_OUTPUT_BASE = [DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON'].join('/').freeze
# @return [ String ] The file path and base name for the emitted LDJSON files.
LDJSON_FILE_OUTPUT_BASE = [ DATA_PATH, 'LDJSON_MULTI', 'output', 'LDJSON' ].join('/').freeze

# The file path and base name for the GRIDFS files to upload.
#
# @return [ String ] The file path and base name for the GRIDFS files to upload.
#
# @since 2.2.3
GRIDFS_MULTI_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'file'].join('/').freeze
GRIDFS_MULTI_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'file' ].join('/').freeze

# The file path and base name for the outputted GRIDFS downloaded files.
#
# @return [ String ] The file path and base name for the outputted GRIDFS downloaded files.
#
# @since 2.2.3
GRIDFS_MULTI_OUTPUT_BASE = [DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output'].join('/').freeze
# @return [ String ] The file path and base name for the emitted GRIDFS downloaded files.
GRIDFS_MULTI_OUTPUT_BASE = [ DATA_PATH, 'GRIDFS_MULTI', 'output', 'file-output' ].join('/').freeze

# The default number of test repetitions.
#
# @return [ Integer ] The number of test repetitions.
#
# @since 2.2.3
TEST_REPETITIONS = 100.freeze
TEST_REPETITIONS = 100

# The number of default warmup repetitions of the test to do before
# recording times.
#
# @return [ Integer ] The default number of warmup repetitions.
# Convenience helper for loading the single tweet document.
#
# @since 2.2.3
WARMUP_REPETITIONS = 10.freeze

# @return [ Hash ] a single parsed JSON document
def tweet_document
Benchmarking.load_file(TWEET_DOCUMENT_FILE).first
end

# Convenience helper for loading the single small document.
#
# @return [ Hash ] a single parsed JSON document
def small_document
Benchmarking.load_file(SMALL_DOCUMENT_FILE).first
end

# Convenience helper for loading the single large document.
#
# @return [ Hash ] a single parsed JSON document
def large_document
Benchmarking.load_file(LARGE_DOCUMENT_FILE).first
end
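
The three helpers above are thin wrappers around load_file; a minimal sketch of exercising them from a console (not part of this diff, and the require path assumes the repository root):

require_relative 'profile/benchmarking'

doc = Mongo::Benchmarking.tweet_document    # first document parsed from TWEET.json
doc.is_a?(Hash)                             # => true
Mongo::Benchmarking.small_document.keys     # field names of SMALL_DOC.json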
51 changes: 44 additions & 7 deletions profile/benchmarking/helper.rb
@@ -1,11 +1,8 @@
# frozen_string_literal: true
# rubocop:todo all

module Mongo

# Helper functions used by benchmarking tasks
module Benchmarking

extend self

# Load a json file and represent each document as a Hash.
@@ -19,7 +16,7 @@ module Benchmarking
#
# @since 2.2.3
def load_file(file_name)
File.open(file_name, "r") do |f|
File.open(file_name, 'r') do |f|
f.each_line.collect do |line|
parse_json(line)
end
@@ -39,8 +36,47 @@ def load_file(file_name)
# @since 2.2.3
def parse_json(document)
JSON.parse(document).tap do |doc|
if doc['_id'] && doc['_id']['$oid']
doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid'])
doc['_id'] = BSON::ObjectId.from_string(doc['_id']['$oid']) if doc['_id'] && doc['_id']['$oid']
end
end
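
To make the $oid handling above concrete, a small illustration of what parse_json yields for one line of MongoDB extended JSON (the ObjectId value is an arbitrary example):

line = '{"_id":{"$oid":"64db2f1e9d3b7a0001a1b2c3"},"x":1}'
Mongo::Benchmarking.parse_json(line)
# => {"_id"=>BSON::ObjectId('64db2f1e9d3b7a0001a1b2c3'), "x"=>1}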

# The spec requires that most benchmarks use a variable number of
# iterations, defined as follows:
#
# * iterations should loop for at least 1 minute cumulative execution
# time
# * iterations should stop after 100 iterations or 5 minutes cumulative
# execution time, whichever is shorter
#
# This method will yield once for each iteration.
#
# @param [ Integer ] max_iterations the maximum number of iterations to
# attempt (default: 100)
# @param [ Integer ] min_time the minimum number of seconds to spend
# iterating
# @param [ Integer ] max_time the maximum number of seconds to spend
# iterating.
#
# @return [ Array<Float> ] the timings for each iteration
def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_time: 5 * 60, &block)
[].tap do |results|
iteration_count = 0
cumulative_time = 0

loop do
timing = Benchmark.realtime(&block)

iteration_count += 1
cumulative_time += timing
results.push timing

# always stop after the maximum time has elapsed, regardless of
# iteration count.
break if cumulative_time > max_time

# otherwise, break if the minimum time has elapsed, and the maximum
# number of iterations have been reached.
break if cumulative_time >= min_time && iteration_count >= max_iterations
end
end
end
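
A quick sketch of driving the new helper directly; the thresholds and the block body below are placeholders chosen for a fast local run, not values used anywhere in this change:

require 'benchmark' # Benchmark.realtime is used inside the helper

timings = Mongo::Benchmarking.benchmark(max_iterations: 10, min_time: 5, max_time: 30) do
  100_000.times { Math.sqrt(rand) } # stand-in workload
end

puts "iterations run: #{timings.size}"
puts "median seconds: #{Mongo::Benchmarking.median(timings)}"

With these numbers the loop keeps yielding until at least 5 seconds have accumulated and 10 iterations have run, and bails out early if the cumulative time exceeds 30 seconds.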
@@ -56,7 +92,8 @@ def parse_json(document)
#
# @since 2.2.3
def median(values)
values.sort![values.size/2-1]
i = (values.size / 2) - 1
values.sort[i]
end
end
end
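
One note on the rewritten median above: it picks the element at index (size / 2) - 1 of the sorted timings rather than interpolating, so for an even-length sample it returns the lower of the two middle values, e.g.:

values = [3.2, 1.1, 2.7, 4.5]   # four timings, in seconds
i = (values.size / 2) - 1       # => 1
values.sort[i]                  # => 2.7 (sorted order is [1.1, 2.7, 3.2, 4.5])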
35 changes: 10 additions & 25 deletions profile/benchmarking/micro.rb
@@ -1,5 +1,4 @@
# frozen_string_literal: true
# rubocop:todo all

# Copyright (C) 2015-2020 MongoDB Inc.
#
@@ -17,13 +16,11 @@

module Mongo
module Benchmarking

# These tests focus on BSON encoding and decoding; they are client-side only and
# do not involve any transmission of data to or from the server.
#
# @since 2.2.3
module Micro

extend self

# Run a micro benchmark test.
@@ -38,10 +35,11 @@ module Micro
#
# @since 2.2.3
def run(type, action, repetitions = Benchmarking::TEST_REPETITIONS)
file_name = type.to_s << "_bson.json"
file_name = type.to_s << '_bson.json'
GC.disable
file_path = [Benchmarking::DATA_PATH, file_name].join('/')
file_path = [ Benchmarking::DATA_PATH, file_name ].join('/')
puts "#{action} : #{send(action, file_path, repetitions)}"
GC.enable
end
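
For context, run builds its data file name from the type argument (type.to_s plus _bson.json under DATA_PATH), disables GC around the measurement, and prints the median returned by the chosen action. A hypothetical invocation, assuming a flat_bson.json fixture exists (the concrete type names are not shown in this diff):

Mongo::Benchmarking::Micro.run(:flat, :encode)
# looks for profile/benchmarking/data/flat_bson.json and prints e.g. "encode : 1.2345"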

# Run an encoding micro benchmark test.
@@ -59,16 +57,8 @@ def encode(file_name, repetitions)
data = Benchmarking.load_file(file_name)
document = BSON::Document.new(data.first)

# WARMUP_REPETITIONS.times do
# doc.to_bson
# end

results = repetitions.times.collect do
Benchmark.realtime do
10_000.times do
document.to_bson
end
end
results = Benchmarking.benchmark(max_iterations: repetitions) do
10_000.times { document.to_bson }
end
Benchmarking.median(results)
end
@@ -88,18 +78,13 @@ def decode(file_name, repetitions)
data = Benchmarking.load_file(file_name)
buffer = BSON::Document.new(data.first).to_bson

# WARMUP_REPETITIONS.times do
# BSON::Document.from_bson(buffers.shift)
# end

results = repetitions.times.collect do
Benchmark.realtime do
10_000.times do
BSON::Document.from_bson(buffer)
buffer.rewind!
end
results = Benchmarking.benchmark(max_iterations: repetitions) do
10_000.times do
BSON::Document.from_bson(buffer)
buffer.rewind!
end
end

Benchmarking.median(results)
end
end