Skip to content

test: count errors of cluster down #378

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ Also, it should handle errors.
* https://redis.io/docs/reference/cluster-spec/
* https://github.com/redis/redis-rb/issues/1070
* https://github.com/redis/redis/issues/8948
* https://github.com/valkey-io/valkey/issues/384
* https://github.com/antirez/redis-rb-cluster
* https://twitter.com/antirez
* http://antirez.com/latest/0
Expand Down
29 changes: 18 additions & 11 deletions test/test_against_cluster_broken.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
require 'testing_helper'

class TestAgainstClusterBroken < TestingWrapper
WAIT_SEC = 3
WAIT_SEC = 1
MAX_ATTEMPTS = 60
NUMBER_OF_KEYS = 10

def setup
@captured_commands = ::Middlewares::CommandCapture::CommandBuffer.new
Expand All @@ -24,23 +26,26 @@ def setup
)
@captured_commands.clear
@redirect_count.clear
@cluster_down_error_count = 0
end

def teardown
@client&.close
@controller&.close
print "#{@redirect_count.get}, ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')} = "
print "#{@redirect_count.get}, "\
"ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')}, "\
"ClusterDownError: #{@cluster_down_error_count} = "
end

def test_a_replica_is_down
sacrifice = @controller.select_sacrifice_of_replica
do_test_a_node_is_down(sacrifice, number_of_keys: 10)
do_test_a_node_is_down(sacrifice, number_of_keys: NUMBER_OF_KEYS)
refute(@captured_commands.count('cluster', 'nodes').zero?, @captured_commands.to_a.map(&:command))
end

def test_a_primary_is_down
sacrifice = @controller.select_sacrifice_of_primary
do_test_a_node_is_down(sacrifice, number_of_keys: 10)
do_test_a_node_is_down(sacrifice, number_of_keys: NUMBER_OF_KEYS)
refute(@captured_commands.count('cluster', 'nodes').zero?, @captured_commands.to_a.map(&:command))
end

Expand All @@ -57,8 +62,8 @@ def wait_for_replication
def do_test_a_node_is_down(sacrifice, number_of_keys:)
prepare_test_data(number_of_keys: number_of_keys)

kill_a_node(sacrifice, kill_attempts: 10)
wait_for_cluster_to_be_ready(wait_attempts: 10)
kill_a_node(sacrifice, kill_attempts: MAX_ATTEMPTS)
wait_for_cluster_to_be_ready(wait_attempts: MAX_ATTEMPTS)

assert_equal('PONG', @client.call('PING'), 'Case: PING')
do_assertions_without_pipelining(number_of_keys: number_of_keys)
Expand All @@ -75,15 +80,15 @@ def kill_a_node(sacrifice, kill_attempts:)
refute_nil(sacrifice, "#{sacrifice.config.host}:#{sacrifice.config.port}")

loop do
break if kill_attempts <= 0
raise MaxRetryExceeded if kill_attempts <= 0

kill_attempts -= 1
sacrifice.call('SHUTDOWN', 'NOSAVE')
rescue ::RedisClient::CommandError => e
raise unless e.message.include?('Errors trying to SHUTDOWN')
rescue ::RedisClient::ConnectionError
break
ensure
kill_attempts -= 1
sleep WAIT_SEC
end

Expand All @@ -92,11 +97,13 @@ def kill_a_node(sacrifice, kill_attempts:)

def wait_for_cluster_to_be_ready(wait_attempts:)
loop do
break if wait_attempts <= 0 || @client.call('PING') == 'PONG'
raise MaxRetryExceeded if wait_attempts <= 0

wait_attempts -= 1
break if @client.call('PING') == 'PONG'
rescue ::RedisClient::Cluster::NodeMightBeDown
# ignore
@cluster_down_error_count += 1
ensure
wait_attempts -= 1
sleep WAIT_SEC
end
end
Expand Down
29 changes: 22 additions & 7 deletions test/test_against_cluster_scale.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
require 'testing_helper'

class TestAgainstClusterScale < TestingWrapper
WAIT_SEC = 1
MAX_ATTEMPTS = 20
NUMBER_OF_KEYS = 20_000

def self.test_order
Expand All @@ -23,12 +25,15 @@ def setup
@client.call('echo', 'init')
@captured_commands.clear
@redirect_count.clear
@cluster_down_error_count = 0
end

def teardown
@client&.close
@controller&.close
print "#{@redirect_count.get}, ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')} = "
print "#{@redirect_count.get}, "\
"ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')}, "\
"ClusterDownError: #{@cluster_down_error_count} = "
end

def test_01_scale_out
Expand Down Expand Up @@ -57,12 +62,8 @@ def test_02_scale_in
@controller.scale_in

NUMBER_OF_KEYS.times do |i|
assert_equal(i.to_s, @client.call('GET', "key#{i}"), "Case: key#{i}")
rescue ::RedisClient::CommandError => e
raise unless e.message.start_with?('CLUSTERDOWN Hash slot not served')

# FIXME: Why does the error occur?
p "key#{i}"
got = retry_call(attempts: MAX_ATTEMPTS) { @client.call('GET', "key#{i}") }
assert_equal(i.to_s, got, "Case: key#{i}")
end

want = TEST_NODE_URIS.size
Expand Down Expand Up @@ -98,4 +99,18 @@ def build_additional_node_urls
max = TEST_REDIS_PORTS.max
(max + 1..max + 2).map { |port| "#{TEST_REDIS_SCHEME}://#{TEST_REDIS_HOST}:#{port}" }
end

# Runs the given block and returns its value, re-running it while the
# cluster reports that a hash slot is temporarily unserved.
#
# Only ::RedisClient::CommandError whose message starts with
# 'CLUSTERDOWN Hash slot not served' is treated as retryable; each such
# failure increments @cluster_down_error_count and pauses for WAIT_SEC
# before the next try. Any other CommandError is re-raised untouched.
#
# attempts - maximum number of times the block may be invoked.
#
# Raises MaxRetryExceeded once the attempt budget is used up (or right
# away, without yielding, when attempts is zero or negative).
def retry_call(attempts:)
  remaining = attempts

  begin
    raise MaxRetryExceeded if remaining <= 0

    remaining -= 1
    yield
  rescue ::RedisClient::CommandError => e
    retryable = e.message.start_with?('CLUSTERDOWN Hash slot not served')
    raise unless retryable

    @cluster_down_error_count += 1
    sleep WAIT_SEC
    retry
  end
end
end
3 changes: 2 additions & 1 deletion test/test_against_cluster_state.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def setup
def teardown
@controller&.close
@client&.close
print "#{@redirect_count.get}, ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')} = "
print "#{@redirect_count.get}, "\
"ClusterNodesCall: #{@captured_commands.count('cluster', 'nodes')} = "
end

def test_the_state_of_cluster_down
Expand Down
2 changes: 2 additions & 0 deletions test/testing_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
when 'hiredis' then require 'hiredis-client'
end

# Raised by the test helpers when a bounded retry/wait loop has used up
# all of its attempts.
class MaxRetryExceeded < StandardError; end

class TestingWrapper < Minitest::Test
private

Expand Down
Loading