-import time
 import unittest
 
 from parameterized import parameterized
 from crate.client import connect
 import random
 from random import sample
 
-from crate.qa.tests import NodeProvider, insert_data, UpgradePath
+from crate.qa.tests import NodeProvider, insert_data, UpgradePath, assert_busy
 
-UPGRADE_42_TO_43 = ('4.2.x to 4.3.x', UpgradePath('4.2.x', '4.3.x'), 3,)
-UPGRADE_43_TO_LATEST = ('4.3.x to latest-nightly', UpgradePath('4.3.x', 'latest-nightly'), 3,)
+NUMBER_OF_NODES = 3
+UPGRADE_42_TO_43 = ('4.2.x to 4.3.x', UpgradePath('4.2.x', '4.3.x'), NUMBER_OF_NODES,)
+UPGRADE_43_TO_LATEST = ('4.3.x to latest-nightly', UpgradePath('4.3.x', 'latest-nightly'), NUMBER_OF_NODES,)
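The crux of this commit is replacing fixed `time.sleep(...)` pauses with `assert_busy(...)`, which polls an assertion until it passes instead of hoping a fixed delay is long enough. The helper itself lives in `crate.qa.tests` and is not part of this diff; the following is only a minimal sketch of the idea, with the signature, timeout, and retry interval assumed (modeled on Elasticsearch's `assertBusy`):

```python
import time


def assert_busy(assertion, timeout=60, interval=0.1):
    """Hypothetical sketch: re-run `assertion` (a zero-argument callable)
    until it stops raising AssertionError or `timeout` seconds elapse."""
    deadline = time.monotonic() + timeout
    while True:
        try:
            assertion()
            return
        except AssertionError:
            if time.monotonic() >= deadline:
                raise  # give up and surface the last assertion failure
            time.sleep(interval)
```

Callers wrap the check in a lambda, e.g. `assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))`, so the check is re-evaluated fresh on every retry rather than once.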
 
 
 class RecoveryTest(NodeProvider, unittest.TestCase):
-
     """
     In depth testing of the recovery mechanism during a rolling restart.
     Based on org.elasticsearch.upgrades.RecoveryIT.java
     """
+
     def _assert_num_docs_by_node_id(self, conn, schema, table_name, node_id, expected_count):
         c = conn.cursor()
         c.execute('''select num_docs from sys.shards where schema_name = ? and table_name = ? and node['id'] = ?''',
                   (schema, table_name, node_id))
         number_of_docs = c.fetchone()
-        self.assertEqual(number_of_docs[0], expected_count)
+        self.assertTrue(number_of_docs)
+        self.assertEqual(expected_count, number_of_docs[0])
 
     def _assert_is_green(self, conn, schema, table_name):
         c = conn.cursor()
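The body of `_assert_is_green` is cut off by the hunk boundary (only the cursor setup is visible). For orientation, a plausible implementation against CrateDB's `sys.health` table — an assumption for illustration, not code from this commit:

```python
def _assert_is_green(self, conn, schema, table_name):
    # Assumed body: sys.health reports GREEN/YELLOW/RED per table.
    c = conn.cursor()
    c.execute(
        'select health from sys.health where table_schema = ? and table_name = ?',
        (schema, table_name))
    result = c.fetchone()
    self.assertTrue(result)
    self.assertEqual(result[0], 'GREEN')
```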
@@ -79,8 +80,7 @@ def test_recovery_with_concurrent_indexing(self, name, path, nodes):
         # insert data into the initial homogeneous cluster
         insert_data(conn, 'doc', 'test', 10)
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
         # make sure that we can index while the replicas are recovering
         c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')
 
@@ -98,9 +98,9 @@ def test_recovery_with_concurrent_indexing(self, name, path, nodes):
         node_ids = c.fetchall()
         self.assertEqual(len(node_ids), nodes)
 
-        time.sleep(3)
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
         for node_id in node_ids:
-            self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 60)
+            assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 60))
 
         c.execute('''alter table doc.test set ("routing.allocation.enable"='primaries')''')
         # upgrade the full cluster
@@ -117,12 +117,11 @@ def test_recovery_with_concurrent_indexing(self, name, path, nodes):
         node_ids = c.fetchall()
         self.assertEqual(len(node_ids), nodes)
 
-        time.sleep(3)
         for node_id in node_ids:
-            self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 105)
+            assert_busy(lambda: self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 105))
 
     @parameterized.expand([UPGRADE_42_TO_43, UPGRADE_43_TO_LATEST])
     def test_relocation_with_concurrent_indexing(self, name, path, nodes):
         cluster = self._new_cluster(path.from_version, nodes)
         cluster.start()
 
@@ -135,8 +134,7 @@ def test_relocation_with_concurrent_indexing(self, name, path, nodes):
 
         insert_data(conn, 'doc', 'test', 10)
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
         # make sure that no shards are allocated, so we can make sure the primary stays
         # on the old node (when one node stops, we lose the master too, so a replica
         # will not be promoted)
@@ -157,18 +155,16 @@ def test_relocation_with_concurrent_indexing(self, name, path, nodes):
                 "routing.allocation.include._id"=?
             )''', (old_node_id,))
 
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         c.execute('''alter table doc.test set ("routing.allocation.include._id"=?)''', (new_node_id,))
         insert_data(conn, 'doc', 'test', 50)
 
         # ensure the relocation from old node to new node has occurred; otherwise the table is green
         # even though shards haven't moved to the new node yet (allocation was throttled).
-        time.sleep(3)
-        c.execute('select current_state from sys.allocations where node_id = ?', (new_node_id,))
-        current_state = c.fetchone()[0]
-        self.assertEqual(current_state, 'STARTED')
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_shard_state(conn, 'doc', 'test', new_node_id, 'STARTED'))
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
+
         c.execute('refresh table doc.test')
         self._assert_num_docs_by_node_id(conn, 'doc', 'test', new_node_id, 60)
 
@@ -180,17 +176,23 @@ def test_relocation_with_concurrent_indexing(self, name, path, nodes):
 
         insert_data(conn, 'doc', 'test', 45)
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
         c.execute('refresh table doc.test')
-        time.sleep(5)
         c.execute('select id from sys.nodes')
         node_ids = c.fetchall()
         self.assertEqual(len(node_ids), nodes)
 
         for node_id in node_ids:
             self._assert_num_docs_by_node_id(conn, 'doc', 'test', node_id[0], 105)
 
+    def _assert_shard_state(self, conn, schema, table_name, node_id, state):
+        c = conn.cursor()
+        c.execute('select current_state from sys.allocations where node_id = ? and table_name = ? and table_schema = ?',
+                  (node_id, table_name, schema))
+        current_state = c.fetchone()
+        self.assertTrue(current_state)
+        self.assertEqual(current_state[0], state)
+
     @parameterized.expand([UPGRADE_42_TO_43, UPGRADE_43_TO_LATEST])
     def test_recovery(self, name, path, nodes):
         """
@@ -218,16 +220,15 @@ def test_recovery(self, name, path, nodes):
         # upgrade to mixed cluster
         self._upgrade_cluster(cluster, path.to_version, random.randint(1, nodes - 1))
 
-        time.sleep(5)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         # upgrade fully to the new version
         self._upgrade_cluster(cluster, path.to_version, nodes)
 
         if random.choice([True, False]):
             c.execute("refresh table doc.test")
 
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
     @parameterized.expand([UPGRADE_42_TO_43, UPGRADE_43_TO_LATEST])
     def test_recovery_closed_index(self, name, path, nodes):
@@ -246,8 +247,7 @@ def test_recovery_closed_index(self, name, path, nodes):
                 "unassigned.node_left.delayed_timeout" = '100ms', "allocation.max_retries" = '0')
             ''')
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         c.execute('alter table doc.test close')
 
@@ -291,7 +291,7 @@ def test_closed_index_during_rolling_upgrade(self, name, path, nodes):
             create table doc.mixed_cluster(x int) clustered into 1 shards with( number_of_replicas = 0)
             ''')
 
-        self._assert_is_green(conn, 'doc', 'mixed_cluster')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'mixed_cluster'))
         c.execute('alter table doc.mixed_cluster close')
 
         self._assert_is_closed(conn, 'doc', 'mixed_cluster')
@@ -306,7 +306,7 @@ def test_closed_index_during_rolling_upgrade(self, name, path, nodes):
             create table doc.upgraded_cluster(x int) clustered into 1 shards with( number_of_replicas = 0)
             ''')
 
-        self._assert_is_green(conn, 'doc', 'upgraded_cluster')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'upgraded_cluster'))
         c.execute('alter table doc.upgraded_cluster close')
 
         self._assert_is_closed(conn, 'doc', 'upgraded_cluster')
@@ -335,8 +335,7 @@ def test_update_docs(self, name, path, nodes):
         self._upgrade_cluster(cluster, path.to_version, random.randint(1, nodes - 1))
 
         if random.choice([True, False]):
-            time.sleep(5)
-            self._assert_is_green(conn, 'doc', 'test')
+            assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         # update the data in a mixed cluster
         updates = [(i, str(random.randint(0, 100))) for i in range(0, 100)]
@@ -382,8 +381,7 @@ def test_operation_based_recovery(self, name, path, nodes):
                 "soft_deletes.enabled" = true)
             ''')
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         insert_data(conn, 'doc', 'test', random.randint(100, 200))
         c.execute('refresh table doc.test')
@@ -396,8 +394,7 @@ def test_operation_based_recovery(self, name, path, nodes):
         # upgrade to mixed cluster
         self._upgrade_cluster(cluster, path.to_version, random.randint(1, nodes - 1))
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         num_docs = random.randint(0, 3)
         if num_docs > 0:
@@ -407,8 +404,7 @@ def test_operation_based_recovery(self, name, path, nodes):
         # upgrade fully to the new version
         self._upgrade_cluster(cluster, path.to_version, nodes)
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         num_docs = random.randint(0, 3)
         if num_docs > 0:
@@ -434,8 +430,7 @@ def test_turnoff_translog_retention_after_upgraded(self, name, path, nodes):
                 "soft_deletes.enabled" = true)
             ''', (number_of_replicas,))
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
 
         insert_data(conn, 'doc', 'test', random.randint(100, 200))
         c.execute('refresh table doc.test')
@@ -447,8 +442,7 @@ def test_turnoff_translog_retention_after_upgraded(self, name, path, nodes):
         # update the cluster to the new version
         self._upgrade_cluster(cluster, path.to_version, nodes)
 
-        time.sleep(3)
-        self._assert_is_green(conn, 'doc', 'test')
+        assert_busy(lambda: self._assert_is_green(conn, 'doc', 'test'))
         c.execute('refresh table doc.test')
         self._assert_translog_is_empty(conn, 'doc', 'test')
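`_assert_translog_is_empty` is likewise outside the shown hunks. A plausible sketch against the `translog_stats` object column of `sys.shards` — column names assumed from CrateDB's `sys.shards` schema, not taken from this commit:

```python
def _assert_translog_is_empty(self, conn, schema, table_name):
    # Assumed body: once translog retention is turned off and the table has
    # been refreshed, shards should report no uncommitted translog operations.
    c = conn.cursor()
    c.execute('''select translog_stats['uncommitted_operations'] from sys.shards
                 where schema_name = ? and table_name = ?''',
              (schema, table_name))
    for (uncommitted_operations,) in c.fetchall():
        self.assertEqual(uncommitted_operations, 0)
```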