Skip to content

Commit 99b35c0

Browse files
Storage operations within checkpoint manager did not utilize the blob_prefix. (#9086)
* Storage operations within checkpoint manager did not utilize the blob_prefix. * Adds unit tests to validate this as well. * Add new parameter, use_consumer_group_as_directory, to control consumer_group pathing in concert with storage_blob_prefix. * Add docstring improvements for these lease path parameters. * Add unit tests for new parameters * Add release notes
1 parent 5868e83 commit 99b35c0

File tree

4 files changed

+166
-17
lines changed

4 files changed

+166
-17
lines changed

sdk/eventhub/azure-eventhubs/HISTORY.rst

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,20 @@
33
Release History
44
===============
55

6+
1.3.4 (TBD)
7+
-----------
8+
9+
**Features**
10+
11+
- Add new parameter to AzureStorageCheckpointLeaseManager, use_consumer_group_as_directory, to control consumer_group pathing in concert with storage_blob_prefix.
12+
13+
**BugFixes**
14+
15+
- Ensures storage_blob_prefix within AzureStorageCheckpointLeaseManager is actually applied.
16+
17+
618
1.3.3 (2019-12-4)
7-
------------------
19+
-----------------
820

921
**Features**
1022

sdk/eventhub/azure-eventhubs/azure/eventprocessorhost/azure_storage_checkpoint_manager.py

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ class AzureStorageCheckpointLeaseManager(AbstractCheckpointManager, AbstractLeas
3737
will be used.
3838
:param str lease_container_name: The name of the container that will be used to store
3939
leases. If it does not already exist it will be created. Default value is 'eph-leases'.
40+
Leases are named via internal partition_ids, locations can be modified via
41+
storage_blob_prefix and use_consumer_group_as_directory.
42+
:param str storage_blob_prefix: If populated, prepends a prefix when constructing
43+
the location that leases are stored within the lease_container. Default None.
44+
If use_consumer_group_as_directory is also enabled, the two are combined as <prefix><group>/<id>.
4045
:param int lease_renew_interval: The interval in seconds at which EPH will attempt to
4146
renew the lease of a particular partition. Default value is 10.
4247
:param int lease_duration: The duration in seconds of a lease on a partition.
@@ -49,11 +54,16 @@ class AzureStorageCheckpointLeaseManager(AbstractCheckpointManager, AbstractLeas
4954
:param str connection_string: If specified, this will override all other endpoint parameters.
5055
See http://azure.microsoft.com/en-us/documentation/articles/storage-configure-connection-string/
5156
for the connection string format.
57+
:param bool use_consumer_group_as_directory: If true, includes the consumer group as part of the
58+
location we use to store leases within the container, as such: <consumer_group>/<partition_id>,
59+
otherwise leases are simply named by their partition_id. Default False.
60+
If storage_blob_prefix is provided this prefix will be prepended in either case.
5261
"""
5362

5463
def __init__(self, storage_account_name=None, storage_account_key=None, lease_container_name="eph-leases",
5564
storage_blob_prefix=None, lease_renew_interval=10, lease_duration=30,
56-
sas_token=None, endpoint_suffix="core.windows.net", connection_string=None):
65+
sas_token=None, endpoint_suffix="core.windows.net", connection_string=None,
66+
use_consumer_group_as_directory=False):
5767
AbstractCheckpointManager.__init__(self)
5868
AbstractLeaseManager.__init__(self, lease_renew_interval, lease_duration)
5969
self.storage_account_name = storage_account_name
@@ -63,6 +73,7 @@ def __init__(self, storage_account_name=None, storage_account_key=None, lease_co
6373
self.connection_string = connection_string
6474
self.lease_container_name = lease_container_name
6575
self.storage_blob_prefix = storage_blob_prefix
76+
self.use_consumer_group_as_directory = use_consumer_group_as_directory
6677
self.storage_client = None
6778
self.consumer_group_directory = None
6879
self.host = None
@@ -97,7 +108,7 @@ def initialize(self, host):
97108
endpoint_suffix=self.endpoint_suffix,
98109
connection_string=self.connection_string,
99110
request_session=self.request_session)
100-
self.consumer_group_directory = self.storage_blob_prefix + self.host.eh_config.consumer_group
111+
self.consumer_group_directory = self.host.eh_config.consumer_group if self.use_consumer_group_as_directory else ""
101112

102113
# Checkpoint Management Methods
103114

@@ -213,11 +224,13 @@ async def get_lease_async(self, partition_id):
213224
:rtype: ~azure.eventprocessorhost.lease.Lease
214225
"""
215226
try:
227+
blob_path = self._get_lease_blob_path(partition_id)
216228
blob = await self.host.loop.run_in_executor(
217229
self.executor,
218230
functools.partial(
219231
self.storage_client.get_blob_to_text,
220-
self.lease_container_name, partition_id))
232+
self.lease_container_name,
233+
blob_path))
221234
lease = AzureBlobLease()
222235
lease.with_blob(blob)
223236
async def state():
@@ -231,7 +244,7 @@ async def state():
231244
functools.partial(
232245
self.storage_client.get_blob_properties,
233246
self.lease_container_name,
234-
partition_id))
247+
blob_path))
235248
return res.properties.lease.state
236249
except Exception as err: # pylint: disable=broad-except
237250
_logger.error("Failed to get lease state %r %r", err, partition_id)
@@ -269,20 +282,21 @@ async def create_lease_if_not_exists_async(self, partition_id):
269282
"""
270283
return_lease = None
271284
try:
285+
blob_path = self._get_lease_blob_path(partition_id)
272286
return_lease = AzureBlobLease()
273287
return_lease.partition_id = partition_id
274288
serializable_lease = return_lease.serializable()
275289
json_lease = json.dumps(serializable_lease)
276290
_logger.info("Creating Lease %r %r %r",
277291
self.lease_container_name,
278-
partition_id,
292+
blob_path,
279293
json.dumps({k:v for k, v in serializable_lease.items() if k != 'event_processor_context'}))
280294
await self.host.loop.run_in_executor(
281295
self.executor,
282296
functools.partial(
283297
self.storage_client.create_blob_from_text,
284298
self.lease_container_name,
285-
partition_id,
299+
blob_path,
286300
json_lease))
287301
except Exception: # pylint: disable=broad-except
288302
try:
@@ -300,12 +314,13 @@ async def delete_lease_async(self, lease):
300314
:param lease: The stored lease to be deleted.
301315
:type lease: ~azure.eventprocessorhost.lease.Lease
302316
"""
317+
blob_path = self._get_lease_blob_path(lease.partition_id)
303318
await self.host.loop.run_in_executor(
304319
self.executor,
305320
functools.partial(
306321
self.storage_client.delete_blob,
307322
self.lease_container_name,
308-
lease.partition_id,
323+
blob_path,
309324
lease_id=lease.token))
310325

311326
async def acquire_lease_async(self, lease):
@@ -323,6 +338,7 @@ async def acquire_lease_async(self, lease):
323338
new_lease_id = str(uuid.uuid4())
324339
partition_id = lease.partition_id
325340
try:
341+
blob_path = self._get_lease_blob_path(partition_id)
326342
if asyncio.iscoroutinefunction(lease.state):
327343
state = await lease.state()
328344
else:
@@ -345,7 +361,7 @@ async def acquire_lease_async(self, lease):
345361
functools.partial(
346362
self.storage_client.change_blob_lease,
347363
self.lease_container_name,
348-
partition_id,
364+
blob_path,
349365
lease.token,
350366
new_lease_id))
351367
lease.token = new_lease_id
@@ -356,7 +372,7 @@ async def acquire_lease_async(self, lease):
356372
functools.partial(
357373
self.storage_client.acquire_blob_lease,
358374
self.lease_container_name,
359-
partition_id,
375+
blob_path,
360376
self.lease_duration,
361377
new_lease_id))
362378
lease.owner = self.host.host_name
@@ -381,12 +397,13 @@ async def renew_lease_async(self, lease):
381397
:rtype: bool
382398
"""
383399
try:
400+
blob_path = self._get_lease_blob_path(lease.partition_id)
384401
await self.host.loop.run_in_executor(
385402
self.executor,
386403
functools.partial(
387404
self.storage_client.renew_blob_lease,
388405
self.lease_container_name,
389-
lease.partition_id,
406+
blob_path,
390407
lease_id=lease.token,
391408
timeout=self.lease_duration))
392409
except Exception as err: # pylint: disable=broad-except
@@ -411,6 +428,7 @@ async def release_lease_async(self, lease):
411428
lease_id = None
412429
try:
413430
_logger.info("Releasing lease %r %r", self.host.guid, lease.partition_id)
431+
blob_path = self._get_lease_blob_path(lease.partition_id)
414432
lease_id = lease.token
415433
released_copy = AzureBlobLease()
416434
released_copy.with_lease(lease)
@@ -422,15 +440,15 @@ async def release_lease_async(self, lease):
422440
functools.partial(
423441
self.storage_client.create_blob_from_text,
424442
self.lease_container_name,
425-
lease.partition_id,
443+
blob_path,
426444
json.dumps(released_copy.serializable()),
427445
lease_id=lease_id))
428446
await self.host.loop.run_in_executor(
429447
self.executor,
430448
functools.partial(
431449
self.storage_client.release_blob_lease,
432450
self.lease_container_name,
433-
lease.partition_id,
451+
blob_path,
434452
lease_id))
435453
except Exception as err: # pylint: disable=broad-except
436454
_logger.error("Failed to release lease %r %r %r",
@@ -461,12 +479,13 @@ async def update_lease_async(self, lease):
461479
# First, renew the lease to make sure the update will go through.
462480
if await self.renew_lease_async(lease):
463481
try:
482+
blob_path = self._get_lease_blob_path(lease.partition_id)
464483
await self.host.loop.run_in_executor(
465484
self.executor,
466485
functools.partial(
467486
self.storage_client.create_blob_from_text,
468487
self.lease_container_name,
469-
lease.partition_id,
488+
blob_path,
470489
json.dumps(lease.serializable()),
471490
lease_id=lease.token))
472491

@@ -477,3 +496,18 @@ async def update_lease_async(self, lease):
477496
else:
478497
return False
479498
return True
499+
500+
def _get_lease_blob_path(self, partition_id):
    """
    Build the name of the blob that stores the lease for a partition.

    The result has the shape ``<prefix><consumer_group>/<partition_id>``.
    The ``<consumer_group>/`` segment is only present when
    ``self.consumer_group_directory`` is truthy, and ``<prefix>`` is only
    present when ``self.storage_blob_prefix`` is truthy. With neither set
    the path is just the partition id.

    :param partition_id: The ID of the partition whose lease blob is addressed.
    :type partition_id: str
    :return: The blob path to use within the lease container.
    :rtype: str
    """
    # Note: In a perfect world, without a prefix provided we'd prepend the
    # consumer group to the partition_id. However this would break
    # backcompat with a historical world in which we just had partition_id
    # within the container, and preclude any way for a user to generate
    # that behavior, so we will fix it in all cases in the Track2 library
    # and simply enable the proper full path here if the optional parameter
    # is present.

    # Coerce up front so the return type is always str, regardless of
    # whether a prefix or consumer group directory is configured.
    path = str(partition_id)
    if self.consumer_group_directory:
        path = "{}/{}".format(self.consumer_group_directory, path)
    if self.storage_blob_prefix:
        path = "{}{}".format(self.storage_blob_prefix, path)
    return path

sdk/eventhub/azure-eventhubs/conftest.py

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,16 +219,55 @@ def connstr_senders(connection_str):
219219
client.stop()
220220

221221

222-
@pytest.fixture()
223-
def storage_clm(eph):
222+
def _storage_clm(eph, use_storage_blob_prefix=False, use_consumer_group_as_directory=False):
    """
    Construct an AzureStorageCheckpointLeaseManager for a freshly named container.

    The storage account name and key are read from the AZURE_STORAGE_ACCOUNT and
    AZURE_STORAGE_ACCESS_KEY environment variables; a KeyError while building
    the manager skips the calling test.

    :param eph: Unused here; accepted so fixtures can pass their host through.
    :param bool use_storage_blob_prefix: If true, configure a unique
     "testprefix<uuid>" blob prefix, otherwise no prefix.
    :param bool use_consumer_group_as_directory: Forwarded unchanged to the manager.
    :return: A ``(manager, container_name)`` tuple.
    """
    try:
        container_name = str(uuid.uuid4())
        account_name = os.environ['AZURE_STORAGE_ACCOUNT']
        account_key = os.environ['AZURE_STORAGE_ACCESS_KEY']
        if use_storage_blob_prefix:
            blob_prefix = "testprefix" + str(uuid.uuid4())
        else:
            blob_prefix = None
        manager = AzureStorageCheckpointLeaseManager(
            account_name,
            account_key,
            container_name,
            storage_blob_prefix=blob_prefix,
            use_consumer_group_as_directory=use_consumer_group_as_directory)
    except KeyError:
        pytest.skip("Live Storage configuration not found.")
    return (manager, container_name)
234+
235+
236+
@pytest.fixture()
def storage_clm_with_prefix(eph):
    """
    Yield an initialized lease manager configured with a blob prefix only.

    The backing container is created before the test and deleted on teardown.
    """
    manager, container_name = _storage_clm(eph, use_storage_blob_prefix=True)
    try:
        manager.initialize(eph)
        manager.storage_client.create_container(container_name)
        yield manager
    finally:
        manager.storage_client.delete_container(container_name)
245+
246+
@pytest.fixture()
def storage_clm_with_prefix_and_consumer_dir(eph):
    """
    Yield an initialized lease manager configured with both a blob prefix
    and the consumer group directory option.

    The backing container is created before the test and deleted on teardown.
    """
    manager, container_name = _storage_clm(
        eph,
        use_storage_blob_prefix=True,
        use_consumer_group_as_directory=True)
    try:
        manager.initialize(eph)
        manager.storage_client.create_container(container_name)
        yield manager
    finally:
        manager.storage_client.delete_container(container_name)
255+
256+
257+
@pytest.fixture()
def storage_clm_with_consumer_dir(eph):
    """
    Yield an initialized lease manager configured with the consumer group
    directory option and no blob prefix.

    The backing container is created before the test and deleted on teardown.
    """
    manager, container_name = _storage_clm(
        eph,
        use_storage_blob_prefix=False,
        use_consumer_group_as_directory=True)
    try:
        manager.initialize(eph)
        manager.storage_client.create_container(container_name)
        yield manager
    finally:
        manager.storage_client.delete_container(container_name)
266+
267+
268+
@pytest.fixture()
269+
def storage_clm(eph):
270+
storage_clm, container = _storage_clm(eph)
232271
try:
233272
storage_clm.initialize(eph)
234273
storage_clm.storage_client.create_container(container)

sdk/eventhub/azure-eventhubs/tests/asynctests/test_checkpoint_manager.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,70 @@ def test_delete_lease(storage_clm):
7373
assert lease == None
7474

7575

76+
@pytest.mark.liveTest
def test_lease_with_path_prefix(storage_clm_with_prefix):
    """
    Test creating a lease when only a blob prefix is configured.
    """
    manager = storage_clm_with_prefix
    event_loop = asyncio.get_event_loop()
    checkpoint = event_loop.run_until_complete(manager.create_checkpoint_if_not_exists_async("1"))
    assert checkpoint.partition_id == "1"
    assert checkpoint.offset == "-1"
    # The lease is read back, but its value is not inspected by this test.
    event_loop.run_until_complete(manager.get_lease_async("1"))

    # With a prefix and no consumer group directory the path is a single
    # segment: the prefix immediately followed by the partition id.
    segments = manager._get_lease_blob_path("0").split('/')
    assert "testprefix" in segments[0]
    assert "$default" not in segments[0]
    assert len(segments) == 1
    assert segments[-1][-1] == "0"
92+
93+
94+
@pytest.mark.liveTest
def test_lease_with_path_prefix_and_consumer_dir(storage_clm_with_prefix_and_consumer_dir):
    """
    Test creating a lease with both a blob prefix and a consumer group directory.
    """
    manager = storage_clm_with_prefix_and_consumer_dir
    event_loop = asyncio.get_event_loop()
    checkpoint = event_loop.run_until_complete(manager.create_checkpoint_if_not_exists_async("1"))
    assert checkpoint.partition_id == "1"
    assert checkpoint.offset == "-1"
    # The lease is read back, but its value is not inspected by this test.
    event_loop.run_until_complete(manager.get_lease_async("1"))

    # Expected shape: "<prefix><consumer_group>/<partition_id>".
    segments = manager._get_lease_blob_path("0").split('/')
    assert "testprefix" in segments[0]
    assert "$default" in segments[0]
    assert len(segments) == 2
    assert segments[-1] == "0"
110+
111+
112+
@pytest.mark.liveTest
def test_lease_with_consumer_dir(storage_clm_with_consumer_dir):
    """
    Test creating a lease with a consumer group directory and no blob prefix.
    """
    manager = storage_clm_with_consumer_dir
    event_loop = asyncio.get_event_loop()
    checkpoint = event_loop.run_until_complete(manager.create_checkpoint_if_not_exists_async("1"))
    assert checkpoint.partition_id == "1"
    assert checkpoint.offset == "-1"
    # The lease is read back, but its value is not inspected by this test.
    event_loop.run_until_complete(manager.get_lease_async("1"))

    # Expected shape: "<consumer_group>/<partition_id>".
    segments = manager._get_lease_blob_path("0").split('/')
    assert "testprefix" not in segments[0]
    assert "$default" in segments[0]
    assert len(segments) == 2
    assert segments[-1] == "0"
128+
129+
130+
@pytest.mark.liveTest
def test_lease_without_path_prefix(storage_clm):
    """
    Test the lease path when neither a blob prefix nor a consumer group
    directory is configured.
    """
    # With default settings the blob path is just the partition id.
    segments = storage_clm._get_lease_blob_path("0").split('/')
    assert len(segments) == 1
    assert segments[0] == "0"
138+
139+
76140
@pytest.mark.liveTest
77141
def test_checkpointing(storage_clm):
78142
"""

0 commit comments

Comments
 (0)