Skip to content

Commit a3dd531

Browse files
HonahX and sebpretzer authored
Glue endpoint config variable, continue apache#530 (apache#920)
Co-authored-by: Seb Pretzer <[email protected]>
1 parent 32e8f88 commit a3dd531

File tree

5 files changed

+35
-4
lines changed

5 files changed

+35
-4
lines changed

mkdocs/docs/configuration.md

+10
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,16 @@ catalog:
288288
region_name: <REGION_NAME>
289289
```
290290

291+
<!-- markdown-link-check-disable -->
292+
293+
| Key | Example | Description |
294+
| ----------------- | ------------------------------------ | ------------------------------------------------------------------------------- |
295+
| glue.id | 111111111111 | Configure the 12-digit ID of the Glue Catalog |
296+
| glue.skip-archive | true | Configure whether to skip the archival of older table versions. Defaults to true |
297+
| glue.endpoint | https://glue.us-east-1.amazonaws.com | Configure an alternative endpoint of the Glue service for GlueCatalog to access |
298+
299+
<!-- markdown-link-check-enable -->
300+
291301
## DynamoDB Catalog
292302

293303
If you want to use AWS DynamoDB as the catalog, you can use the last two ways to configure the pyiceberg and refer

pyiceberg/catalog/glue.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@
109109
GLUE_SKIP_ARCHIVE = "glue.skip-archive"
110110
GLUE_SKIP_ARCHIVE_DEFAULT = True
111111

112+
# Configure an alternative endpoint of the Glue service for GlueCatalog to access.
113+
# This allows GlueCatalog to work with any Glue-compatible metastore service that is served from a different endpoint
114+
GLUE_CATALOG_ENDPOINT = "glue.endpoint"
115+
112116
ICEBERG_FIELD_ID = "iceberg.field.id"
113117
ICEBERG_FIELD_OPTIONAL = "iceberg.field.optional"
114118
ICEBERG_FIELD_CURRENT = "iceberg.field.current"
@@ -289,7 +293,7 @@ def __init__(self, name: str, **properties: Any):
289293
aws_secret_access_key=properties.get("aws_secret_access_key"),
290294
aws_session_token=properties.get("aws_session_token"),
291295
)
292-
self.glue: GlueClient = session.client("glue")
296+
self.glue: GlueClient = session.client("glue", endpoint_url=properties.get(GLUE_CATALOG_ENDPOINT))
293297

294298
if glue_catalog_id := properties.get(GLUE_ID):
295299
_register_glue_catalog_id_with_glue_client(self.glue, glue_catalog_id)

tests/catalog/integration_test_glue.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from botocore.exceptions import ClientError
2626

2727
from pyiceberg.catalog import Catalog, MetastoreCatalog
28-
from pyiceberg.catalog.glue import GlueCatalog
28+
from pyiceberg.catalog.glue import GLUE_CATALOG_ENDPOINT, GlueCatalog
2929
from pyiceberg.exceptions import (
3030
NamespaceAlreadyExistsError,
3131
NamespaceNotEmptyError,
@@ -36,7 +36,7 @@
3636
from pyiceberg.io.pyarrow import _dataframe_to_data_files, schema_to_pyarrow
3737
from pyiceberg.schema import Schema
3838
from pyiceberg.types import IntegerType
39-
from tests.conftest import clean_up, get_bucket_name, get_s3_path
39+
from tests.conftest import clean_up, get_bucket_name, get_glue_endpoint, get_s3_path
4040

4141
# The number of tables/databases used in list_table/namespace test
4242
LIST_TEST_NUMBER = 2
@@ -51,7 +51,9 @@ def fixture_glue_client() -> boto3.client:
5151
@pytest.fixture(name="test_catalog", scope="module")
5252
def fixture_test_catalog() -> Generator[Catalog, None, None]:
5353
"""Configure the pre- and post-setting of aws integration test."""
54-
test_catalog = GlueCatalog(CATALOG_NAME, warehouse=get_s3_path(get_bucket_name()))
54+
test_catalog = GlueCatalog(
55+
CATALOG_NAME, **{"warehouse": get_s3_path(get_bucket_name()), GLUE_CATALOG_ENDPOINT: get_glue_endpoint()}
56+
)
5557
yield test_catalog
5658
clean_up(test_catalog)
5759

tests/catalog/test_glue.py

+10
Original file line numberDiff line numberDiff line change
@@ -862,3 +862,13 @@ def test_register_table_with_given_location(
862862
table = test_catalog.register_table(identifier, location)
863863
assert table.identifier == (catalog_name,) + identifier
864864
assert test_catalog.table_exists(identifier) is True
865+
866+
867+
@mock_aws
868+
def test_glue_endpoint_override(_bucket_initialize: None, moto_endpoint_url: str, database_name: str) -> None:
869+
catalog_name = "glue"
870+
test_endpoint = "https://test-endpoint"
871+
test_catalog = GlueCatalog(
872+
catalog_name, **{"s3.endpoint": moto_endpoint_url, "warehouse": f"s3://{BUCKET_NAME}", "glue.endpoint": test_endpoint}
873+
)
874+
assert test_catalog.glue.meta.endpoint_url == test_endpoint

tests/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -2043,6 +2043,11 @@ def get_bucket_name() -> str:
20432043
return bucket_name
20442044

20452045

2046+
def get_glue_endpoint() -> Optional[str]:
2047+
"""Set the optional environment variable AWS_TEST_GLUE_ENDPOINT for a glue endpoint to test."""
2048+
return os.getenv("AWS_TEST_GLUE_ENDPOINT")
2049+
2050+
20462051
def get_s3_path(bucket_name: str, database_name: Optional[str] = None, table_name: Optional[str] = None) -> str:
20472052
result_path = f"s3://{bucket_name}"
20482053
if database_name is not None:

0 commit comments

Comments
 (0)