Skip to content

Commit 45c60cb

Browse files
committed
Rewrite tests
1 parent e328524 commit 45c60cb

File tree

4 files changed

+33
-36
lines changed

4 files changed

+33
-36
lines changed

dev/provision.py

-24
Original file line number | Diff line number | Diff line change
@@ -401,27 +401,3 @@
401401
)
402402
spark.sql(f"ALTER TABLE {catalog_name}.default.test_empty_scan_ordered_str WRITE ORDERED BY id")
403403
spark.sql(f"INSERT INTO {catalog_name}.default.test_empty_scan_ordered_str VALUES 'a', 'c'")
404-
405-
spark.sql(
406-
f"""
407-
CREATE OR REPLACE TABLE {catalog_name}.default.test_table_statistics_operations (
408-
number integer
409-
)
410-
USING iceberg
411-
TBLPROPERTIES (
412-
'format-version'='2'
413-
);
414-
"""
415-
)
416-
spark.sql(
417-
f"""
418-
INSERT INTO {catalog_name}.default.test_table_statistics_operations
419-
VALUES (1)
420-
"""
421-
)
422-
spark.sql(
423-
f"""
424-
INSERT INTO {catalog_name}.default.test_table_statistics_operations
425-
VALUES (2)
426-
"""
427-
)

tests/conftest.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1010,7 +1010,7 @@ def generate_snapshot(
10101010
"file-footer-size-in-bytes": 42,
10111011
"blob-metadata": [
10121012
{
1013-
"type": "ndv",
1013+
"type": "apache-datasketches-theta-v1",
10141014
"snapshot-id": 3051729675574597004,
10151015
"sequence-number": 1,
10161016
"fields": [1],
@@ -1024,7 +1024,7 @@ def generate_snapshot(
10241024
"file-footer-size-in-bytes": 42,
10251025
"blob-metadata": [
10261026
{
1027-
"type": "ndv",
1027+
"type": "deletion-vector-v1",
10281028
"snapshot-id": 3055729675574597004,
10291029
"sequence-number": 1,
10301030
"fields": [1],

tests/integration/test_statistics_operations.py

+29-8
Original file line number | Diff line number | Diff line change
@@ -14,24 +14,45 @@
1414
# KIND, either express or implied. See the License for the
1515
# specific language governing permissions and limitations
1616
# under the License.
17+
from typing import TYPE_CHECKING
18+
1719
import pytest
1820

19-
from pyiceberg.catalog import Catalog
21+
from pyiceberg.exceptions import NoSuchTableError
2022
from pyiceberg.table.statistics import BlobMetadata, StatisticsFile
2123

24+
if TYPE_CHECKING:
25+
import pyarrow as pa
26+
27+
from pyiceberg.catalog import Catalog
28+
from pyiceberg.schema import Schema
29+
from pyiceberg.table import Table
30+
31+
32+
def _create_table_with_schema(catalog: "Catalog", schema: "Schema | pa.Schema") -> "Table":
    """Recreate the statistics test table from scratch and return it.

    Any existing ``default.test_table_statistics_operations`` table is dropped
    first, so each call starts from a freshly created table.

    Args:
        catalog: Catalog in which the table is dropped and recreated.
        schema: Schema handed straight to ``catalog.create_table``. The test in
            this module passes the ``.schema`` of a value annotated
            ``pa.Table``, hence the widened annotation.
            NOTE(review): confirm ``create_table`` accepts a pyarrow schema in
            the targeted pyiceberg version.

    Returns:
        Whatever ``catalog.create_table`` returns for the new table.
    """
    tbl_name = "default.test_table_statistics_operations"

    try:
        catalog.drop_table(tbl_name)
    except NoSuchTableError:
        # Nothing to clean up: the table does not exist in this catalog yet.
        pass
    return catalog.create_table(identifier=tbl_name, schema=schema)
40+
2241

2342
@pytest.mark.integration
2443
@pytest.mark.parametrize("catalog", [pytest.lazy_fixture("session_catalog_hive"), pytest.lazy_fixture("session_catalog")])
25-
def test_manage_statistics(catalog: Catalog) -> None:
26-
identifier = "default.test_table_statistics_operations"
27-
tbl = catalog.load_table(identifier)
44+
def test_manage_statistics(catalog: "Catalog", arrow_table_with_null: "pa.Table") -> None:
45+
tbl = _create_table_with_schema(catalog, arrow_table_with_null.schema)
46+
47+
tbl.append(arrow_table_with_null)
48+
tbl.append(arrow_table_with_null)
2849

2950
add_snapshot_id_1 = tbl.history()[0].snapshot_id
3051
add_snapshot_id_2 = tbl.history()[1].snapshot_id
3152

32-
def create_statistics_file(snapshot_id: int) -> StatisticsFile:
53+
def create_statistics_file(snapshot_id: int, type_name: str) -> StatisticsFile:
3354
blob_metadata = BlobMetadata(
34-
type="boring-type",
55+
type=type_name,
3556
snapshot_id=snapshot_id,
3657
sequence_number=2,
3758
fields=[1],
@@ -48,8 +69,8 @@ def create_statistics_file(snapshot_id: int) -> StatisticsFile:
4869

4970
return statistics_file
5071

51-
statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1)
52-
statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2)
72+
statistics_file_snap_1 = create_statistics_file(add_snapshot_id_1, "apache-datasketches-theta-v1")
73+
statistics_file_snap_2 = create_statistics_file(add_snapshot_id_2, "deletion-vector-v1")
5374

5475
with tbl.update_statistics() as update:
5576
update.set_statistics(add_snapshot_id_1, statistics_file_snap_1)

tests/table/test_init.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -1257,7 +1257,7 @@ def test_set_statistics_update(table_v2_with_statistics: Table) -> None:
12571257
snapshot_id = table_v2_with_statistics.metadata.current_snapshot_id
12581258

12591259
blob_metadata = BlobMetadata(
1260-
type="boring-type",
1260+
type="apache-datasketches-theta-v1",
12611261
snapshot_id=snapshot_id,
12621262
sequence_number=2,
12631263
fields=[1],
@@ -1290,7 +1290,7 @@ def test_set_statistics_update(table_v2_with_statistics: Table) -> None:
12901290
"file-footer-size-in-bytes": 27,
12911291
"blob-metadata": [
12921292
{
1293-
"type": "boring-type",
1293+
"type": "apache-datasketches-theta-v1",
12941294
"snapshot-id": 3055729675574597004,
12951295
"sequence-number": 2,
12961296
"fields": [

0 commit comments

Comments
 (0)