14
14
# KIND, either express or implied. See the License for the
15
15
# specific language governing permissions and limitations
16
16
# under the License.
17
+ from typing import TYPE_CHECKING
18
+
17
19
import pytest
18
20
19
- from pyiceberg .catalog import Catalog
21
+ from pyiceberg .exceptions import NoSuchTableError
20
22
from pyiceberg .table .statistics import BlobMetadata , StatisticsFile
21
23
24
+ if TYPE_CHECKING :
25
+ import pyarrow as pa
26
+
27
+ from pyiceberg .catalog import Catalog
28
+ from pyiceberg .schema import Schema
29
+ from pyiceberg .table import Table
30
+
31
+
32
+ def _create_table_with_schema (catalog : "Catalog" , schema : "Schema" ) -> "Table" :
33
+ tbl_name = "default.test_table_statistics_operations"
34
+
35
+ try :
36
+ catalog .drop_table (tbl_name )
37
+ except NoSuchTableError :
38
+ pass
39
+ return catalog .create_table (identifier = tbl_name , schema = schema )
40
+
22
41
23
42
@pytest .mark .integration
24
43
@pytest .mark .parametrize ("catalog" , [pytest .lazy_fixture ("session_catalog_hive" ), pytest .lazy_fixture ("session_catalog" )])
25
- def test_manage_statistics (catalog : Catalog ) -> None :
26
- identifier = "default.test_table_statistics_operations"
27
- tbl = catalog .load_table (identifier )
44
+ def test_manage_statistics (catalog : "Catalog" , arrow_table_with_null : "pa.Table" ) -> None :
45
+ tbl = _create_table_with_schema (catalog , arrow_table_with_null .schema )
46
+
47
+ tbl .append (arrow_table_with_null )
48
+ tbl .append (arrow_table_with_null )
28
49
29
50
add_snapshot_id_1 = tbl .history ()[0 ].snapshot_id
30
51
add_snapshot_id_2 = tbl .history ()[1 ].snapshot_id
31
52
32
- def create_statistics_file (snapshot_id : int ) -> StatisticsFile :
53
+ def create_statistics_file (snapshot_id : int , type_name : str ) -> StatisticsFile :
33
54
blob_metadata = BlobMetadata (
34
- type = "boring-type" ,
55
+ type = type_name ,
35
56
snapshot_id = snapshot_id ,
36
57
sequence_number = 2 ,
37
58
fields = [1 ],
@@ -48,8 +69,8 @@ def create_statistics_file(snapshot_id: int) -> StatisticsFile:
48
69
49
70
return statistics_file
50
71
51
- statistics_file_snap_1 = create_statistics_file (add_snapshot_id_1 )
52
- statistics_file_snap_2 = create_statistics_file (add_snapshot_id_2 )
72
+ statistics_file_snap_1 = create_statistics_file (add_snapshot_id_1 , "apache-datasketches-theta-v1" )
73
+ statistics_file_snap_2 = create_statistics_file (add_snapshot_id_2 , "deletion-vector-v1" )
53
74
54
75
with tbl .update_statistics () as update :
55
76
update .set_statistics (add_snapshot_id_1 , statistics_file_snap_1 )
0 commit comments