@@ -2382,10 +2382,122 @@ def arrow_table_date_timestamps() -> "pa.Table":
2382
2382
2383
2383
2384
2384
@pytest.fixture(scope="session")
def table_date_timestamps_schema() -> Schema:
    """Return an Iceberg Schema with one date, one timestamp and one timestamptz column.

    All three fields are optional (``required=False``) and carry field ids 1-3.
    """
    date_field = NestedField(field_id=1, name="date", field_type=DateType(), required=False)
    timestamp_field = NestedField(field_id=2, name="timestamp", field_type=TimestampType(), required=False)
    timestamptz_field = NestedField(field_id=3, name="timestamptz", field_type=TimestamptzType(), required=False)
    return Schema(date_field, timestamp_field, timestamptz_field)
2392
+
2393
+
2394
@pytest.fixture(scope="session")
def arrow_table_schema_with_all_timestamp_precisions() -> "pa.Schema":
    """Return a Pyarrow schema covering every timestamp unit, with and without a timezone.

    For each unit (s, ms, us, ns) there is a naive column and a ``tz="UTC"``
    column, followed by three extra columns whose tz strings are "Etc/UTC",
    "Z" and "+00:00".
    """
    import pyarrow as pa

    # (unit, naive column name, tz-aware column name), in schema order.
    per_unit_columns = [
        ("s", "timestamp_s", "timestamptz_s"),
        ("ms", "timestamp_ms", "timestamptz_ms"),
        ("us", "timestamp_us", "timestamptz_us"),
        ("ns", "timestamp_ns", "timestamptz_ns"),
    ]

    columns = []
    for unit, naive_name, aware_name in per_unit_columns:
        columns.append((naive_name, pa.timestamp(unit=unit)))
        columns.append((aware_name, pa.timestamp(unit=unit, tz="UTC")))

    # Extra tz-aware columns using other timezone strings.
    columns.append(("timestamptz_us_etc_utc", pa.timestamp(unit="us", tz="Etc/UTC")))
    columns.append(("timestamptz_ns_z", pa.timestamp(unit="ns", tz="Z")))
    columns.append(("timestamptz_s_0000", pa.timestamp(unit="s", tz="+00:00")))

    return pa.schema(columns)
2412
+
2413
+
2414
@pytest.fixture(scope="session")
def arrow_table_with_all_timestamp_precisions(arrow_table_schema_with_all_timestamp_precisions: "pa.Schema") -> "pa.Table":
    """Return a Pyarrow table with one column per timestamp type in the given schema.

    Every column has three rows with a null in the middle. The ``timestamp_ns``
    and ``timestamptz_ns_z`` columns carry a nanosecond component (6 and 7);
    the remaining columns hold whole-second datetimes.
    """
    import pandas as pd
    import pyarrow as pa

    utc = timezone.utc

    # Whole-second rows shared by most columns.
    naive_rows = [datetime(2023, 1, 1, 19, 25, 0), None, datetime(2023, 3, 1, 19, 25, 0)]
    utc_rows = [
        datetime(2023, 1, 1, 19, 25, 0, tzinfo=utc),
        None,
        datetime(2023, 3, 1, 19, 25, 0, tzinfo=utc),
    ]
    # Same instants as utc_rows, but with the seconds field set to 1.
    utc_rows_second_one = [
        datetime(2023, 1, 1, 19, 25, 1, tzinfo=utc),
        None,
        datetime(2023, 3, 1, 19, 25, 1, tzinfo=utc),
    ]

    def nano_stamp(nanosecond, **tz_kwargs):
        # Timestamps that differ only in their nanosecond component.
        return pd.Timestamp(
            year=2024, month=7, day=11, hour=3, minute=30, second=0, microsecond=12, nanosecond=nanosecond, **tz_kwargs
        )

    naive_nano_rows = [nano_stamp(6), None, nano_stamp(7)]
    utc_nano_rows = [nano_stamp(6, tz="UTC"), None, nano_stamp(7, tz="UTC")]

    columns = {
        "timestamp_s": naive_rows,
        "timestamptz_s": utc_rows,
        "timestamp_ms": naive_rows,
        "timestamptz_ms": utc_rows,
        "timestamp_us": naive_rows,
        "timestamptz_us": utc_rows,
        "timestamp_ns": naive_nano_rows,
        "timestamptz_ns": utc_rows,
        "timestamptz_us_etc_utc": utc_rows,
        "timestamptz_ns_z": utc_nano_rows,
        "timestamptz_s_0000": utc_rows_second_one,
    }
    frame = pd.DataFrame(columns)
    return pa.Table.from_pandas(frame, schema=arrow_table_schema_with_all_timestamp_precisions)
2466
+
2467
+
2468
@pytest.fixture(scope="session")
def arrow_table_schema_with_all_microseconds_timestamp_precisions() -> "pa.Schema":
    """Return a Pyarrow schema in which every timestamp column uses microsecond precision.

    Columns named ``timestamp_*`` are timezone-naive; columns named
    ``timestamptz*`` use ``tz="UTC"``.
    """
    import pyarrow as pa

    naive_us = pa.timestamp(unit="us")
    utc_us = pa.timestamp(unit="us", tz="UTC")

    column_names = [
        "timestamp_s",
        "timestamptz_s",
        "timestamp_ms",
        "timestamptz_ms",
        "timestamp_us",
        "timestamptz_us",
        "timestamp_ns",
        "timestamptz_ns",
        "timestamptz_us_etc_utc",
        "timestamptz_ns_z",
        "timestamptz_s_0000",
    ]
    return pa.schema([
        (column, utc_us if column.startswith("timestamptz") else naive_us)
        for column in column_names
    ])
2486
+
2487
+
2488
@pytest.fixture(scope="session")
def table_schema_with_all_microseconds_timestamp_precision() -> Schema:
    """Return an Iceberg Schema with only timestamp and timestamptz columns.

    There is no date column. The eleven fields are all optional
    (``required=False``), carry field ids 1-11, and are typed ``TimestampType``
    for the ``timestamp_*`` names and ``TimestamptzType`` for the
    ``timestamptz*`` names.
    """
    return Schema(
        NestedField(field_id=1, name="timestamp_s", field_type=TimestampType(), required=False),
        NestedField(field_id=2, name="timestamptz_s", field_type=TimestamptzType(), required=False),
        NestedField(field_id=3, name="timestamp_ms", field_type=TimestampType(), required=False),
        NestedField(field_id=4, name="timestamptz_ms", field_type=TimestamptzType(), required=False),
        NestedField(field_id=5, name="timestamp_us", field_type=TimestampType(), required=False),
        NestedField(field_id=6, name="timestamptz_us", field_type=TimestamptzType(), required=False),
        NestedField(field_id=7, name="timestamp_ns", field_type=TimestampType(), required=False),
        NestedField(field_id=8, name="timestamptz_ns", field_type=TimestamptzType(), required=False),
        NestedField(field_id=9, name="timestamptz_us_etc_utc", field_type=TimestamptzType(), required=False),
        NestedField(field_id=10, name="timestamptz_ns_z", field_type=TimestamptzType(), required=False),
        NestedField(field_id=11, name="timestamptz_s_0000", field_type=TimestamptzType(), required=False),
    )
0 commit comments