Skip to content

Commit 39fec53

Browse files
authored
fix: correct LZ0 to LZO in compression options (#995)
* fix: correct LZ0 to LZO in compression options
* fix: disable LZO compression option and update tests to reflect its unavailability
* fix: ruff format expected string in test_execution_plan
* fix: update test for execution plan and add validation for invalid LZO compression
* fix: remove LZO compression option and related test cases
* ruff autoformat
* fix: remove TODO comment regarding LZO compression implementation
1 parent 2d8b1d3 commit 39fec53

File tree

4 files changed

+16
-12
lines changed

4 files changed

+16
-12
lines changed

python/datafusion/dataframe.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,9 @@ class Compression(Enum):
5757
GZIP = "gzip"
5858
BROTLI = "brotli"
5959
LZ4 = "lz4"
60-
LZ0 = "lz0"
60+
# lzo is not implemented yet
61+
# https://github.com/apache/arrow-rs/issues/6970
62+
# LZO = "lzo"
6163
ZSTD = "zstd"
6264
LZ4_RAW = "lz4_raw"
6365

@@ -696,10 +698,10 @@ def write_parquet(
696698
- "snappy": Snappy compression.
697699
- "gzip": Gzip compression.
698700
- "brotli": Brotli compression.
699-
- "lz0": LZ0 compression.
700701
- "lz4": LZ4 compression.
701702
- "lz4_raw": LZ4_RAW compression.
702703
- "zstd": Zstandard compression.
704+
Note: LZO is not yet implemented in arrow-rs and is therefore excluded.
703705
compression_level: Compression level to use. For ZSTD, the
704706
recommended range is 1 to 22, with the default being 4. Higher levels
705707
provide better compression but slower speed.

python/tests/test_dataframe.py

+2
Original file line numberDiff line numberDiff line change
@@ -1115,6 +1115,8 @@ def test_write_compressed_parquet_invalid_compression(df, tmp_path, compression)
11151115
df.write_parquet(str(path), compression=compression)
11161116

11171117

1118+
# not testing lzo because it is not implemented yet
1119+
# https://github.com/apache/arrow-rs/issues/6970
11181120
@pytest.mark.parametrize("compression", ["zstd", "brotli", "gzip"])
11191121
def test_write_compressed_parquet_default_compression_level(df, tmp_path, compression):
11201122
# Test write_parquet with zstd, brotli, gzip default compression level,

python/tests/test_functions.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -790,9 +790,9 @@ def test_hash_functions(df):
790790
)
791791
assert result.column(2) == pa.array(
792792
[
793-
b("185F8DB32271FE25F561A6FC938B2E26" "4306EC304EDA518007D1764826381969"),
794-
b("78AE647DC5544D227130A0682A51E30B" "C7777FBB6D8A8F17007463A3ECD1D524"),
795-
b("BB7208BC9B5D7C04F1236A82A0093A5E" "33F40423D5BA8D4266F7092C3BA43B62"),
793+
b("185F8DB32271FE25F561A6FC938B2E264306EC304EDA518007D1764826381969"),
794+
b("78AE647DC5544D227130A0682A51E30BC7777FBB6D8A8F17007463A3ECD1D524"),
795+
b("BB7208BC9B5D7C04F1236A82A0093A5E33F40423D5BA8D4266F7092C3BA43B62"),
796796
]
797797
)
798798
assert result.column(3) == pa.array(
@@ -838,16 +838,16 @@ def test_hash_functions(df):
838838
)
839839
assert result.column(5) == pa.array(
840840
[
841-
b("F73A5FBF881F89B814871F46E26AD3FA" "37CB2921C5E8561618639015B3CCBB71"),
842-
b("B792A0383FB9E7A189EC150686579532" "854E44B71AC394831DAED169BA85CCC5"),
843-
b("27988A0E51812297C77A433F63523334" "6AEE29A829DCF4F46E0F58F402C6CFCB"),
841+
b("F73A5FBF881F89B814871F46E26AD3FA37CB2921C5E8561618639015B3CCBB71"),
842+
b("B792A0383FB9E7A189EC150686579532854E44B71AC394831DAED169BA85CCC5"),
843+
b("27988A0E51812297C77A433F635233346AEE29A829DCF4F46E0F58F402C6CFCB"),
844844
]
845845
)
846846
assert result.column(6) == pa.array(
847847
[
848-
b("FBC2B0516EE8744D293B980779178A35" "08850FDCFE965985782C39601B65794F"),
849-
b("BF73D18575A736E4037D45F9E316085B" "86C19BE6363DE6AA789E13DEAACC1C4E"),
850-
b("C8D11B9F7237E4034ADBCD2005735F9B" "C4C597C75AD89F4492BEC8F77D15F7EB"),
848+
b("FBC2B0516EE8744D293B980779178A3508850FDCFE965985782C39601B65794F"),
849+
b("BF73D18575A736E4037D45F9E316085B86C19BE6363DE6AA789E13DEAACC1C4E"),
850+
b("C8D11B9F7237E4034ADBCD2005735F9BC4C597C75AD89F4492BEC8F77D15F7EB"),
851851
]
852852
)
853853
assert result.column(7) == result.column(1) # SHA-224

src/dataframe.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -491,7 +491,7 @@ impl PyDataFrame {
491491
ZstdLevel::try_new(verify_compression_level(compression_level)? as i32)
492492
.map_err(|e| PyValueError::new_err(format!("{e}")))?,
493493
),
494-
"lz0" => Compression::LZO,
494+
"lzo" => Compression::LZO,
495495
"lz4" => Compression::LZ4,
496496
"lz4_raw" => Compression::LZ4_RAW,
497497
"uncompressed" => Compression::UNCOMPRESSED,

0 commit comments

Comments
 (0)