|
21 | 21 | from random import randint, random, randbytes
|
22 | 22 |
|
23 | 23 | from test.support import script_helper
|
24 |
| -from test.support import (findfile, requires_zlib, requires_bz2, |
25 |
| - requires_lzma, captured_stdout, requires_subprocess) |
| 24 | +from test.support import ( |
| 25 | + findfile, requires_zlib, requires_bz2, requires_lzma, |
| 26 | + captured_stdout, captured_stderr, requires_subprocess |
| 27 | +) |
26 | 28 | from test.support.os_helper import (
|
27 | 29 | TESTFN, unlink, rmtree, temp_dir, temp_cwd, fd_count
|
28 | 30 | )
|
@@ -3210,5 +3212,139 @@ def test_inheritance(self, alpharep):
|
3210 | 3212 | assert isinstance(file, cls)
|
3211 | 3213 |
|
3212 | 3214 |
|
class EncodedMetadataTests(unittest.TestCase):
    """Tests for archives whose member names use a non-UTF-8 encoding.

    ``setUp`` writes a three-member archive to ``TESTFN``.  The names of the
    first two members are patched to raw Shift JIS bytes; the third member
    keeps the name exactly as ``zipfile`` wrote it.  The tests then open the
    archive with the correct, a missing, a wrong and an unusable
    ``metadata_encoding`` and check names, sizes and contents.
    """

    file_names = ['\u4e00', '\u4e8c', '\u4e09']  # Han 'one', 'two', 'three'
    file_content = [
        "This is pure ASCII.\n".encode('ascii'),
        # This is modern Japanese. (UTF-8)
        "\u3053\u308c\u306f\u73fe\u4ee3\u7684\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('utf-8'),
        # This is obsolete Japanese. (Shift JIS)
        "\u3053\u308c\u306f\u53e4\u3044\u65e5\u672c\u8a9e\u3067\u3059\u3002\n".encode('shift_jis'),
    ]

    def setUp(self):
        """Create the fixture archive at ``TESTFN`` (removed on cleanup)."""
        self.addCleanup(unlink, TESTFN)
        # Create .zip of 3 members with Han names encoded in Shift JIS.
        # Each name is 1 Han character encoding to 2 bytes in Shift JIS.
        # The ASCII names are arbitrary as long as they are length 2 and
        # not otherwise contained in the zip file.
        # Data elements are encoded bytes (ascii, utf-8, shift_jis).
        placeholders = ["n1", "n2"] + self.file_names[2:]
        # Pin the member timestamp.  The placeholder swap below is a plain
        # byte replace over the whole archive, so every header field must be
        # deterministic: with the current time, the packed MS-DOS time/date
        # bytes can spell b"n1" or b"n2" at certain moments and the replace
        # would then corrupt a header instead of only the names.
        fixed_date_time = (1980, 1, 1, 0, 0, 0)
        with zipfile.ZipFile(TESTFN, mode="w") as tf:
            for temp, content in zip(placeholders, self.file_content):
                zinfo = zipfile.ZipInfo(temp, date_time=fixed_date_time)
                # NOTE(review): intended to mirror the rw------- mode that
                # writestr() gives members created from a plain name.
                zinfo.external_attr = 0o600 << 16
                tf.writestr(zinfo, content, zipfile.ZIP_STORED)
        # Hack in the Shift JIS names with flag bit 11 (UTF-8) unset.
        with open(TESTFN, "rb") as tf:
            data = tf.read()
        for name, temp in zip(self.file_names, placeholders[:2]):
            data = data.replace(temp.encode('ascii'),
                                name.encode('shift_jis'))
        with open(TESTFN, "wb") as tf:
            tf.write(data)

    def _names_decoded_as(self, encoding):
        """Return the member names as seen when decoding with *encoding*.

        The first two names are Shift JIS bytes reinterpreted through
        *encoding*; the remaining names are returned unchanged.
        """
        legacy = [name.encode('shift_jis').decode(encoding)
                  for name in self.file_names[:2]]
        return legacy + self.file_names[2:]

    def _test_read(self, zipfp, expected_names, expected_content):
        """Check namelist(), infolist(), getinfo() and read() on *zipfp*.

        *expected_names* and *expected_content* are parallel sequences.
        """
        # Check the namelist
        names = zipfp.namelist()
        self.assertEqual(sorted(names), sorted(expected_names))

        # Check infolist
        infos = zipfp.infolist()
        names = [zi.filename for zi in infos]
        self.assertEqual(sorted(names), sorted(expected_names))

        # check getinfo
        for name, content in zip(expected_names, expected_content):
            info = zipfp.getinfo(name)
            self.assertEqual(info.filename, name)
            self.assertEqual(info.file_size, len(content))
            self.assertEqual(zipfp.read(name), content)

    def test_read_with_metadata_encoding(self):
        # Read the ZIP archive with correct metadata_encoding
        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
            self._test_read(zipfp, self.file_names, self.file_content)

    def test_read_without_metadata_encoding(self):
        # Read the ZIP archive without metadata_encoding
        expected_names = self._names_decoded_as('cp437')
        with zipfile.ZipFile(TESTFN, "r") as zipfp:
            self._test_read(zipfp, expected_names, self.file_content)

    def test_read_with_incorrect_metadata_encoding(self):
        # Read the ZIP archive with incorrect metadata_encoding
        expected_names = self._names_decoded_as('koi8-u')
        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='koi8-u') as zipfp:
            self._test_read(zipfp, expected_names, self.file_content)

    def test_read_with_unsuitable_metadata_encoding(self):
        # Read the ZIP archive with metadata_encoding unsuitable for
        # decoding metadata
        with self.assertRaises(UnicodeDecodeError):
            zipfile.ZipFile(TESTFN, "r", metadata_encoding='ascii')
        with self.assertRaises(UnicodeDecodeError):
            zipfile.ZipFile(TESTFN, "r", metadata_encoding='utf-8')

    def test_read_after_append(self):
        # Append a member without metadata_encoding, then re-read the
        # archive both without and with metadata_encoding.
        newname = '\u56db'  # Han 'four'
        expected_names = self._names_decoded_as('cp437')
        expected_names.append(newname)
        expected_content = (*self.file_content, b"newcontent")

        with zipfile.ZipFile(TESTFN, "a") as zipfp:
            zipfp.writestr(newname, "newcontent")
            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))

        with zipfile.ZipFile(TESTFN, "r") as zipfp:
            self._test_read(zipfp, expected_names, expected_content)

        with zipfile.ZipFile(TESTFN, "r", metadata_encoding='shift_jis') as zipfp:
            self.assertEqual(sorted(zipfp.namelist()), sorted(expected_names))
            for i, (name, content) in enumerate(zip(expected_names, expected_content)):
                info = zipfp.getinfo(name)
                self.assertEqual(info.filename, name)
                self.assertEqual(info.file_size, len(content))
                if i < 2:
                    # The two patched members are listed but cannot be read.
                    # NOTE(review): presumably their local header names no
                    # longer match the directory rewritten by the append --
                    # confirm against zipfile.
                    with self.assertRaises(zipfile.BadZipFile):
                        zipfp.read(name)
                else:
                    self.assertEqual(zipfp.read(name), content)

    def test_write_with_metadata_encoding(self):
        # metadata_encoding must be rejected for every writing mode.
        ZF = zipfile.ZipFile
        for mode in ("w", "x", "a"):
            with self.assertRaisesRegex(ValueError,
                                        "^metadata_encoding is only"):
                ZF("nonesuch.zip", mode, metadata_encoding="shift_jis")

    def test_cli_with_metadata_encoding(self):
        # -c combined with --metadata-encoding must exit with an error
        # message on stderr and nothing on stdout.
        errmsg = "Non-conforming encodings not supported with -c."
        args = ["--metadata-encoding=shift_jis", "-c", "nonesuch", "nonesuch"]
        with captured_stdout() as stdout:
            with captured_stderr() as stderr:
                self.assertRaises(SystemExit, zipfile.main, args)
        self.assertEqual(stdout.getvalue(), "")
        self.assertIn(errmsg, stderr.getvalue())

        # -t is only required to run without raising; its output is
        # captured to keep the test log clean and is not inspected.
        with captured_stdout():
            zipfile.main(["--metadata-encoding=shift_jis", "-t", TESTFN])

        with captured_stdout() as stdout:
            zipfile.main(["--metadata-encoding=shift_jis", "-l", TESTFN])
        listing = stdout.getvalue()
        for name in self.file_names:
            self.assertIn(name, listing)

    def test_cli_with_metadata_encoding_extract(self):
        # Extract through the command line interface and check that the
        # decoded names appear on disk.
        os.mkdir(TESTFN2)
        self.addCleanup(rmtree, TESTFN2)
        # Depending on the locale, the member names may not be encodable
        # with the filesystem encoding; probe each one and skip the test
        # instead of failing inside the extraction.
        for name in self.file_names:
            try:
                os.stat(os.path.join(TESTFN2, name))
            except OSError:
                pass  # Expected: nothing has been extracted yet.
            except UnicodeEncodeError:
                self.skipTest(f'cannot encode file name {name!r}')

        zipfile.main(["--metadata-encoding=shift_jis", "-e", TESTFN, TESTFN2])
        listing = os.listdir(TESTFN2)
        for name in self.file_names:
            self.assertIn(name, listing)
| 3347 | + |
| 3348 | + |
3213 | 3349 | if __name__ == "__main__":
|
3214 | 3350 | unittest.main()
|
0 commit comments