Skip to content

Commit 9124ab3

Browse files
authored
Merge pull request #692 from BLKSerene/dev
Fix notifications that newer versions of corpora are available
2 parents 976eb28 + 739f04d commit 9124ab3

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed

pythainlp/corpus/core.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -399,10 +399,11 @@ def download(
399399
return False
400400
corpus_versions = corpus["versions"][version]
401401
file_name = corpus_versions["filename"]
402-
found = ''
402+
found = ""
403403
for i, item in local_db["_default"].items():
404-
if item["name"] == name and item["version"] == version:
405-
# Record corpus no. if found
404+
# Do not check version here
405+
if item["name"] == name:
406+
# Record corpus no. if found in local database
406407
found = i
407408
break
408409

@@ -445,7 +446,9 @@ def download(
445446
# This awkward behavior is for backward-compatibility with
446447
# database files generated previously using TinyDB
447448
if local_db["_default"]:
448-
corpus_no = max((int(no) for no in local_db["_default"])) + 1
449+
corpus_no = max((
450+
int(no) for no in local_db["_default"]
451+
)) + 1
449452
else:
450453
corpus_no = 1
451454
local_db["_default"][str(corpus_no)] = {
@@ -458,8 +461,10 @@ def download(
458461

459462
with open(corpus_db_path(), "w", encoding="utf-8") as f:
460463
json.dump(local_db, f, ensure_ascii=False)
464+
# Check if versions match if the corpus is found in local database
465+
# but a re-download is not forced
461466
else:
462-
current_ver = local_db['_default'][found]["version"]
467+
current_ver = local_db["_default"][found]["version"]
463468

464469
if current_ver == version:
465470
# Already has the same version
@@ -519,8 +524,8 @@ def remove(name: str) -> bool:
519524
os.remove(path)
520525
for i, corpus in db["_default"].copy().items():
521526
if corpus["name"] == name:
522-
del db['_default'][i]
523-
with open(corpus_db_path(), 'w', encoding='utf-8') as f:
527+
del db["_default"][i]
528+
with open(corpus_db_path(), "w", encoding="utf-8") as f:
524529
json.dump(db, f, ensure_ascii=False)
525530
return True
526531

tests/test_corpus.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,10 @@ def test_corpus(self):
9898
self.assertIsNotNone(download(name="test", version="0.0.9"))
9999
self.assertIsNotNone(download(name="test", version="0.0.10"))
100100
with self.assertRaises(Exception) as context:
101-
self.assertIsNotNone(download(name="test", version="0.0.11"))
101+
# Force re-downloading since the corpus already exists
102+
self.assertIsNotNone(download(
103+
name="test", version="0.0.11", force=True
104+
))
102105
self.assertTrue(
103106
"Hash does not match expected."
104107
in

0 commit comments

Comments
 (0)