Skip to content

Commit e541d55

Browse files
committed
feat: adds a tool to consolidate (hard-link) duplicated files
Many files in the libs/ directory are duplicated because most archs share a lot in common. Maintaining modifications across all possible archs is tedious, so this tool hard-links all identical files, so that modifying one modifies all of them. Git is agnostic to this.
1 parent d143081 commit e541d55

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

Diff for: tools/consolidate-libs.py

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
#!/usr/bin/env python
2+
3+
__doc__ = """Scans src/lib/arch/<arch>/** and hard-links files that have
the same relative path and content"""
5+
6+
import glob
7+
import os
8+
import re
9+
10+
from collections import defaultdict
11+
from pathlib import Path
12+
from typing import NamedTuple
13+
14+
# Directory that holds one subdirectory of library sources per architecture,
# located relative to this script (two levels above it, then src/lib/arch).
ROOT_DIR = Path(__file__).parent.parent.absolute().joinpath("src", "lib", "arch")

# Architecture directory names.
# NOTE(review): not referenced by the code visible in this file -- confirm use.
ARCHS = ("zx48k", "zxnext")
16+
17+
18+
class FileInfo(NamedTuple):
    """Hashable key identifying a library file independently of its arch."""

    # Path of the file relative to its architecture directory.
    path: str
    # Integer digest of the file's binary contents.
    hash: int
21+
22+
23+
def get_file_list(root: Path) -> list[str]:
    """Return the paths of all regular files found recursively under *root*.

    Directories matched by the recursive glob are dropped; only entries for
    which ``os.path.isfile`` is true are kept.
    """
    pattern = str(root / "**" / "*")
    return list(filter(os.path.isfile, glob.glob(pattern, recursive=True)))
26+
27+
28+
def scan_arch(root: Path) -> dict[FileInfo, list[str]]:
    """Group the files under *root* by arch-relative path and content.

    Every file returned by ``get_file_list(root)`` whose path matches
    ``.../src/lib/arch/<arch>/<relative path>`` is keyed by a ``FileInfo``
    made of that relative path and a digest of the file's bytes. Files that
    do not match the pattern are ignored.

    Returns:
        A mapping from ``FileInfo`` to the list of full file paths sharing
        that relative path and content. Entries with more than one path are
        duplicates across architectures.
    """
    # Local import keeps this function self-contained; hashlib is stdlib.
    import hashlib

    result = defaultdict(list)
    re_arch = re.compile(r"^.*?/src/lib/arch/([^/]+)/(.*)$")

    for file in get_file_list(root):
        match = re_arch.match(file)
        if not match:
            continue

        # Close the handle deterministically instead of leaking it.
        with open(file, "rb") as stream:
            digest = hashlib.sha256(stream.read()).digest()

        # Files grouped together get replaced by hard links, so identify the
        # content with a cryptographic digest rather than the builtin hash(),
        # which is only meant for dict lookups. Stored as an int to keep the
        # FileInfo.hash field type unchanged.
        key = FileInfo(path=match.group(2), hash=int.from_bytes(digest, "big"))
        result[key].append(file)

    return result
42+
43+
44+
def fold_files(scan: dict[FileInfo, list[str]]) -> None:
    """Replace duplicated files with hard links to a single copy.

    For every group of paths in *scan* with more than one entry, the first
    path is kept as the main file and each remaining path is replaced by a
    hard link to it. Paths that already refer to the same file as the main
    one are left untouched.

    Raises:
        OSError: if a file cannot be inspected, linked or replaced. The file
            being replaced is left in place when this happens.
    """
    for files in scan.values():
        if len(files) < 2:
            continue

        main_file = files[0]
        for file in files[1:]:
            # Already the same inode: nothing to do. This check is also
            # required because renaming one hard link over another link of
            # the same file is a no-op that would leave the temp name behind.
            if os.path.samefile(main_file, file):
                continue

            print(f"Linking {main_file} to {file}")
            # Create the link under a temporary name in the same directory and
            # swap it in, so a failing link never leaves *file* deleted.
            tmp_link = f"{file}.consolidate-tmp"
            os.link(main_file, tmp_link)
            try:
                os.replace(tmp_link, file)
            except OSError:
                os.unlink(tmp_link)
                raise
54+
55+
56+
def main():
    """Scan ROOT_DIR for duplicated library files and hard-link them."""
    fold_files(scan_arch(ROOT_DIR))


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)