Skip to content

Commit 0b1775a

Browse files
committed
feat(lang-support): add shared objects post in English and French
1 parent 7547302 commit 0b1775a

File tree

3 files changed

+291
-19
lines changed

3 files changed

+291
-19
lines changed
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
---
2+
audio: true
3+
lang: en
4+
layout: post
5+
title: Shared Objects in Multiple Threads
6+
translated: false
7+
---
8+
9+
10+
11+
## Lesson
12+
13+
The code demonstrates a peculiar bug that appears inconsistently. Sometimes the bug occurs, and sometimes it does not, making it difficult to reproduce and debug.
14+
15+
This intermittent behavior stems from the way the `translate_markdown_file` function, and in particular the `translate_front_matter` function, handles shared data. These functions may access and modify shared data structures, such as dictionaries or lists, without proper synchronization.
16+
17+
When multiple threads access and modify the same data concurrently, it can lead to race conditions. Race conditions occur when the final state of the data depends on the unpredictable order in which threads execute. This can result in data corruption, unexpected program behavior, and the intermittent bugs you are observing.
18+
19+
To fix this, you should either avoid sharing mutable data between threads or use proper synchronization mechanisms, such as locks, to protect shared data. In this case, the `front_matter_dict` is being modified in place, which is not thread-safe. The fix is to create a copy of the dictionary before modifying it. The "After" version below already does this (`front_matter_dict_copy = front_matter_dict.copy()`), but it's important to understand why it's necessary.
20+
21+
## Context
22+
23+
```python
24+
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
25+
futures = []
26+
for filename in changed_files:
27+
input_file = filename
28+
29+
for lang in languages:
30+
31+
print(f"Submitting translation job for {filename} to {lang}...")
32+
future = executor.submit(translate_markdown_file, input_file, os.path.join(f"_posts/{lang}", os.path.basename(filename).replace(".md", f"-{lang}.md")), lang, dry_run)
33+
futures.append(future)
34+
35+
for future in concurrent.futures.as_completed(futures):
36+
try:
37+
future.result()
38+
except Exception as e:
39+
print(f"A thread failed: {e}")
40+
```
41+
42+
## Before
43+
44+
```python
45+
def translate_front_matter(front_matter, target_language, input_file):
46+
print(f" Translating front matter for: {input_file}")
47+
if not front_matter:
48+
print(f" No front matter found for: {input_file}")
49+
return ""
50+
try:
51+
front_matter_dict = {}
52+
if front_matter:
53+
front_matter_dict = yaml.safe_load(front_matter)
54+
print(f" Front matter after safe_load: {front_matter_dict}")
55+
if 'title' in front_matter_dict:
56+
print(f" Translating title: {front_matter_dict['title']}")
57+
if not (input_file == 'original/2025-01-11-resume-en.md' and target_language in ['zh', 'fr']):
58+
if isinstance(front_matter_dict['title'], str):
59+
translated_title = translate_text(front_matter_dict['title'], target_language)
60+
if translated_title:
61+
translated_title = translated_title.strip()
62+
if len(translated_title) > 300:
63+
translated_title = translated_title.split('\n')[0]
64+
front_matter_dict['title'] = translated_title
65+
print(f" Translated title to: {translated_title}")
66+
else:
67+
print(f" Title translation failed for: {input_file}")
68+
else:
69+
print(f" Title is not a string, skipping translation for: {input_file}")
70+
else:
71+
print(f" Skipping title translation for {input_file} to {target_language}")
72+
# Always set lang to target_language
73+
74+
# Determine if the file is a translation
75+
original_lang = 'en' # Default to english
76+
if 'lang' in front_matter_dict:
77+
original_lang = front_matter_dict['lang']
78+
79+
if target_language != original_lang:
80+
front_matter_dict['lang'] = target_language
81+
front_matter_dict['translated'] = True
82+
print(f" Marked as translated to {target_language} for: {input_file}")
83+
else:
84+
front_matter_dict['translated'] = False
85+
print(f" Not marked as translated for: {input_file}")
86+
87+
88+
result = "---\n" + yaml.dump(front_matter_dict, allow_unicode=True) + "---"
89+
print(f" Front matter translation complete for: {input_file}")
90+
return result
91+
except yaml.YAMLError as e:
92+
print(f" Error parsing front matter: {e}")
93+
return front_matter
94+
```
95+
96+
## After
97+
98+
```python
99+
def translate_front_matter(front_matter, target_language, input_file):
100+
print(f" Translating front matter for: {input_file}")
101+
if not front_matter:
102+
print(f" No front matter found for: {input_file}")
103+
return ""
104+
try:
105+
front_matter_dict = {}
106+
if front_matter:
107+
front_matter_dict = yaml.safe_load(front_matter)
108+
print(f" Front matter after safe_load: {front_matter_dict}")
109+
110+
front_matter_dict_copy = front_matter_dict.copy()
111+
112+
if 'title' in front_matter_dict_copy:
113+
print(f" Translating title: {front_matter_dict_copy['title']}")
114+
if not (input_file == 'original/2025-01-11-resume-en.md' and target_language in ['zh', 'fr']):
115+
if isinstance(front_matter_dict_copy['title'], str):
116+
translated_title = translate_text(front_matter_dict_copy['title'], target_language)
117+
if translated_title:
118+
translated_title = translated_title.strip()
119+
if len(translated_title) > 300:
120+
translated_title = translated_title.split('\n')[0]
121+
front_matter_dict_copy['title'] = translated_title
122+
print(f" Translated title to: {translated_title}")
123+
else:
124+
print(f" Title translation failed for: {input_file}")
125+
else:
126+
print(f" Title is not a string, skipping translation for: {input_file}")
127+
else:
128+
print(f" Skipping title translation for {input_file} to {target_language}")
129+
# Always set lang to target_language
130+
131+
front_matter_dict_copy['lang'] = target_language
132+
front_matter_dict_copy['translated'] = True
133+
134+
result = "---\n" + yaml.dump(front_matter_dict_copy, allow_unicode=True) + "---"
135+
print(f" Front matter translation complete for: {input_file}")
136+
return result
137+
except yaml.YAMLError as e:
138+
print(f" Error parsing front matter: {e}")
139+
return front_matter
140+
```
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
---
2+
audio: true
3+
lang: fr
4+
layout: post
5+
title: Objets Partagés dans Plusieurs Threads
6+
translated: true
7+
---
8+
9+
## Leçon
10+
11+
Le code démontre un bug particulier qui apparaît de manière intermittente. Parfois, le bug se produit, et parfois il ne se produit pas, ce qui le rend difficile à reproduire et à déboguer.
12+
13+
Ce comportement intermittent découle de la manière dont la fonction `translate_markdown_file`, et en particulier la fonction `translate_front_matter`, gère les données partagées. Ces fonctions pourraient accéder à des structures de données partagées, comme des dictionnaires ou des listes, et les modifier sans une synchronisation appropriée.
14+
15+
Lorsque plusieurs threads accèdent aux mêmes données et les modifient de manière concurrente, cela peut entraîner des conditions de course. Les conditions de course se produisent lorsque l'état final des données dépend de l'ordre imprévisible dans lequel les threads s'exécutent. Cela peut entraîner une corruption des données, un comportement inattendu du programme et les bugs intermittents que vous observez.
16+
17+
Pour résoudre ce problème, vous devriez soit éviter de partager des données mutables entre les threads, soit utiliser des mécanismes de synchronisation appropriés, tels que des verrous, pour protéger les données partagées. Dans ce cas, le `front_matter_dict` est modifié en place, ce qui n'est pas thread-safe. La solution consiste à créer une copie du dictionnaire avant de le modifier. La version « Après » ci-dessous le fait déjà (`front_matter_dict_copy = front_matter_dict.copy()`), mais il est important de comprendre pourquoi c'est nécessaire.
18+
19+
## Contexte
20+
21+
```python
22+
with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_THREADS) as executor:
23+
futures = []
24+
for filename in changed_files:
25+
input_file = filename
26+
27+
for lang in languages:
28+
29+
print(f"Submitting translation job for {filename} to {lang}...")
30+
future = executor.submit(translate_markdown_file, input_file, os.path.join(f"_posts/{lang}", os.path.basename(filename).replace(".md", f"-{lang}.md")), lang, dry_run)
31+
futures.append(future)
32+
33+
for future in concurrent.futures.as_completed(futures):
34+
try:
35+
future.result()
36+
except Exception as e:
37+
print(f"A thread failed: {e}")
38+
```
39+
40+
## Avant
41+
42+
```python
43+
def translate_front_matter(front_matter, target_language, input_file):
44+
print(f" Translating front matter for: {input_file}")
45+
if not front_matter:
46+
print(f" No front matter found for: {input_file}")
47+
return ""
48+
try:
49+
front_matter_dict = {}
50+
if front_matter:
51+
front_matter_dict = yaml.safe_load(front_matter)
52+
print(f" Front matter after safe_load: {front_matter_dict}")
53+
if 'title' in front_matter_dict:
54+
print(f" Translating title: {front_matter_dict['title']}")
55+
if not (input_file == 'original/2025-01-11-resume-en.md' and target_language in ['zh', 'fr']):
56+
if isinstance(front_matter_dict['title'], str):
57+
translated_title = translate_text(front_matter_dict['title'], target_language)
58+
if translated_title:
59+
translated_title = translated_title.strip()
60+
if len(translated_title) > 300:
61+
translated_title = translated_title.split('\n')[0]
62+
front_matter_dict['title'] = translated_title
63+
print(f" Translated title to: {translated_title}")
64+
else:
65+
print(f" Title translation failed for: {input_file}")
66+
else:
67+
print(f" Title is not a string, skipping translation for: {input_file}")
68+
else:
69+
print(f" Skipping title translation for {input_file} to {target_language}")
70+
# Always set lang to target_language
71+
72+
# Determine if the file is a translation
73+
original_lang = 'en' # Default to english
74+
if 'lang' in front_matter_dict:
75+
original_lang = front_matter_dict['lang']
76+
77+
if target_language != original_lang:
78+
front_matter_dict['lang'] = target_language
79+
front_matter_dict['translated'] = True
80+
print(f" Marked as translated to {target_language} for: {input_file}")
81+
else:
82+
front_matter_dict['translated'] = False
83+
print(f" Not marked as translated for: {input_file}")
84+
85+
86+
result = "---\n" + yaml.dump(front_matter_dict, allow_unicode=True) + "---"
87+
print(f" Front matter translation complete for: {input_file}")
88+
return result
89+
except yaml.YAMLError as e:
90+
print(f" Error parsing front matter: {e}")
91+
return front_matter
92+
```
93+
94+
## Après
95+
96+
```python
97+
def translate_front_matter(front_matter, target_language, input_file):
98+
print(f" Translating front matter for: {input_file}")
99+
if not front_matter:
100+
print(f" No front matter found for: {input_file}")
101+
return ""
102+
try:
103+
front_matter_dict = {}
104+
if front_matter:
105+
front_matter_dict = yaml.safe_load(front_matter)
106+
print(f" Front matter after safe_load: {front_matter_dict}")
107+
108+
front_matter_dict_copy = front_matter_dict.copy()
109+
110+
if 'title' in front_matter_dict_copy:
111+
print(f" Translating title: {front_matter_dict_copy['title']}")
112+
if not (input_file == 'original/2025-01-11-resume-en.md' and target_language in ['zh', 'fr']):
113+
if isinstance(front_matter_dict_copy['title'], str):
114+
translated_title = translate_text(front_matter_dict_copy['title'], target_language)
115+
if translated_title:
116+
translated_title = translated_title.strip()
117+
if len(translated_title) > 300:
118+
translated_title = translated_title.split('\n')[0]
119+
front_matter_dict_copy['title'] = translated_title
120+
print(f" Translated title to: {translated_title}")
121+
else:
122+
print(f" Title translation failed for: {input_file}")
123+
else:
124+
print(f" Title is not a string, skipping translation for: {input_file}")
125+
else:
126+
print(f" Skipping title translation for {input_file} to {target_language}")
127+
# Always set lang to target_language
128+
129+
front_matter_dict_copy['lang'] = target_language
130+
front_matter_dict_copy['translated'] = True
131+
132+
result = "---\n" + yaml.dump(front_matter_dict_copy, allow_unicode=True) + "---"
133+
print(f" Front matter translation complete for: {input_file}")
134+
return result
135+
except yaml.YAMLError as e:
136+
print(f" Error parsing front matter: {e}")
137+
return front_matter
138+
```

scripts/update_lang.py

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -87,21 +87,15 @@ def translate_front_matter(front_matter, target_language, input_file):
8787

8888
if 'title' in front_matter_dict_copy:
8989
print(f" Translating title: {front_matter_dict_copy['title']}")
90-
if not (input_file == 'original/2025-01-11-resume-en.md' and target_language in ['zh', 'fr']):
91-
if isinstance(front_matter_dict_copy['title'], str):
92-
translated_title = translate_text(front_matter_dict_copy['title'], target_language)
93-
if translated_title:
94-
translated_title = translated_title.strip()
95-
if len(translated_title) > 300:
96-
translated_title = translated_title.split('\n')[0]
97-
front_matter_dict_copy['title'] = translated_title
98-
print(f" Translated title to: {translated_title}")
99-
else:
100-
print(f" Title translation failed for: {input_file}")
101-
else:
102-
print(f" Title is not a string, skipping translation for: {input_file}")
90+
translated_title = translate_text(front_matter_dict_copy['title'], target_language)
91+
if translated_title:
92+
translated_title = translated_title.strip()
93+
front_matter_dict_copy['title'] = translated_title
94+
print(f" Translated title to: {translated_title}")
10395
else:
104-
print(f" Skipping title translation for {input_file} to {target_language}")
96+
print(f" Title translation failed for: {input_file}")
97+
else:
98+
print(f" Skipping title translation for {input_file} to {target_language}")
10599
# Always set lang to target_language
106100

107101
front_matter_dict_copy['lang'] = target_language
@@ -176,9 +170,9 @@ def get_changed_files():
176170
with open(output_file, 'r', encoding='utf-8') as translated_infile:
177171
translated_content = translated_infile.read()
178172

179-
translated_front_matter_match = re.match(r'---\n(.*?)\n---', translated_content, re.DOTALL)
180-
translated_front_matter = translated_front_matter_match.group(1) if translated_front_matter_match else ""
181-
translated_content_without_front_matter = translated_content[len(translated_front_matter_match.group(0)):] if translated_front_matter_match else translated_content
173+
target_front_matter_match = re.match(r'---\n(.*?)\n---', translated_content, re.DOTALL)
174+
translated_front_matter = target_front_matter_match.group(1) if target_front_matter_match else ""
175+
translated_content_without_front_matter = translated_content[len(target_front_matter_match.group(0)):] if target_front_matter_match else translated_content
182176
translated_front_matter_dict = yaml.safe_load(translated_front_matter) if translated_front_matter else {}
183177
translated_title = translated_front_matter_dict.get('title', '')
184178

@@ -251,8 +245,8 @@ def main():
251245
for lang in languages:
252246
output_dir = f"_posts/{lang}"
253247
os.makedirs(output_dir, exist_ok=True)
254-
255-
output_filename = os.path.basename(filename).replace(".md", f"-{lang}.md")
248+
249+
output_filename = os.path.basename(filename).replace("-en.md", f"-{lang}.md")
256250

257251
output_file = os.path.join(output_dir, output_filename)
258252

0 commit comments

Comments
 (0)