Skip to content

Commit 84877eb

Browse files
committed
feat(nytimes): update news_bot.py
1 parent 001e7d1 commit 84877eb

File tree

1 file changed

+6 / -5 lines changed

1 file changed

+6 / -5 lines changed

scripts/nytimes/news_bot.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -148,7 +148,7 @@ def fetch_and_summarize(url, fallback_title=None):
148148
text_content = text_content.strip()
149149
if len(text_content) > 3000:
150150
text_content = text_content[:3000]
151-
summary = ai_summarize(text_content, url)
151+
summary = ai_summarize(text_content, url, title)
152152
return {"url": url, "summary": summary, "title": title}
153153

154154
def limit_to_n_words(text, n):
@@ -157,16 +157,17 @@ def limit_to_n_words(text, n):
157157
return text.strip()
158158
return ' '.join(words[:n]) + "..."
159159

160-
def ai_summarize(text, url=None):
160+
def ai_summarize(text, url=None, title=None):
161161
if not MISTRAL_API_KEY:
162162
print("No MISTRAL_API_KEY set. Returning first 15 words as summary.")
163163
return limit_to_n_words(text, 15)
164164
prompt = (
165-
"If the original text is in Chinese, first translate it to English, then summarize. "
165+
"If the original text is in Chinese, summarize it in English. "
166166
"Summarize the following web page content in clear, concise English. "
167167
"Focus on the single most important point or insight. "
168168
"Your summary should be around 300 characters. "
169169
"Output only the summary sentence:\n"
170+
f"Title: {title if title else ''}\n"
170171
f"{text}\n"
171172
f"{'Original link: ' + url if url else ''}"
172173
)
@@ -223,7 +224,7 @@ def summarize_nytimes_article(url):
223224
title_element = soup.select_one('.article-area .article-content .article-header header h1')
224225
title = title_element.text.strip() if title_element else (soup.title.text.strip() if soup.title else url)
225226
# Extract the main article text
226-
article_area = soup.find('div', class_='article-area')
227+
article_area = soup.find('section', class_='article-body')
227228
if article_area:
228229
article_text = article_area.get_text(separator='\n', strip=True)
229230
else:
@@ -232,7 +233,7 @@ def summarize_nytimes_article(url):
232233
article_text = soup.get_text(separator='\n', strip=True)
233234
if len(article_text) > 3000:
234235
article_text = article_text[:3000]
235-
summary = ai_summarize(article_text, url)
236+
summary = ai_summarize(article_text, url, title)
236237
return {"url": url, "summary": summary, "title": title}
237238

238239
def main():

0 commit comments

Comments (0)