@@ -148,7 +148,7 @@ def fetch_and_summarize(url, fallback_title=None):
148
148
text_content = text_content .strip ()
149
149
if len (text_content ) > 3000 :
150
150
text_content = text_content [:3000 ]
151
- summary = ai_summarize (text_content , url )
151
+ summary = ai_summarize (text_content , url , title )
152
152
return {"url" : url , "summary" : summary , "title" : title }
153
153
154
154
def limit_to_n_words (text , n ):
@@ -157,16 +157,17 @@ def limit_to_n_words(text, n):
157
157
return text .strip ()
158
158
return ' ' .join (words [:n ]) + "..."
159
159
160
- def ai_summarize (text , url = None ):
160
+ def ai_summarize (text , url = None , title = None ):
161
161
if not MISTRAL_API_KEY :
162
162
print ("No MISTRAL_API_KEY set. Returning first 15 words as summary." )
163
163
return limit_to_n_words (text , 15 )
164
164
prompt = (
165
- "If the original text is in Chinese, first translate it to English, then summarize . "
165
+ "If the original text is in Chinese, summarize it in English. "
166
166
"Summarize the following web page content in clear, concise English. "
167
167
"Focus on the single most important point or insight. "
168
168
"Your summary should be around 300 characters. "
169
169
"Output only the summary sentence:\n "
170
+ f"Title: { title if title else '' } \n "
170
171
f"{ text } \n "
171
172
f"{ 'Original link: ' + url if url else '' } "
172
173
)
@@ -223,7 +224,7 @@ def summarize_nytimes_article(url):
223
224
title_element = soup .select_one ('.article-area .article-content .article-header header h1' )
224
225
title = title_element .text .strip () if title_element else (soup .title .text .strip () if soup .title else url )
225
226
# Extract the main article text
226
- article_area = soup .find ('div ' , class_ = 'article-area ' )
227
+ article_area = soup .find ('section ' , class_ = 'article-body ' )
227
228
if article_area :
228
229
article_text = article_area .get_text (separator = '\n ' , strip = True )
229
230
else :
@@ -232,7 +233,7 @@ def summarize_nytimes_article(url):
232
233
article_text = soup .get_text (separator = '\n ' , strip = True )
233
234
if len (article_text ) > 3000 :
234
235
article_text = article_text [:3000 ]
235
- summary = ai_summarize (article_text , url )
236
+ summary = ai_summarize (article_text , url , title )
236
237
return {"url" : url , "summary" : summary , "title" : title }
237
238
238
239
def main ():
0 commit comments