@@ -203,18 +203,25 @@ def post(self, uri: str, data: dict | None, is_creator: bool = False, is_custome
203
203
else :
204
204
return self .request (method = "POST" , url = f"{ endpoint } { uri } " , ** kwargs )
205
205
206
def get_note_by_id(self, note_id: str, xsec_token: str, xsec_source: str = "pc_feed"):
    """Fetch a single note through the web feed endpoint.

    :param note_id: id of the note you want to fetch
    :type note_id: str
    :param xsec_token: value sent as ``xsec_token`` in the request body
    :type xsec_token: str
    :param xsec_source: value sent as ``xsec_source`` in the request body,
        ``"pc_feed"`` when not given
    :type xsec_source: str
    :return: the ``note_card`` entry of the first element of ``items``
        in the response
    :rtype: dict
    """
    # Key order is kept exactly as the request body was originally built.
    payload = {
        "source_note_id": note_id,
        "image_formats": ["jpg", "webp", "avif"],
        "extra": {"need_body_topic": 1},
        "xsec_source": xsec_source,
        "xsec_token": xsec_token,
    }
    response = self.post("/api/sns/web/v1/feed", payload)
    first_item = response["items"][0]
    return first_item["note_card"]
216
223
217
- def get_note_by_id_from_html (self , note_id : str ):
224
+ def get_note_by_id_from_html (self , note_id : str , xsec_token : str , xsec_source : str = "pc_feed" ):
218
225
"""get note info from "https://www.xiaohongshu.com/explore/" + note_id,
219
226
and the return obj is equal to get_note_by_id
220
227
@@ -245,7 +252,7 @@ def transform_json_keys(json_data):
245
252
dict_new [new_key ] = value
246
253
return dict_new
247
254
248
- url = "https://www.xiaohongshu.com/explore/" + note_id
255
+ url = f "https://www.xiaohongshu.com/explore/{ note_id } ?xsec_token= { xsec_token } &xsec_source= { xsec_source } "
249
256
res = self .session .get (url , headers = {"user-agent" : self .user_agent , "referer" : "https://www.xiaohongshu.com/" })
250
257
html = res .text
251
258
state = re .findall (r"window.__INITIAL_STATE__=({.*})</script>" , html )[0 ].replace ("undefined" , '""' )
@@ -463,11 +470,10 @@ def get_user_all_notes(self, user_id: str, crawl_interval: int = 1):
463
470
res = self .get_user_notes (user_id , cursor )
464
471
has_more = res ["has_more" ]
465
472
cursor = res ["cursor" ]
466
- note_ids = map (lambda item : item ["note_id" ], res ["notes" ])
467
473
468
- for note_id in note_ids :
474
+ for item in res [ "notes" ] :
469
475
try :
470
- note = self .get_note_by_id (note_id )
476
+ note = self .get_note_by_id (item [ " note_id" ], item [ "xsec_token" ] )
471
477
except DataFetchError as e :
472
478
if ErrorEnum .NOTE_ABNORMAL .value .msg in e .__repr__ () or ErrorEnum .NOTE_SECRETE_FAULT .value .msg in e .__repr__ ():
473
479
continue
0 commit comments