@@ -262,6 +262,7 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
262
262
263
263
# The number of milliseconds that the response should be considered valid.
264
264
expiration_ms = media_info .expires
265
+ author_name : Optional [str ] = None
265
266
266
267
if _is_media (media_info .media_type ):
267
268
file_id = media_info .filesystem_id
@@ -294,25 +295,33 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
294
295
# Check if this HTML document points to oEmbed information and
295
296
# defer to that.
296
297
oembed_url = self ._oembed .autodiscover_from_html (tree )
297
- og = {}
298
+ og_from_oembed : JsonDict = {}
298
299
if oembed_url :
299
300
oembed_info = await self ._download_url (oembed_url , user )
300
- og , expiration_ms = await self ._handle_oembed_response (
301
+ (
302
+ og_from_oembed ,
303
+ author_name ,
304
+ expiration_ms ,
305
+ ) = await self ._handle_oembed_response (
301
306
url , oembed_info , expiration_ms
302
307
)
303
308
304
- # If there was no oEmbed URL (or oEmbed parsing failed), attempt
305
- # to generate the Open Graph information from the HTML.
306
- if not oembed_url or not og :
307
- og = parse_html_to_open_graph (tree , media_info .uri )
309
+ # Parse Open Graph information from the HTML in case the oEmbed
310
+ # response failed or is incomplete.
311
+ og_from_html = parse_html_to_open_graph (tree , media_info .uri )
312
+
313
+ # Compile the Open Graph response by using the scraped
314
+ # information from the HTML and overlaying any information
315
+ # from the oEmbed response.
316
+ og = {** og_from_html , ** og_from_oembed }
308
317
309
318
await self ._precache_image_url (user , media_info , og )
310
319
else :
311
320
og = {}
312
321
313
322
elif oembed_url :
314
323
# Handle the oEmbed information.
315
- og , expiration_ms = await self ._handle_oembed_response (
324
+ og , author_name , expiration_ms = await self ._handle_oembed_response (
316
325
url , media_info , expiration_ms
317
326
)
318
327
await self ._precache_image_url (user , media_info , og )
@@ -321,6 +330,11 @@ async def _do_preview(self, url: str, user: UserID, ts: int) -> bytes:
321
330
logger .warning ("Failed to find any OG data in %s" , url )
322
331
og = {}
323
332
333
+ # If we don't have a title but we have author_name, copy it as
334
+ # title
335
+ if not og .get ("og:title" ) and author_name :
336
+ og ["og:title" ] = author_name
337
+
324
338
# filter out any stupidly long values
325
339
keys_to_remove = []
326
340
for k , v in og .items ():
@@ -484,7 +498,7 @@ async def _precache_image_url(
484
498
485
499
async def _handle_oembed_response (
486
500
self , url : str , media_info : MediaInfo , expiration_ms : int
487
- ) -> Tuple [JsonDict , int ]:
501
+ ) -> Tuple [JsonDict , Optional [ str ], int ]:
488
502
"""
489
503
Parse the downloaded oEmbed info.
490
504
@@ -497,11 +511,12 @@ async def _handle_oembed_response(
497
511
Returns:
498
512
A tuple of:
499
513
The Open Graph dictionary, if the oEmbed info can be parsed.
514
+ The author name, if it could be retrieved from the oEmbed response (otherwise None).
500
515
The (possibly updated) length of time, in milliseconds, the media is valid for.
501
516
"""
502
517
# If JSON was not returned, there's nothing to do.
503
518
if not _is_json (media_info .media_type ):
504
- return {}, expiration_ms
519
+ return {}, None , expiration_ms
505
520
506
521
with open (media_info .filename , "rb" ) as file :
507
522
body = file .read ()
@@ -513,7 +528,7 @@ async def _handle_oembed_response(
513
528
if open_graph_result and oembed_response .cache_age is not None :
514
529
expiration_ms = oembed_response .cache_age
515
530
516
- return open_graph_result , expiration_ms
531
+ return open_graph_result , oembed_response . author_name , expiration_ms
517
532
518
533
def _start_expire_url_cache_data (self ) -> Deferred :
519
534
return run_as_background_process (
0 commit comments