1
1
from __future__ import absolute_import
2
2
# Zulip's main markdown implementation. See docs/markdown.md for
3
3
# detailed documentation on our markdown syntax.
4
- from typing import Any , Optional
4
+ from typing import Any , Optional , Callable , Union , Dict
5
5
from typing .re import Match
6
6
7
7
import markdown
8
+ from markdown .util .etree import ElementTree
8
9
import logging
9
10
import traceback
10
11
from six .moves import urllib
38
39
import zerver .lib .mention as mention
39
40
import six
40
41
from six .moves import range
42
+ from six import text_type
41
43
42
44
43
45
# Format version of the bugdown rendering; stored along with rendered
44
46
# messages so that we can efficiently determine what needs to be re-rendered
45
47
version = 1
46
48
47
49
def list_of_tlds ():
50
+ # type: () -> List[str]
48
51
# HACK we manually blacklist .py
49
52
blacklist = ['PY\n ' , ]
50
53
@@ -56,6 +59,7 @@ def list_of_tlds():
56
59
return tlds
57
60
58
61
def walk_tree (root , processor , stop_after_first = False ):
62
+ # type: (ElementTree, Callable[[ElementTree], Optional[ElementTree]], bool) -> List[ElementTree]
59
63
results = []
60
64
stack = [root ]
61
65
@@ -76,6 +80,8 @@ def walk_tree(root, processor, stop_after_first=False):
76
80
# height is not actually used
77
81
def add_a (root , url , link , height = "" , title = None , desc = None ,
78
82
class_attr = "message_inline_image" ):
83
+ # type: ignore # (ElementTree, text_type, text_type, text_type, Optional[text_type], Optional[text_type], text_type) -> None
84
+ # above ignored until mypy picks up https://github.com/python/typeshed/pull/244
79
85
title = title if title is not None else url_filename (link )
80
86
title = title if title else ""
81
87
desc = desc if desc is not None else ""
@@ -98,6 +104,7 @@ def add_a(root, url, link, height="", title=None, desc=None,
98
104
99
105
@cache_with_key (lambda tweet_id : tweet_id , cache_name = "database" , with_statsd_key = "tweet_data" )
100
106
def fetch_tweet_data (tweet_id ):
107
+ # type: (text_type) -> Optional[Dict[text_type, text_type]]
101
108
if settings .TEST_SUITE :
102
109
from . import testing_mocks
103
110
res = testing_mocks .twitter (tweet_id )
@@ -158,6 +165,7 @@ def fetch_tweet_data(tweet_id):
158
165
META_END_RE = re .compile ('^/meta[ >]' )
159
166
160
167
def fetch_open_graph_image (url ):
168
+ # type: (str) -> Optional[Dict[str, Any]]
161
169
in_head = False
162
170
# HTML will auto-close meta tags; when we start the next tag, add a closing tag if the previous one has not been closed yet.
163
171
last_closed = True
@@ -223,21 +231,23 @@ def fetch_open_graph_image(url):
223
231
return {'image' : image , 'title' : title , 'desc' : desc }
224
232
225
233
def get_tweet_id(url):
    # type: (str) -> Union[bool, str]
    """Extract the tweet id from a Twitter status URL.

    Both the path form (``twitter.com/<user>/status/<id>``) and the
    old-style fragment form (``twitter.com/#!/<user>/status/<id>``) are
    recognised, optionally followed by ``/photo/<digit>`` and/or a
    trailing slash.

    Returns the id as a string of 10-18 digits, or ``False`` when the host
    is not twitter.com (or one of its subdomains) or the URL does not look
    like a status link.
    """
    parsed_url = urllib.parse.urlparse(url)
    netloc = parsed_url.netloc
    if not (netloc == 'twitter.com' or netloc.endswith('.twitter.com')):
        # TODO: probably should return None instead and change return type to Optional[str]
        return False

    to_match = parsed_url.path
    # In old-style twitter.com/#!/wdaher/status/1231241234-style URLs, we
    # need to look at the fragment instead of the path.
    if parsed_url.path == '/' and len(parsed_url.fragment) > 5:
        to_match = parsed_url.fragment

    tweet_id_match = re.match(r'^!?/.*?/status(es)?/(?P<tweetid>\d{10,18})(/photo/[0-9])?/?$', to_match)
    if not tweet_id_match:
        # TODO: probably should return None instead and change return type to Optional[str]
        return False
    return tweet_id_match.group("tweetid")
238
247
239
248
class InlineHttpsProcessor (markdown .treeprocessors .Treeprocessor ):
240
249
def run (self , root ):
250
+ # type: (ElementTree) -> None
241
251
# Get all URLs from the blob
242
252
found_imgs = walk_tree (root , lambda e : e if e .tag == "img" else None )
243
253
for img in found_imgs :
@@ -252,11 +262,13 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
252
262
TWITTER_MAX_TO_PREVIEW = 3
253
263
254
264
def __init__(self, md, bugdown):
    # type: (markdown.Markdown, Bugdown) -> None
    """Store the bugdown extension and initialise the base tree processor."""
    # The bugdown object is kept so this processor can reach its config
    # (used to check whether the realm is zulip.com).
    self.bugdown = bugdown
    markdown.treeprocessors.Treeprocessor.__init__(self, md)
258
269
259
270
def is_image (self , url ):
271
+ # type: (str) -> bool
260
272
if not settings .INLINE_IMAGE_PREVIEW :
261
273
return False
262
274
parsed_url = urllib .parse .urlparse (url )
@@ -267,6 +279,8 @@ def is_image(self, url):
267
279
return False
268
280
269
281
def dropbox_image (self , url ):
282
+ # type: (str) -> Optional[Dict]
283
+ # TODO: specify details of returned Dict
270
284
parsed_url = urllib .parse .urlparse (url )
271
285
if (parsed_url .netloc == 'dropbox.com' or parsed_url .netloc .endswith ('.dropbox.com' )):
272
286
is_album = parsed_url .path .startswith ('/sc/' ) or parsed_url .path .startswith ('/photos/' )
@@ -311,6 +325,7 @@ def dropbox_image(self, url):
311
325
return None
312
326
313
327
def youtube_image (self , url ):
328
+ # type: (str) -> Optional[str]
314
329
if not settings .INLINE_IMAGE_PREVIEW :
315
330
return None
316
331
# Youtube video id extraction regular expression from http://pastebin.com/KyKAFv1s
@@ -404,6 +419,7 @@ def set_text(text):
404
419
return p
405
420
406
421
def twitter_link (self , url ):
422
+ # type: (str) -> Optional[markdown.util.etree.Element]
407
423
tweet_id = get_tweet_id (url )
408
424
409
425
if not tweet_id :
@@ -475,6 +491,7 @@ def twitter_link(self, url):
475
491
return None
476
492
477
493
def run (self , root ):
494
+ # type: (ElementTree) -> None
478
495
# Get all URLs from the blob
479
496
found_urls = walk_tree (root , lambda e : e .get ("href" ) if e .tag == "a" else None )
480
497
@@ -520,6 +537,7 @@ def run(self, root):
520
537
521
538
class Avatar (markdown .inlinepatterns .Pattern ):
522
539
def handleMatch (self , match ):
540
+ # type: (Match) -> markdown.util.etree.Element
523
541
img = markdown .util .etree .Element ('img' )
524
542
email_address = match .group ('email' )
525
543
img .set ('class' , 'message_body_gravatar' )
@@ -540,6 +558,7 @@ def handleMatch(self, match):
540
558
541
559
542
560
def make_emoji (emoji_name , src , display_string ):
561
+ # type: (str, str, str) -> markdown.util.etree.Element
543
562
elt = markdown .util .etree .Element ('img' )
544
563
elt .set ('src' , src )
545
564
elt .set ('class' , 'emoji' )
@@ -569,6 +588,7 @@ class StreamSubscribeButton(markdown.inlinepatterns.Pattern):
569
588
# This markdown extension has required javascript in
570
589
# static/js/custom_markdown.js
571
590
def handleMatch (self , match ):
591
+ # type: (Match) -> markdown.util.etree.Element
572
592
stream_name = match .group ('stream_name' )
573
593
stream_name = stream_name .replace ('\\ )' , ')' ).replace ('\\ \\ ' , '\\ ' )
574
594
@@ -590,6 +610,7 @@ class ModalLink(markdown.inlinepatterns.Pattern):
590
610
A pattern that allows including in-app modal links in messages.
591
611
"""
592
612
def handleMatch (self , match ):
613
+ # type: (Match) -> markdown.util.etree.Element
593
614
relative_url = match .group ('relative_url' )
594
615
text = match .group ('text' )
595
616
@@ -603,6 +624,8 @@ def handleMatch(self, match):
603
624
604
625
upload_re = re .compile (r"^(?:https://%s.s3.amazonaws.com|/user_uploads/\d+)/[^/]*/([^/]*)$" % (settings .S3_BUCKET ,))
605
626
def url_filename (url ):
627
+ # type: ignore # (text_type) -> text_type
628
+ # above ignored until mypy picks up https://github.com/python/typeshed/pull/244
606
629
"""Extract the filename if a URL is an uploaded file, or return the original URL"""
607
630
match = upload_re .match (url )
608
631
if match :
@@ -611,13 +634,15 @@ def url_filename(url):
611
634
return url
612
635
613
636
def fixup_link(link, target_blank=True):
    # type: (markdown.util.etree.Element, bool) -> None
    """Set certain attributes we want on every link.

    The ``title`` attribute is always set, to ``url_filename`` applied to
    the link's ``href``.  When ``target_blank`` is true, ``target`` is
    additionally set to ``_blank``.  The element is modified in place.
    """
    if target_blank:
        link.set('target', '_blank')
    link.set('title', url_filename(link.get('href')))
618
642
619
643
620
644
def sanitize_url (url ):
645
+ # type: (text_type) -> text_type
621
646
"""
622
647
Sanitize a url against xss attacks.
623
648
See the docstring on markdown.inlinepatterns.LinkPattern.sanitize_url.
0 commit comments