1
1
from __future__ import absolute_import
2
2
# Zulip's main markdown implementation. See docs/markdown.md for
3
3
# detailed documentation on our markdown syntax.
4
- from typing import Any , Optional
4
+ from typing import Any , Optional , Callable , Union , Dict
5
5
from typing .re import Match
6
6
7
7
import markdown
19
19
import itertools
20
20
from six .moves import urllib
21
21
import xml .etree .cElementTree as etree
22
+ from xml .etree .cElementTree import ElementTree
22
23
23
24
from collections import defaultdict
24
25
38
39
import zerver .lib .mention as mention
39
40
import six
40
41
from six .moves import range
42
+ from six import text_type
41
43
42
44
43
45
# Format version of the bugdown rendering; stored along with rendered
44
46
# messages so that we can efficiently determine what needs to be re-rendered
45
47
version = 1
46
48
47
49
def list_of_tlds ():
50
+ # type: () -> List[str]
48
51
# HACK we manually blacklist .py
49
52
blacklist = ['PY\n ' , ]
50
53
@@ -56,6 +59,7 @@ def list_of_tlds():
56
59
return tlds
57
60
58
61
def walk_tree (root , processor , stop_after_first = False ):
62
+ # type: (ElementTree, Callable[[ElementTree], Optional[ElementTree]], bool) -> List[ElementTree]
59
63
results = []
60
64
stack = [root ]
61
65
@@ -76,6 +80,8 @@ def walk_tree(root, processor, stop_after_first=False):
76
80
# height is not actually used
77
81
def add_a (root , url , link , height = "" , title = None , desc = None ,
78
82
class_attr = "message_inline_image" ):
83
+ # type: ignore # (ElementTree, text_type, text_type, text_type, Optional[text_type], Optional[text_type], text_type) -> None
84
+ # above ignored until mypy picks up https://github.com/python/typeshed/pull/244
79
85
title = title if title is not None else url_filename (link )
80
86
title = title if title else ""
81
87
desc = desc if desc is not None else ""
@@ -98,6 +104,7 @@ def add_a(root, url, link, height="", title=None, desc=None,
98
104
99
105
@cache_with_key (lambda tweet_id : tweet_id , cache_name = "database" , with_statsd_key = "tweet_data" )
100
106
def fetch_tweet_data (tweet_id ):
107
+ # type: (text_type) -> Optional[Dict[text_type, text_type]]
101
108
if settings .TEST_SUITE :
102
109
from . import testing_mocks
103
110
res = testing_mocks .twitter (tweet_id )
@@ -158,6 +165,7 @@ def fetch_tweet_data(tweet_id):
158
165
META_END_RE = re .compile ('^/meta[ >]' )
159
166
160
167
def fetch_open_graph_image (url ):
168
+ # type: (str) -> Optional[Dict[str, Any]]
161
169
in_head = False
162
170
# HTML will auto close meta tags, when we start the next tag add a closing tag if it has not been closed yet.
163
171
last_closed = True
@@ -223,21 +231,23 @@ def fetch_open_graph_image(url):
223
231
return {'image' : image , 'title' : title , 'desc' : desc }
224
232
225
233
def get_tweet_id(url):
    # type: (str) -> Union[bool, str]
    """Extract the tweet id from a twitter.com status URL.

    Returns the id (a string of 10 to 18 digits) when `url` points at
    twitter.com or one of its subdomains and its path -- or, for old-style
    hashbang URLs, its fragment -- looks like a status link.  Returns
    False otherwise.
    """
    parsed_url = urllib.parse.urlparse(url)
    if not (parsed_url.netloc == 'twitter.com' or parsed_url.netloc.endswith('.twitter.com')):
        return False  # TODO: probably should return None instead and change return type to Optional[str]
    to_match = parsed_url.path
    # In old-style twitter.com/#!/wdaher/status/1231241234-style URLs, we need to look at the fragment instead
    if parsed_url.path == '/' and len(parsed_url.fragment) > 5:
        to_match = parsed_url.fragment

    # Accepts an optional leading "!" (hashbang fragments), "status" or
    # "statuses", an optional "/photo/<digit>" suffix and an optional
    # trailing slash.
    tweet_id_match = re.match(r'^!?/.*?/status(es)?/(?P<tweetid>\d{10,18})(/photo/[0-9])?/?$', to_match)
    if not tweet_id_match:
        return False  # TODO: probably should return None instead and change return type to Optional[str]
    return tweet_id_match.group("tweetid")
238
247
239
248
class InlineHttpsProcessor (markdown .treeprocessors .Treeprocessor ):
240
249
def run (self , root ):
250
+ # type: (ElementTree) -> None
241
251
# Get all URLs from the blob
242
252
found_imgs = walk_tree (root , lambda e : e if e .tag == "img" else None )
243
253
for img in found_imgs :
@@ -252,11 +262,13 @@ class InlineInterestingLinkProcessor(markdown.treeprocessors.Treeprocessor):
252
262
TWITTER_MAX_TO_PREVIEW = 3
253
263
254
264
def __init__(self, md, bugdown):
    # type: (markdown.Markdown, Bugdown) -> None
    """Set up the tree processor and keep a reference to `bugdown`.

    `md` is forwarded unchanged to the base Treeprocessor initializer;
    `bugdown` is stored on the instance as `self.bugdown`.
    """
    # Passing in bugdown for access to config to check if realm is zulip.com
    self.bugdown = bugdown
    markdown.treeprocessors.Treeprocessor.__init__(self, md)
258
269
259
270
def is_image (self , url ):
271
+ # type: (str) -> bool
260
272
if not settings .INLINE_IMAGE_PREVIEW :
261
273
return False
262
274
parsed_url = urllib .parse .urlparse (url )
@@ -267,6 +279,8 @@ def is_image(self, url):
267
279
return False
268
280
269
281
def dropbox_image (self , url ):
282
+ # type: (str) -> Optional[Dict]
283
+ # TODO: specify details of returned Dict
270
284
parsed_url = urllib .parse .urlparse (url )
271
285
if (parsed_url .netloc == 'dropbox.com' or parsed_url .netloc .endswith ('.dropbox.com' )):
272
286
is_album = parsed_url .path .startswith ('/sc/' ) or parsed_url .path .startswith ('/photos/' )
@@ -311,6 +325,7 @@ def dropbox_image(self, url):
311
325
return None
312
326
313
327
def youtube_image (self , url ):
328
+ # type: (str) -> Optional[str]
314
329
if not settings .INLINE_IMAGE_PREVIEW :
315
330
return None
316
331
# Youtube video id extraction regular expression from http://pastebin.com/KyKAFv1s
@@ -404,6 +419,7 @@ def set_text(text):
404
419
return p
405
420
406
421
def twitter_link (self , url ):
422
+ # type: (str) -> Optional[markdown.util.etree.Element]
407
423
tweet_id = get_tweet_id (url )
408
424
409
425
if not tweet_id :
@@ -475,6 +491,7 @@ def twitter_link(self, url):
475
491
return None
476
492
477
493
def run (self , root ):
494
+ # type: (ElementTree) -> None
478
495
# Get all URLs from the blob
479
496
found_urls = walk_tree (root , lambda e : e .get ("href" ) if e .tag == "a" else None )
480
497
@@ -520,6 +537,7 @@ def run(self, root):
520
537
521
538
class Avatar (markdown .inlinepatterns .Pattern ):
522
539
def handleMatch (self , match ):
540
+ # type: (Match) -> markdown.util.etree.Element
523
541
img = markdown .util .etree .Element ('img' )
524
542
email_address = match .group ('email' )
525
543
img .set ('class' , 'message_body_gravatar' )
@@ -540,6 +558,7 @@ def handleMatch(self, match):
540
558
541
559
542
560
def make_emoji (emoji_name , src , display_string ):
561
+ # type: (str, str, str) -> markdown.util.etree.Element
543
562
elt = markdown .util .etree .Element ('img' )
544
563
elt .set ('src' , src )
545
564
elt .set ('class' , 'emoji' )
@@ -569,6 +588,7 @@ class StreamSubscribeButton(markdown.inlinepatterns.Pattern):
569
588
# This markdown extension has required javascript in
570
589
# static/js/custom_markdown.js
571
590
def handleMatch (self , match ):
591
+ # type: (Match) -> markdown.util.etree.Element
572
592
stream_name = match .group ('stream_name' )
573
593
stream_name = stream_name .replace ('\\ )' , ')' ).replace ('\\ \\ ' , '\\ ' )
574
594
@@ -590,6 +610,7 @@ class ModalLink(markdown.inlinepatterns.Pattern):
590
610
A pattern that allows including in-app modal links in messages.
591
611
"""
592
612
def handleMatch (self , match ):
613
+ # type: (Match) -> markdown.util.etree.Element
593
614
relative_url = match .group ('relative_url' )
594
615
text = match .group ('text' )
595
616
@@ -603,6 +624,8 @@ def handleMatch(self, match):
603
624
604
625
upload_re = re .compile (r"^(?:https://%s.s3.amazonaws.com|/user_uploads/\d+)/[^/]*/([^/]*)$" % (settings .S3_BUCKET ,))
605
626
def url_filename (url ):
627
+ # type: ignore # (text_type) -> text_type
628
+ # above ignored until mypy picks up https://github.com/python/typeshed/pull/244
606
629
"""Extract the filename if a URL is an uploaded file, or return the original URL"""
607
630
match = upload_re .match (url )
608
631
if match :
@@ -611,13 +634,15 @@ def url_filename(url):
611
634
return url
612
635
613
636
def fixup_link(link, target_blank=True):
    # type: (markdown.util.etree.Element, bool) -> None
    """Set certain attributes we want on every link.

    When `target_blank` is true, the link gets target="_blank".  The
    title attribute is always set to `url_filename` applied to the
    link's href.
    """
    if target_blank:
        link.set('target', '_blank')
    link.set('title', url_filename(link.get('href')))
618
642
619
643
620
644
def sanitize_url (url ):
645
+ # type: (text_type) -> text_type
621
646
"""
622
647
Sanitize a url against xss attacks.
623
648
See the docstring on markdown.inlinepatterns.LinkPattern.sanitize_url.
0 commit comments