46
46
ResponseHeaders = MutableMapping [str , str ]
47
47
48
48
49
- def _match_vcs_scheme (url ):
50
- # type: (str) -> Optional[str]
49
+ def _match_vcs_scheme (url : str ) -> Optional [str ]:
51
50
"""Look for VCS schemes in the URL.
52
51
53
52
Returns the matched VCS scheme, or None if there's no match.
@@ -59,15 +58,13 @@ def _match_vcs_scheme(url):
59
58
60
59
61
60
class _NotHTML(Exception):
    """Signal that a response's content type is not HTML.

    Both constructor arguments are forwarded to ``Exception`` (so they show
    up in ``args``) and are also stored as attributes.

    :param content_type: the content type that was found.
    :param request_desc: presumably a short description of the request that
        produced the response (e.g. the HTTP method) -- confirm with callers.
    """

    def __init__(self, content_type: str, request_desc: str) -> None:
        super().__init__(content_type, request_desc)
        self.content_type = content_type
        self.request_desc = request_desc
67
65
68
66
69
- def _ensure_html_header (response ):
70
- # type: (Response) -> None
67
+ def _ensure_html_header (response : Response ) -> None :
71
68
"""Check the Content-Type header to ensure the response contains HTML.
72
69
73
70
Raises `_NotHTML` if the content type is not text/html.
@@ -81,8 +78,7 @@ class _NotHTTP(Exception):
81
78
pass
82
79
83
80
84
- def _ensure_html_response (url , session ):
85
- # type: (str, PipSession) -> None
81
+ def _ensure_html_response (url : str , session : PipSession ) -> None :
86
82
"""Send a HEAD request to the URL, and ensure the response contains HTML.
87
83
88
84
Raises `_NotHTTP` if the URL is not available for a HEAD request, or
@@ -98,8 +94,7 @@ def _ensure_html_response(url, session):
98
94
_ensure_html_header (resp )
99
95
100
96
101
- def _get_html_response (url , session ):
102
- # type: (str, PipSession) -> Response
97
+ def _get_html_response (url : str , session : PipSession ) -> Response :
103
98
"""Access an HTML page with GET, and return the response.
104
99
105
100
This consists of three parts:
@@ -149,8 +144,7 @@ def _get_html_response(url, session):
149
144
return resp
150
145
151
146
152
- def _get_encoding_from_headers (headers ):
153
- # type: (ResponseHeaders) -> Optional[str]
147
+ def _get_encoding_from_headers (headers : ResponseHeaders ) -> Optional [str ]:
154
148
"""Determine if we have any encoding information in our headers.
155
149
"""
156
150
if headers and "Content-Type" in headers :
@@ -160,8 +154,7 @@ def _get_encoding_from_headers(headers):
160
154
return None
161
155
162
156
163
- def _determine_base_url (document , page_url ):
164
- # type: (HTMLElement, str) -> str
157
+ def _determine_base_url (document : HTMLElement , page_url : str ) -> str :
165
158
"""Determine the HTML document's base URL.
166
159
167
160
This looks for a ``<base>`` tag in the HTML document. If present, its href
@@ -180,17 +173,15 @@ def _determine_base_url(document, page_url):
180
173
return page_url
181
174
182
175
183
def _clean_url_path_part(part: str) -> str:
    """
    Clean a "part" of a URL path (i.e. after splitting on "@" characters).

    :param part: the path fragment to normalize.
    :return: the fragment percent-encoded exactly once.
    """
    # We unquote prior to quoting to make sure nothing is double quoted.
    return urllib.parse.quote(urllib.parse.unquote(part))
190
182
191
183
192
- def _clean_file_url_path (part ):
193
- # type: (str) -> str
184
+ def _clean_file_url_path (part : str ) -> str :
194
185
"""
195
186
Clean the first part of a URL path that corresponds to a local
196
187
filesystem path (i.e. the first part after splitting on "@" characters).
@@ -207,8 +198,7 @@ def _clean_file_url_path(part):
207
198
_reserved_chars_re = re .compile ('(@|%2F)' , re .IGNORECASE )
208
199
209
200
210
- def _clean_url_path (path , is_local_path ):
211
- # type: (str, bool) -> str
201
+ def _clean_url_path (path : str , is_local_path : bool ) -> str :
212
202
"""
213
203
Clean the path portion of a URL.
214
204
"""
@@ -230,8 +220,7 @@ def _clean_url_path(path, is_local_path):
230
220
return '' .join (cleaned_parts )
231
221
232
222
233
- def _clean_link (url ):
234
- # type: (str) -> str
223
+ def _clean_link (url : str ) -> str :
235
224
"""
236
225
Make sure a link is fully quoted.
237
226
For example, if ' ' occurs in the URL, it will be replaced with "%20",
@@ -247,11 +236,10 @@ def _clean_link(url):
247
236
248
237
249
238
def _create_link_from_element (
250
- anchor , # type: HTMLElement
251
- page_url , # type: str
252
- base_url , # type: str
253
- ):
254
- # type: (...) -> Optional[Link]
239
+ anchor : HTMLElement ,
240
+ page_url : str ,
241
+ base_url : str ,
242
+ ) -> Optional [Link ]:
255
243
"""
256
244
Convert an anchor element in a simple repository page to a Link.
257
245
"""
@@ -278,39 +266,33 @@ def _create_link_from_element(
278
266
279
267
280
268
class CacheablePageContent:
    """Hashable wrapper around a page, compared and hashed by the page URL.

    Two wrappers are equal when they are of the same type and their pages
    have the same ``url``; the hash is the hash of that ``url``.
    """

    def __init__(self, page: "HTMLPage") -> None:
        # Only pages that opted in to link-parsing caching may be wrapped.
        assert page.cache_link_parsing
        self.page = page

    def __eq__(self, other: object) -> bool:
        return (isinstance(other, type(self)) and
                self.page.url == other.page.url)

    def __hash__(self) -> int:
        # Must stay consistent with __eq__, which compares page URLs.
        return hash(self.page.url)
294
279
295
280
296
281
def with_cached_html_pages (
297
- fn , # type: Callable[[HTMLPage], Iterable[Link]]
298
- ):
299
- # type: (...) -> Callable[[HTMLPage], List[Link]]
282
+ fn : Callable [["HTMLPage" ], Iterable [Link ]],
283
+ ) -> Callable [["HTMLPage" ], List [Link ]]:
300
284
"""
301
285
Given a function that parses an Iterable[Link] from an HTMLPage, cache the
302
286
function's result (keyed by CacheablePageContent), unless the HTMLPage
303
287
`page` has `page.cache_link_parsing == False`.
304
288
"""
305
289
306
290
@functools .lru_cache (maxsize = None )
307
- def wrapper (cacheable_page ):
308
- # type: (CacheablePageContent) -> List[Link]
291
+ def wrapper (cacheable_page : CacheablePageContent ) -> List [Link ]:
309
292
return list (fn (cacheable_page .page ))
310
293
311
294
@functools .wraps (fn )
312
- def wrapper_wrapper (page ):
313
- # type: (HTMLPage) -> List[Link]
295
+ def wrapper_wrapper (page : "HTMLPage" ) -> List [Link ]:
314
296
if page .cache_link_parsing :
315
297
return wrapper (CacheablePageContent (page ))
316
298
return list (fn (page ))
@@ -319,8 +301,7 @@ def wrapper_wrapper(page):
319
301
320
302
321
303
@with_cached_html_pages
322
- def parse_links (page ):
323
- # type: (HTMLPage) -> Iterable[Link]
304
+ def parse_links (page : "HTMLPage" ) -> Iterable [Link ]:
324
305
"""
325
306
Parse an HTML document, and yield its anchor elements as Link objects.
326
307
"""
@@ -348,12 +329,11 @@ class HTMLPage:
348
329
349
330
def __init__ (
350
331
self ,
351
- content , # type: bytes
352
- encoding , # type: Optional[str]
353
- url , # type: str
354
- cache_link_parsing = True , # type: bool
355
- ):
356
- # type: (...) -> None
332
+ content : bytes ,
333
+ encoding : Optional [str ],
334
+ url : str ,
335
+ cache_link_parsing : bool = True ,
336
+ ) -> None :
357
337
"""
358
338
:param encoding: the encoding to decode the given content.
359
339
:param url: the URL from which the HTML was downloaded.
@@ -366,24 +346,21 @@ def __init__(
366
346
self .url = url
367
347
self .cache_link_parsing = cache_link_parsing
368
348
369
def __str__(self) -> str:
    """Return ``self.url`` passed through ``redact_auth_from_url``."""
    # NOTE(review): presumably this hides credentials embedded in the URL
    # so they never reach logs -- confirm against redact_auth_from_url.
    return redact_auth_from_url(self.url)
372
351
373
352
374
353
def _handle_get_page_fail(
    link: Link,
    reason: Union[str, Exception],
    meth: Optional[Callable[..., None]] = None,
) -> None:
    """Report that ``link`` could not be fetched and is being skipped.

    :param link: the link whose fetch failed; interpolated into the message.
    :param reason: the failure cause (text or exception); interpolated into
        the message.
    :param meth: callable invoked with a %-style format string followed by
        its arguments. Defaults to ``logger.debug`` when omitted or None.
    """
    if meth is None:
        meth = logger.debug
    # Pass the arguments separately so formatting is left to the callee.
    meth("Could not fetch URL %s: %s - skipping", link, reason)
383
361
384
362
385
- def _make_html_page (response , cache_link_parsing = True ):
386
- # type: (Response, bool) -> HTMLPage
363
+ def _make_html_page (response : Response , cache_link_parsing : bool = True ) -> HTMLPage :
387
364
encoding = _get_encoding_from_headers (response .headers )
388
365
return HTMLPage (
389
366
response .content ,
@@ -392,8 +369,9 @@ def _make_html_page(response, cache_link_parsing=True):
392
369
cache_link_parsing = cache_link_parsing )
393
370
394
371
395
- def _get_html_page (link , session = None ):
396
- # type: (Link, Optional[PipSession]) -> Optional[HTMLPage]
372
+ def _get_html_page (
373
+ link : Link , session : Optional [PipSession ] = None
374
+ ) -> Optional ["HTMLPage" ]:
397
375
if session is None :
398
376
raise TypeError (
399
377
"_get_html_page() missing 1 required keyword argument: 'session'"
@@ -465,16 +443,18 @@ class LinkCollector:
465
443
466
444
def __init__(
    self,
    session: PipSession,
    search_scope: SearchScope,
) -> None:
    """Store the given session and search scope on the instance.

    :param session: kept as ``self.session``.
    :param search_scope: kept as ``self.search_scope``.
    """
    self.search_scope = search_scope
    self.session = session
474
451
475
452
@classmethod
476
- def create (cls , session , options , suppress_no_index = False ):
477
- # type: (PipSession, Values, bool) -> LinkCollector
453
+ def create (
454
+ cls , session : PipSession ,
455
+ options : Values ,
456
+ suppress_no_index : bool = False
457
+ ) -> "LinkCollector" :
478
458
"""
479
459
:param session: The Session to use to make requests.
480
460
:param suppress_no_index: Whether to ignore the --no-index option
@@ -500,12 +480,10 @@ def create(cls, session, options, suppress_no_index=False):
500
480
return link_collector
501
481
502
482
@property
def find_links(self) -> List[str]:
    """Return the ``find_links`` value held by ``self.search_scope``."""
    return self.search_scope.find_links
506
485
507
- def fetch_page (self , location ):
508
- # type: (Link) -> Optional[HTMLPage]
486
+ def fetch_page (self , location : Link ) -> Optional [HTMLPage ]:
509
487
"""
510
488
Fetch an HTML page containing package links.
511
489
"""
0 commit comments