# Visit https://github.com/nexB/vulnerablecode/ for support and download.

import json
+import xxhash
from dephell_specifier import RangeSpecifier
from urllib.request import urlopen
from urllib.error import HTTPError

+from vulnerabilities.models import AdvisoryHashes
+

NPM_URL = 'https://registry.npmjs.org{}'
PAGE = '/-/npm/v1/security/advisories?page=0'
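
The new `AdvisoryHashes` import above refers to a model that is not part of this diff. A minimal sketch of what it might look like, inferred only from the `.objects.filter(hash=...)` and `.objects.create(hash=...)` calls in the hunks below (the field type and the `unique` constraint are assumptions, not the project's actual definition):

from django.db import models

class AdvisoryHashes(models.Model):
    # xxh32().intdigest() ranges up to 2**32 - 1, which overflows a signed
    # 32-bit IntegerField, so a BigIntegerField is assumed here.
    hash = models.BigIntegerField(unique=True)
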
@@ -93,15 +96,26 @@ def extract_data(JSON):
            continue
            # The NPM registry has no data for this package, so we skip it
-        package_vulnerabilities.append({
+        package_vulnerability = {
            'package_name': package_name,
            'summary': obj.get('overview', ''),
            'cve_ids': obj.get('cves', []),
            'fixed_versions': fixed_versions,
            'affected_versions': affected_versions,
            'severity': obj.get('severity', ''),
            'advisory': obj.get('url', ''),
-        })
+        }
+
+        pkg_vuln_hash = xxhash.xxh32(json.dumps(
+            package_vulnerability, sort_keys=True)).intdigest()
+        hash_query = AdvisoryHashes.objects.filter(hash=pkg_vuln_hash)
+        if hash_query:
+            # This exact advisory data was already stored in an earlier
+            # run, so skip it
+            continue
+        package_vulnerabilities.append(package_vulnerability)
+        AdvisoryHashes.objects.create(hash=pkg_vuln_hash)
+
    return package_vulnerabilities
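
The per-advisory dedup key above only works if the same advisory always serializes to the same string, which is what `sort_keys=True` guarantees for dicts that differ only in key order. A small illustrative check (the sample dict below is made up, not real registry data):

import json
import xxhash

a = {'package_name': 'lodash', 'severity': 'high'}
b = {'severity': 'high', 'package_name': 'lodash'}  # same data, different key order

key_a = xxhash.xxh32(json.dumps(a, sort_keys=True)).intdigest()
key_b = xxhash.xxh32(json.dumps(b, sort_keys=True)).intdigest()
assert key_a == key_b  # canonical serialization -> identical 32-bit dedup key
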
@@ -115,7 +129,15 @@ def scrape_vulnerabilities():
        try:
            cururl = NPM_URL.format(nextpage)
            response = json.load(urlopen(cururl))
-            package_vulnerabilities.extend(extract_data(response))
+            resp_hash = xxhash.xxh32(json.dumps(
+                response, sort_keys=True)).intdigest()
+            hash_query = AdvisoryHashes.objects.filter(hash=resp_hash)
+
+            if not hash_query:
+                package_vulnerabilities.extend(extract_data(response))
+                AdvisoryHashes.objects.create(hash=resp_hash)
+
            nextpage = response.get('urls', {}).get('next')

        except HTTPError as error:
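
The page-level check in the hunk above follows the same pattern as the per-advisory one, just keyed on the whole registry response. A framework-free sketch of that control flow, with a plain set standing in for the AdvisoryHashes table (`import_page` and `seen_hashes` are illustrative names, not part of the project):

import json
import xxhash

def import_page(response, seen_hashes):
    # Return the advisories from one registry page, or [] when the exact
    # same page content was already imported in an earlier run.
    # extract_data() is the function patched in the first hunk of this diff.
    page_key = xxhash.xxh32(json.dumps(response, sort_keys=True)).intdigest()
    if page_key in seen_hashes:
        return []
    seen_hashes.add(page_key)
    return extract_data(response)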