Skip to content

Commit 38eee2b

Browse files
committed
Sync with main branch and fix latest tests.
* Added incremental time id in import_runner.py to prevent vulnerability id conflicts Signed-off-by: Shivam Sandbhor <[email protected]>
1 parent 295ad56 commit 38eee2b

14 files changed

+112
-80
lines changed

vulnerabilities/import_runner.py

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -139,13 +139,19 @@ def process_advisories(data_source: DataSource, create_vulcodes) -> None:
139139
# Treat updated_advisories and added_advisories as same. Eventually
140140
# we want to refactor all data sources to provide advisories via a
141141
# single method.
142+
vulcoid = datetime.datetime.now()
142143
advisory_batches = chain(data_source.updated_advisories(), data_source.added_advisories())
143144
for batch in advisory_batches:
144145
for advisory in batch:
145146
try:
147+
146148
if not advisory.identifier and not create_vulcodes:
147149
continue
148150

151+
if not advisory.identifier:
152+
advisory.identifier = "VULCOID-" + vulcoid.strftime("%Y-%m-%d-%H:%M:%S")
153+
vulcoid += datetime.timedelta(seconds=1)
154+
149155
vuln, vuln_created = _get_or_create_vulnerability(advisory)
150156
for vuln_ref in advisory.vuln_references:
151157
ref = VulnerabilityReferenceInserter(
@@ -154,9 +160,11 @@ def process_advisories(data_source: DataSource, create_vulcodes) -> None:
154160
reference_id=vuln_ref.reference_id,
155161
)
156162

157-
if vuln_created or not vuln_ref_exists(vuln, vuln_ref.url, vuln_ref.reference_id):
158-
# A vulnerability reference can't exist if the vulnerability is just created so
159-
# insert it
163+
if vuln_created or not vuln_ref_exists(
164+
vuln, vuln_ref.url, vuln_ref.reference_id
165+
):
166+
# A vulnerability reference can't exist if the vulnerability is
167+
# just created, so insert it
160168
bulk_create_vuln_refs.add(ref)
161169

162170
for purl in chain(advisory.impacted_package_urls, advisory.resolved_package_urls):
@@ -176,25 +184,27 @@ def process_advisories(data_source: DataSource, create_vulcodes) -> None:
176184
existing_ref = get_vuln_pkg_refs(vuln, pkg)
177185
if not existing_ref:
178186
bulk_create_vuln_pkg_refs.add(pkg_vuln_ref)
179-
# A vulnerability-package relationship does not exist already if either the
180-
# vulnerability or the package is just created.
187+
# A vulnerability-package relationship does not exist already
188+
# if either the vulnerability or the package is just created.
181189

182190
else:
183-
# insert only if it there is no existing vulnerability-package relationship.
191+
# insert only if there is no existing vulnerability-package relationship. # nopep8
184192
existing_ref = get_vuln_pkg_refs(vuln, pkg)
185193
if not existing_ref:
186194
bulk_create_vuln_pkg_refs.add(pkg_vuln_ref)
187195

188196
else:
189197
# This handles conflicts between existing data and obtained data
190198
if existing_ref[0].is_vulnerable != pkg_vuln_ref.is_vulnerable:
191-
handle_conflicts([existing_ref[0], pkg_vuln_ref.to_model_object()])
199+
handle_conflicts(
200+
[existing_ref[0], pkg_vuln_ref.to_model_object()]
201+
)
192202
existing_ref.delete()
203+
193204
except Exception:
194205
# TODO: store error but continue
195206
logger.error(
196-
f"Failed to process advisory: {advisory!r}:\n"
197-
+ traceback.format_exc()
207+
f"Failed to process advisory: {advisory!r}:\n" + traceback.format_exc()
198208
)
199209

200210
models.VulnerabilityReference.objects.bulk_create(
@@ -259,17 +269,20 @@ def _get_or_create_vulnerability(
259269
advisory: Advisory,
260270
) -> Tuple[models.Vulnerability, bool]:
261271

262-
vuln, created = models.Vulnerability.objects.get_or_create(identifier=advisory.identifier)
272+
try:
273+
vuln, created = models.Vulnerability.objects.get_or_create(identifier=advisory.identifier)
263274

264-
# Eventually we only want to keep summary from NVD and ignore other descriptions.
265-
if advisory.summary and vuln.summary != advisory.summary:
266-
vuln.summary = advisory.summary
267-
vuln.save()
275+
# Eventually we only want to keep summary from NVD and ignore other descriptions.
276+
if advisory.summary and vuln.summary != advisory.summary:
277+
vuln.summary = advisory.summary
278+
vuln.save()
279+
280+
return vuln, created
268281

269282
except Exception:
270283
logger.error(
271-
f"Failed to _get_or_create_vulnerability: {query_kwargs!r}:\n"
272-
+ traceback.format_exc())
284+
f"Failed to _get_or_create_vulnerability: {query_kwargs!r}:\n" + traceback.format_exc()
285+
)
273286
raise
274287

275288

vulnerabilities/importer_yielder.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -61,16 +61,16 @@
6161
'debian_tracker_url': 'https://security-tracker.debian.org/tracker/data/json'
6262
},
6363
},
64-
# {
65-
# 'name': 'safetydb',
66-
# 'license': 'cc-by-nc-4.0',
67-
# 'last_run': None,
68-
# 'data_source': 'SafetyDbDataSource',
69-
# 'data_source_cfg': {
70-
# 'url': 'https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json', # nopep8
71-
# 'etags': {}
72-
# },
73-
# },
64+
# {
65+
# 'name': 'safetydb',
66+
# 'license': 'cc-by-nc-4.0',
67+
# 'last_run': None,
68+
# 'data_source': 'SafetyDbDataSource',
69+
# 'data_source_cfg': {
70+
# 'url': 'https://raw.githubusercontent.com/pyupio/safety-db/master/data/insecure_full.json', # nopep8
71+
# 'etags': {}
72+
# },
73+
# },
7474
{
7575
'name': 'npm',
7676
'license': 'mit',

vulnerabilities/importers/apache_tomcat.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def to_advisories(self, apache_tomcat_advisory_html):
116116
summary="",
117117
impacted_package_urls=affected_packages,
118118
resolved_package_urls=fixed_package,
119-
cve_id=cve_id,
119+
identifier=cve_id,
120120
vuln_references=references,
121121
)
122122
)

vulnerabilities/importers/kaybee.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def yaml_file_to_advisory(yaml_path):
6666
references.append(Reference(url=f"{commit['repository']}/{commit['id']}"))
6767

6868
return Advisory(
69-
cve_id=vuln_id,
69+
identifier=vuln_id,
7070
summary=summary,
7171
impacted_package_urls=impacted_packages,
7272
resolved_package_urls=resolved_packages,

vulnerabilities/importers/nginx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ def to_advisories(self, data):
109109

110110
advisories.append(
111111
Advisory(
112-
cve_id=cve_id,
112+
identifier=cve_id,
113113
summary=summary,
114114
impacted_package_urls=vulnerable_packages,
115115
resolved_package_urls=fixed_packages,

vulnerabilities/importers/postgresql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def to_advisories(data):
105105

106106
advisories.append(
107107
Advisory(
108-
cve_id=cve_id,
108+
identifier=cve_id,
109109
summary=summary,
110110
vuln_references=references,
111111
impacted_package_urls=affected_packages,

vulnerabilities/importers/safety_db.py

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,17 @@
2525

2626
import asyncio
2727
import dataclasses
28-
import json
28+
import re
29+
import logging
2930
from typing import Any
3031
from typing import Iterable
3132
from typing import Mapping
3233
from typing import Set
3334
from typing import Tuple
3435

36+
import requests
3537
from dephell_specifier import RangeSpecifier
3638
from packageurl import PackageURL
37-
import requests
3839
from schema import Or
3940
from schema import Regex
4041
from schema import Schema
@@ -44,22 +45,21 @@
4445
from vulnerabilities.data_source import DataSourceConfiguration
4546
from vulnerabilities.data_source import Reference
4647
from vulnerabilities.package_managers import PypiVersionAPI
47-
from vulnerabilities.helpers import create_etag
48+
49+
logger = logging.getLogger(__name__)
4850

4951

5052
def validate_schema(advisory_dict):
5153

52-
scheme = {
53-
str: [
54-
{
55-
"advisory": str,
56-
"cve": Or(None, Regex(r"CVE-\d+-\d+")),
57-
"id": Regex(r"^pyup.io-\d"),
58-
"specs": list,
59-
"v": str,
60-
}
61-
]
62-
}
54+
scheme = [
55+
{
56+
"advisory": str,
57+
"cve": Or(None, str),
58+
"id": Regex(r"^pyup.io-\d"),
59+
"specs": list,
60+
"v": str,
61+
}
62+
]
6363

6464
Schema(scheme).validate(advisory_dict)
6565

@@ -77,7 +77,6 @@ class SafetyDbDataSource(DataSource):
7777
def __init__(self, *args, **kwargs):
7878
super().__init__(*args, **kwargs)
7979
self._api_response = self._fetch()
80-
# validate_schema(self._api_response)
8180

8281
def __enter__(self):
8382
self._versions = PypiVersionAPI()
@@ -91,9 +90,8 @@ def set_api(self, packages):
9190
asyncio.run(self._versions.load_api(packages))
9291

9392
def _fetch(self) -> Mapping[str, Any]:
94-
if create_etag(data_src=self, url=self.config.url, etag_key="ETag"):
93+
if self.create_etag(self.config.url):
9594
return requests.get(self.config.url).json()
96-
9795
return []
9896

9997
def collect_packages(self):
@@ -103,22 +101,33 @@ def updated_advisories(self) -> Set[Advisory]:
103101
advisories = []
104102

105103
for package_name in self._api_response:
104+
if package_name == "$meta":
105+
# This is the first entry in the data feed. It contains metadata of the feed.
106+
# Skip it.
107+
continue
108+
109+
try:
110+
validate_schema(self._api_response[package_name])
111+
112+
except Exception as e:
113+
logger.error(e)
114+
continue
115+
106116
all_package_versions = self.versions.get(package_name)
107-
if len(all_package_versions) == 0:
117+
if not len(all_package_versions):
108118
# PyPi does not have data about this package, we skip these
109119
continue
110120

111121
for advisory in self._api_response[package_name]:
112-
113122
impacted_purls, resolved_purls = categorize_versions(
114123
package_name, all_package_versions, advisory["specs"]
115124
)
116125

117-
cve_ids = advisory.get("cve") or [""]
118-
119-
# meaning if cve_ids is not [''] but either ['CVE-123'] or ['CVE-123, CVE-124']
120-
if len(cve_ids[0]):
121-
cve_ids = [s.strip() for s in cve_ids.split(",")]
126+
if advisory["cve"]:
127+
# Check advisory["cve"] directly instead of using `get`, because it can have a null value
128+
cve_ids = re.findall(r"CVE-\d+-\d+", advisory["cve"])
129+
else:
130+
cve_ids = [None]
122131

123132
reference = [Reference(reference_id=advisory["id"])]
124133

@@ -135,17 +144,29 @@ def updated_advisories(self) -> Set[Advisory]:
135144

136145
return self.batch_advisories(advisories)
137146

147+
def create_etag(self, url):
148+
etag = requests.head(url).headers.get("ETag")
149+
if not etag:
150+
# The response has no ETag header, so no etag is actually created; returning True
151+
# is slightly inaccurate, but it makes the caller go ahead and fetch the data.
152+
return True
153+
elif url in self.config.etags:
154+
if self.config.etags[url] == etag:
155+
return False
156+
self.config.etags[url] = etag
157+
return True
158+
138159

139160
def categorize_versions(
140161
package_name: str,
141162
all_versions: Set[str],
142-
version_ranges: Iterable[str],
163+
version_specs: Iterable[str],
143164
) -> Tuple[Set[PackageURL], Set[PackageURL]]:
144165
"""
145166
:return: impacted, resolved purls
146167
"""
147168
impacted_versions, impacted_purls = set(), set()
148-
ranges = [RangeSpecifier(s) for s in version_ranges]
169+
ranges = [RangeSpecifier(s) for s in version_specs]
149170

150171
for version in all_versions:
151172
if any([version in r for r in ranges]):

vulnerabilities/management/commands/push.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def cd(newdir):
4444
def get_vulcodes():
4545

4646
vulcodes = models.Vulnerability.objects.filter(
47-
identifier__startswith="VULCODE"
47+
identifier__startswith="VULCOID"
4848
).select_related()
4949
for vuln in vulcodes:
5050
yield {

vulnerabilities/models.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ class Vulnerability(models.Model):
4545

4646
def save(self, *args, **kwargs):
4747
if not self.identifier:
48-
# Replace `str(datetime.now())` with our custom identifier TBD.
49-
self.identifier = "VULCODE-" + str(datetime.now())
50-
48+
self.identifier = "VULCOID-" + datetime.now().strftime("%Y-%m-%d-%H:%M:%S")
5149
super().save(*args, **kwargs)
5250

5351
@property

vulnerabilities/tests/test_apache_tomcat.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ def test_to_advisories(self):
8989
reference_id="",
9090
),
9191
],
92-
cve_id="CVE-2016-0763",
92+
identifier="CVE-2016-0763",
9393
),
9494
Advisory(
9595
summary="",
@@ -127,7 +127,7 @@ def test_to_advisories(self):
127127
reference_id="",
128128
),
129129
],
130-
cve_id="CVE-2015-5351",
130+
identifier="CVE-2015-5351",
131131
),
132132
Advisory(
133133
summary="",
@@ -169,7 +169,7 @@ def test_to_advisories(self):
169169
reference_id="",
170170
),
171171
],
172-
cve_id="CVE-2016-0706",
172+
identifier="CVE-2016-0706",
173173
),
174174
Advisory(
175175
summary="",
@@ -207,16 +207,16 @@ def test_to_advisories(self):
207207
reference_id="",
208208
),
209209
],
210-
cve_id="CVE-2016-0714",
210+
identifier="CVE-2016-0714",
211211
),
212212
],
213-
key=lambda x: x.cve_id,
213+
key=lambda x: x.identifier,
214214
)
215215

216216
with open(TEST_DATA) as f:
217217
found_advisories = self.data_src.to_advisories(f)
218218

219-
found_advisories.sort(key=lambda x: x.cve_id)
219+
found_advisories.sort(key=lambda x: x.identifier)
220220

221221
for i in range(len(found_advisories)):
222222
found_advisories[i].vuln_references.sort(key=lambda x: x.url)

vulnerabilities/tests/test_debian.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_import(self):
7777
self.assert_for_package("mimetex", "1.74-1", "stretch")
7878
self.assert_for_package("mimetex", "1.50-1.1", "buster")
7979
self.assert_for_package("mimetex", "1.76-1", "buster")
80-
assert models.Vulnerability.objects.filter(cve_id__startswith="TEMP").count() == 0
80+
assert models.Vulnerability.objects.filter(identifier__startswith="TEMP").count() == 0
8181

8282
def test_response_is_new(self):
8383

vulnerabilities/tests/test_import_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,4 +343,4 @@ def test_ImportRunner_create_vulcodes(db):
343343

344344
assert models.Package.objects.all().count() == 4
345345
assert models.PackageRelatedVulnerability.objects.count() == 4
346-
assert models.Vulnerability.objects.filter(identifier__startswith="VULCODE").count() == 1
346+
assert models.Vulnerability.objects.filter(identifier__startswith="VULCOID").count() == 1

0 commit comments

Comments
 (0)