Skip to content

Commit 6404723

Browse files
committed
WIP
Signed-off-by: Haiko Schol <[email protected]>
1 parent 3265803 commit 6404723

15 files changed

+161
-191
lines changed

vulnerabilities/admin.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525

2626
from vulnerabilities.models import (
2727
ImpactedPackage,
28+
Importer,
2829
Package,
29-
PackageReference,
3030
ResolvedPackage,
3131
Vulnerability,
32-
VulnerabilityReference
32+
VulnerabilityReference,
3333
)
3434

3535

@@ -58,6 +58,6 @@ class ResolvedPackageAdmin(admin.ModelAdmin):
5858
pass
5959

6060

61-
@admin.register(PackageReference)
62-
class PackageReferenceAdmin(admin.ModelAdmin):
61+
@admin.register(Importer)
62+
class ImporterAdmin(admin.ModelAdmin):
6363
pass

vulnerabilities/api.py

-16
Original file line numberDiff line numberDiff line change
@@ -22,29 +22,15 @@
2222
# Visit https://github.com/nexB/vulnerablecode/ for support and download.
2323

2424
from rest_framework import serializers
25-
from rest_framework import status
2625
from rest_framework import viewsets
27-
from rest_framework.response import Response
2826

2927
from packageurl import PackageURL
3028

3129
from vulnerabilities.models import Package
32-
from vulnerabilities.models import PackageReference
3330
from vulnerabilities.models import Vulnerability
3431
from vulnerabilities.models import VulnerabilityReference
3532

3633

37-
class PackageReferenceSerializer(serializers.ModelSerializer):
38-
class Meta:
39-
model = PackageReference
40-
fields = [
41-
'repository',
42-
'platform',
43-
'name',
44-
'version',
45-
]
46-
47-
4834
class VulnerabilityReferenceSerializer(serializers.ModelSerializer):
4935
class Meta:
5036
model = VulnerabilityReference
@@ -69,7 +55,6 @@ class Meta:
6955

7056
class PackageSerializer(serializers.ModelSerializer):
7157
vulnerabilities = VulnerabilitySerializer(many=True)
72-
references = PackageReferenceSerializer(source='packagereference_set', many=True)
7358

7459
class Meta:
7560
model = Package
@@ -78,7 +63,6 @@ class Meta:
7863
'version',
7964
'package_url',
8065
'vulnerabilities',
81-
'references',
8266
]
8367

8468

vulnerabilities/data_dump.py

-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323

2424
from vulnerabilities.models import ImpactedPackage
2525
from vulnerabilities.models import Package
26-
from vulnerabilities.models import PackageReference
2726
from vulnerabilities.models import ResolvedPackage
2827
from vulnerabilities.models import Vulnerability
2928
from vulnerabilities.models import VulnerabilityReference

vulnerabilities/data_source.py

+28-15
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from datetime import datetime
2525
from typing import Any
2626
from typing import Mapping
27+
from typing import Optional
2728
from typing import Sequence
2829

2930

@@ -33,8 +34,8 @@ class DataSource:
3334
TODO
3435
"""
3536
batch_size: int
36-
cutoff_date: datetime
37-
config: Mapping[str, Any]
37+
config: Optional[Mapping[str, Any]] = None
38+
cutoff_date: Optional[datetime] = None
3839

3940
def __enter__(self):
4041
"""
@@ -48,33 +49,45 @@ def __exit__(self, exc_type, exc_val, exc_tb):
4849
"""
4950
pass
5051

51-
def __next__(self):
52+
def new_records(self):
5253
"""
53-
Subclasses return batch_size sized batches of VulnerabilityInfo objects
54+
Subclasses return batch_size sized batches of VulnerabilityInfo objects that have been added to the data source
55+
since self.cutoff_date.
5456
"""
55-
pass
57+
raise StopIteration
58+
59+
def updated_records(self):
60+
"""
61+
Subclasses return batch_size sized batches of VulnerabilityInfo objects that have been modified since
62+
self.cutoff_date.
63+
64+
NOTE: Data sources that do not enable detection of changes to existing records vs added records must only
65+
implement this method, not new_records(). The ImportRunner relies on this contract to decide between
66+
insert and update operations.
67+
"""
68+
raise StopIteration
5669

5770

5871
# The following data classes express the contract between data sources and the import runner.
59-
# Data sources are expected to be usable as context managers and generators, yielding
60-
# batches of VulnerabilityInfo sequences.
72+
# Data sources are expected to be usable as context managers and generators, yielding batches of VulnerabilityInfo
73+
# sequences.
6174

6275
@dataclass
6376
class Package:
6477
name: str
65-
namespace: str
6678
type: str
6779
version: str
68-
qualifiers: str
69-
subpath: str
70-
references: Sequence[str]
80+
namespace: Optional[str] = ''
81+
qualifiers: Optional[str] = ''
82+
subpath: Optional[str] = ''
83+
references: Optional[Sequence[str]] = None
7184

7285

7386
@dataclass
7487
class VulnerabilityInfo:
75-
cve_id: str
7688
summary: str
77-
affected_packages: Sequence[Package]
78-
unaffected_packages: Sequence[Package]
7989
fixed_packages: Sequence[Package]
80-
references: Sequence[str]
90+
affected_packages: Sequence[Package]
91+
unaffected_packages: Optional[Sequence[Package]] = None
92+
cve_id: Optional[str] = None
93+
references: Optional[Sequence[str]] = None

vulnerabilities/import_runner.py

+28-8
Original file line numberDiff line numberDiff line change
@@ -28,24 +28,44 @@
2828
logger = logging.getLogger(__name__)
2929

3030

31-
# TODO This really should use asyncio for network and database, but sadly the Django ORM won't allow it.
3231
class ImportRunner:
32+
"""
33+
The ImportRunner is responsible for inserting and updating data about vulnerabilities and
34+
affected/unaffected/fixed packages in the database. The two main goals for the implementation are correctness and
35+
efficiency.
3336
37+
Correctness:
38+
- There must be no duplicates in the database (should be enforced by the schema).
39+
- Valid data from the data source must never be skipped or truncated.
40+
41+
Efficiency:
42+
- Bulk inserts should be used whenever possible.
43+
- Checking whether a record already exists should be kept to a minimum
44+
(the data source should know this instead).
45+
- All update and select operations must use indexed columns.
46+
"""
3447
def __init__(self, importer, batch_size=None):
3548
self.importer = importer
3649
self.batch_size = batch_size
3750

3851
def run(self, cutoff_date=None):
52+
"""
53+
Create a data source for the given importer and store the data retrieved in the database.
54+
55+
Data sources provide two kinds of records: Vulnerabilities and packages. Vulnerabilities are potentially shared
56+
across many packages, from the same data source and from different data sources. For example, a vulnerability
57+
in the Linux kernel is mentioned by advisories from all Linux distributions that include this kernel version.
58+
59+
Therefore this method always checks whether vulnerabilities emitted by the data source already exist and if so,
60+
their primary key is cached so they can be efficiently linked to packages that are inserted later.
61+
"""
3962
logger.debug(f'Starting import for {self.importer.name}.')
4063
data_source = self.importer.make_data_source(cutoff_date=cutoff_date, batch_size=self.batch_size)
64+
4165
with data_source as ds:
42-
for batch in ds:
43-
# TODO
44-
# Check if any Vulnerability or Package from this batch already exists in the DB
45-
# If not: Bulk insert everything
46-
# If yes: Update existing ones and bulk insert the rest
47-
pass
66+
# TODO
4867

4968
self.importer.last_run = datetime.datetime.utcnow()
5069
self.importer.save()
51-
logger.debug(f'Successfully finished import for {self.importer.name}.')
70+
71+
logger.debug(f'Successfully finished import for {self.importer.name}.')
File renamed without changes.

vulnerabilities/scraper/safety_db.py renamed to vulnerabilities/importers/safety_db.py

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def import_vulnerabilities():
4040
cve_id = advisory.get('cve')
4141
vuln_id = advisory['id']
4242
vuln_version_ranges = advisory['specs']
43+
affected_versions = set()
4344
for vuln_version_range in vuln_version_ranges:
4445
version_range = RangeSpecifier(vuln_version_range)
4546
affected_versions = set()

vulnerabilities/management/commands/import.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323

2424
from datetime import datetime
2525

26-
from django.core.management.base import BaseCommand, CommandError
26+
from django.core.management.base import BaseCommand
27+
from django.core.management.base import CommandError
2728

2829
from vulnerabilities.models import Importer
2930
from vulnerabilities.import_runner import ImportRunner
@@ -76,14 +77,14 @@ def list_sources(self):
7677
def import_data(self, names, cutoff_date):
7778
importers = []
7879
unknown_importers = set()
79-
80+
8081
# make sure all arguments are valid before running any importers
8182
for name in names:
8283
try:
8384
importers.append(Importer.objects.get(name=name))
8485
except Importer.DoesNotExist:
8586
unknown_importers.add(name)
86-
87+
8788
if unknown_importers:
8889
unknown_importers = ', '.join(unknown_importers)
8990
raise CommandError(f'Unknown data sources: {unknown_importers}')

vulnerabilities/migrations/0001_initial.py

-11
Original file line numberDiff line numberDiff line change
@@ -50,17 +50,6 @@ class Migration(migrations.Migration):
5050
('vulnerability', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Vulnerability')),
5151
],
5252
),
53-
migrations.CreateModel(
54-
name='PackageReference',
55-
fields=[
56-
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
57-
('repository', models.CharField(blank=True, help_text='Repository URL eg:http://central.maven.org', max_length=100)),
58-
('platform', models.CharField(blank=True, help_text='Platform eg:maven', max_length=50)),
59-
('name', models.CharField(blank=True, help_text='Package reference name eg:org.apache.commons.io', max_length=50)),
60-
('version', models.CharField(blank=True, help_text='Reference version', max_length=50)),
61-
('package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='vulnerabilities.Package')),
62-
],
63-
),
6453
migrations.AddField(
6554
model_name='package',
6655
name='vulnerabilities',

vulnerabilities/models.py

+10-46
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@
3333

3434
class Vulnerability(models.Model):
3535
"""
36-
A software vulnerability with minimal information.
37-
Identifiers other than CVE ID are stored as VulnerabilityReference.
36+
A software vulnerability with minimal information. Identifiers other than CVE ID are stored as
37+
VulnerabilityReference.
3838
"""
3939
cve_id = models.CharField(max_length=50, help_text='CVE ID', unique=True, null=True)
4040
summary = models.TextField(help_text='Summary of the vulnerability', blank=True)
@@ -49,8 +49,7 @@ class Meta:
4949

5050
class VulnerabilityReference(models.Model):
5151
"""
52-
A reference to a vulnerability such as a security advisory from
53-
a Linux distribution or language package manager.
52+
A reference to a vulnerability such as a security advisory from a Linux distribution or language package manager.
5453
"""
5554
vulnerability = models.ForeignKey(
5655
Vulnerability, on_delete=models.CASCADE)
@@ -70,13 +69,12 @@ def __str__(self):
7069

7170
class Package(PackageURLMixin):
7271
"""
73-
A software package with minimal identifying information.
74-
Other identifiers are stored as PackageReference.
72+
A software package with links to relevant vulnerabilities.
7573
"""
7674
vulnerabilities = models.ManyToManyField(to='Vulnerability', through='ImpactedPackage')
7775

7876
def __str__(self):
79-
return self.name
77+
return self.package_url
8078

8179

8280
class ImpactedPackage(models.Model):
@@ -92,56 +90,23 @@ class Meta:
9290

9391
class ResolvedPackage(models.Model):
9492
"""
95-
Relates a vulnerability to package(s) that contain
96-
a fix or resolution of this vulnerability.
93+
Relates a vulnerability to package(s) that contain a fix or resolution of this vulnerability.
9794
"""
9895
vulnerability = models.ForeignKey(Vulnerability, on_delete=models.CASCADE)
9996
package = models.ForeignKey(Package, on_delete=models.CASCADE)
10097

10198

102-
class PackageReference(models.Model):
103-
"""
104-
One or more identifiers and references for a software package
105-
in a package repository, such as a Debian, Maven or NPM repository.
106-
"""
107-
package = models.ForeignKey(Package, on_delete=models.CASCADE)
108-
repository = models.CharField(
109-
max_length=100,
110-
help_text='Repository URL eg:http://central.maven.org',
111-
blank=True,
112-
)
113-
platform = models.CharField(
114-
max_length=50,
115-
help_text='Platform eg:maven',
116-
blank=True,
117-
)
118-
name = models.CharField(
119-
max_length=50,
120-
help_text='Package reference name eg:org.apache.commons.io',
121-
blank=True,
122-
)
123-
version = models.CharField(
124-
max_length=50,
125-
help_text='Reference version',
126-
blank=True,
127-
)
128-
129-
def __str__(self):
130-
return self.platform
131-
132-
13399
class Importer(models.Model):
134100
"""
135-
Metadata and pointer to the implementation for a source
136-
of vulnerability data (aka security advisories)
101+
Metadata and pointer to the implementation for a source of vulnerability data (aka security advisories)
137102
"""
138103
name = models.CharField(max_length=100, unique=True, help_text='Name of the importer')
139104
license = models.CharField(max_length=100, blank=True, help_text='License of the vulnerability data')
140105
last_run = models.DateTimeField(null=True, help_text='UTC Timestamp of the last run')
141-
106+
142107
data_source = models.CharField(
143108
max_length=100,
144-
help_text='Class name of the data source implementation importable from vulnerabilities.importers',
109+
help_text='Class name of the data source implementation importable from vulnerabilities.importers',
145110
)
146111
data_source_cfg = pgfields.JSONField(
147112
null=False,
@@ -151,8 +116,7 @@ class Importer(models.Model):
151116

152117
def make_data_source(self, cutoff_date=None, batch_size=None) -> DataSource:
153118
"""
154-
Return a configured and ready to use instance of
155-
this importers data source implementation.
119+
Return a configured and ready to use instance of this importer's data source implementation.
156120
157121
cutoff_date - timestamp of the oldest data to include in the import (default: self.last_run)
158122
batch_size - max. number of records to return on each iteration

vulnerabilities/tests/test_data_dump.py

-8
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727

2828
from vulnerabilities.models import ImpactedPackage
2929
from vulnerabilities.models import Package
30-
from vulnerabilities.models import PackageReference
3130
from vulnerabilities.models import ResolvedPackage
3231
from vulnerabilities.models import Vulnerability
3332
from vulnerabilities.models import VulnerabilityReference
@@ -153,13 +152,6 @@ def test_arch_Package(setArchLinuxData):
153152
assert 'archlinux' == pkg.namespace
154153

155154

156-
def test_arch_PackageReference(setArchLinuxData):
157-
"""
158-
Check that no package references were found in the test data
159-
"""
160-
assert 0 == PackageReference.objects.count()
161-
162-
163155
def test_arch_ImpactedPackage(setArchLinuxData):
164156
"""
165157
Check there is one ImpactedPackage for the number of packages

0 commit comments

Comments
 (0)