Skip to content

Commit 7f1b0ce

Browse files
committed
Make use of get_or_create to reduce the number of objects created

Fixes #40

Signed-off-by: Ranvir Singh <[email protected]>
1 parent af561ae commit 7f1b0ce

File tree

3 files changed, with 89 additions and 32 deletions.

vulnerabilities/data_dump.py

+22-20
Original file line numberDiff line numberDiff line change
@@ -34,39 +34,39 @@ def debian_dump(extract_data):
3434
Save data scraped from Debian's security tracker.
3535
"""
3636
for data in extract_data:
37-
vulnerability = Vulnerability.objects.create(
37+
vulnerability, _ = Vulnerability.objects.get_or_create(
3838
summary=data.get('description', ''),
3939
)
40-
VulnerabilityReference.objects.create(
40+
VulnerabilityReference.objects.get_or_create(
4141
vulnerability=vulnerability,
4242
reference_id=data.get('vulnerability_id', ''),
4343
)
4444

4545
pkg_name = data.get('package_name', '')
46-
package = Package.objects.create(
46+
package, _ = Package.objects.get_or_create(
4747
name=pkg_name,
4848
version=data.get('version', ''),
4949
)
5050

5151
if data['status'] == 'open':
52-
ImpactedPackage.objects.create(
52+
ImpactedPackage.objects.get_or_create(
5353
vulnerability=vulnerability,
5454
package=package
5555
)
5656
else:
57-
ResolvedPackage.objects.create(
57+
ResolvedPackage.objects.get_or_create(
5858
vulnerability=vulnerability,
5959
package=package
6060
)
6161

6262
fixed_version = data.get('fixed_version')
6363
if fixed_version:
64-
package = Package.objects.create(
64+
package, _ = Package.objects.get_or_create(
6565
name=pkg_name,
6666
version=fixed_version,
6767
)
6868

69-
ResolvedPackage.objects.create(
69+
ResolvedPackage.objects.get_or_create(
7070
vulnerability=vulnerability,
7171
package=package
7272
)
@@ -77,17 +77,19 @@ def ubuntu_dump(html):
7777
Dump data scraped from Ubuntu's security tracker.
7878
"""
7979
for data in html:
80-
vulnerability = Vulnerability.objects.create(
81-
summary='',
80+
# Give each CVE its own summary: with an empty summary, get_or_create
81+
# would return the same Vulnerability for every VulnerabilityReference.
82+
vulnerability, _ = Vulnerability.objects.get_or_create(
83+
summary='{} vulnerability'.format(data.get('cve_id')),
8284
)
83-
VulnerabilityReference.objects.create(
85+
VulnerabilityReference.objects.get_or_create(
8486
vulnerability=vulnerability,
8587
reference_id=data.get('cve_id'),
8688
)
87-
package = Package.objects.create(
89+
package, _ = Package.objects.get_or_create(
8890
name=data.get('package_name'),
8991
)
90-
ImpactedPackage.objects.create(
92+
ImpactedPackage.objects.get_or_create(
9193
vulnerability=vulnerability,
9294
package=package
9395
)
@@ -111,39 +113,39 @@ def archlinux_dump(extract_data):
111113
if not fixed_version:
112114
fixed_version = 'None'
113115

114-
vulnerability = Vulnerability.objects.create(
116+
vulnerability, _ = Vulnerability.objects.get_or_create(
115117
summary=item['type'],
116118
)
117119

118120
for vulnerability_id in vulnerabilities:
119-
VulnerabilityReference.objects.create(
121+
VulnerabilityReference.objects.get_or_create(
120122
vulnerability=vulnerability,
121123
reference_id=vulnerability_id,
122124
url='https://security.archlinux.org/{}'.format(vulnerability_id)
123125
)
124126

125127
for package_name in packages_name:
126-
package_affected = Package.objects.create(
128+
package_affected, _ = Package.objects.get_or_create(
127129
name=package_name,
128130
version=affected_version
129131
)
130-
ImpactedPackage.objects.create(
132+
ImpactedPackage.objects.get_or_create(
131133
vulnerability=vulnerability,
132134
package=package_affected
133135
)
134-
PackageReference.objects.create(
136+
PackageReference.objects.get_or_create(
135137
package=package_affected,
136138
repository='https://security.archlinux.org/package/{}'.format(package_name)
137139
)
138-
package_fixed = Package.objects.create(
140+
package_fixed, _ = Package.objects.get_or_create(
139141
name=package_name,
140142
version=fixed_version
141143
)
142-
ResolvedPackage.objects.create(
144+
ResolvedPackage.objects.get_or_create(
143145
vulnerability=vulnerability,
144146
package=package_fixed
145147
)
146-
PackageReference.objects.create(
148+
PackageReference.objects.get_or_create(
147149
package=package_fixed,
148150
repository='https://security.archlinux.org/package/{}'.format(package_name)
149151
)

vulnerabilities/tests/test_api.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def test_debian_response(self):
4747
debian_dump(extract_data)
4848
response = self.client.get('/api/packages/?name=mimetex', format='json').data
4949

50-
self.assertEqual(4, response['count'])
50+
self.assertEqual(2, response['count'])
5151

5252
first_result = response['results'][0]
5353
self.assertEqual('mimetex', first_result['name'])
@@ -69,7 +69,7 @@ def test_ubuntu_response(self):
6969
"version": "",
7070
"platform": "",
7171
"vulnerabilities": [{
72-
"summary": "",
72+
"summary": "CVE-2012-3386 vulnerability",
7373
"cvss": None,
7474
"references": [{
7575
"reference_id": "CVE-2012-3386",
@@ -93,7 +93,7 @@ def test_serializers(self):
9393
pk = Package.objects.filter(name="mimetex")
9494
response = PackageSerializer(pk, many=True).data
9595

96-
self.assertEqual(4, len(response))
96+
self.assertEqual(2, len(response))
9797

9898
first_result = response[0]
9999
self.assertEqual('mimetex', first_result['name'])

vulnerabilities/tests/test_data_dump.py

+64-9
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ def setUpTestData(self):
5151
with open(os.path.join(TEST_DATA, 'debian.json')) as f:
5252
test_data = json.load(f)
5353

54-
extract_data = debian.extract_vulnerabilities(test_data)
55-
debian_dump(extract_data)
54+
self.extract_data = debian.extract_vulnerabilities(test_data)
55+
debian_dump(self.extract_data)
5656

5757
def test_Vulnerability(self):
5858
"""
@@ -83,9 +83,7 @@ def test_Package(self):
8383
"""
8484
Check that all packages from the test data are stored in the database
8585
"""
86-
# There are five rows in Package because currently the models allow duplicates
87-
# (see issue #28).
88-
self.assertEqual(5, Package.objects.count())
86+
self.assertEqual(3, Package.objects.count())
8987

9088
self.assertTrue(Package.objects.filter(name='mimetex'))
9189
self.assertTrue(Package.objects.get(name='librsync'))
@@ -114,15 +112,33 @@ def test_ResolvedPackage(self):
114112
self.assertIn('1.50-1.1', versions)
115113
self.assertIn('1.74-1', versions)
116114

115+
def test_debian_data_dump_twice(self):
116+
"""
117+
Scrape data from Debian's main tracker, save it
118+
in the database and verify entries.
119+
"""
120+
debian_dump(self.extract_data)
121+
122+
self.assertEqual(3, Vulnerability.objects.count())
123+
self.assertEqual(3, VulnerabilityReference.objects.count())
124+
self.assertEqual(3, Package.objects.count())
125+
126+
# Dumping the data twice doesn't create new objects.
127+
debian_dump(self.extract_data)
128+
129+
self.assertEqual(3, Vulnerability.objects.count())
130+
self.assertEqual(3, VulnerabilityReference.objects.count())
131+
self.assertEqual(3, Package.objects.count())
132+
117133

118134
class TestUbuntuDataDump(TestCase):
119135
@classmethod
120136
def setUpTestData(self):
121137
with open(os.path.join(TEST_DATA, 'ubuntu_main.html')) as f:
122138
test_data = f.read()
123139

124-
data = ubuntu.extract_cves(test_data)
125-
ubuntu_dump(data)
140+
self.data = ubuntu.extract_cves(test_data)
141+
ubuntu_dump(self.data)
126142

127143
def test_data_dump(self):
128144
"""
@@ -132,15 +148,32 @@ def test_data_dump(self):
132148
self.assertEqual(reference.reference_id, 'CVE-2002-2439')
133149
self.assertTrue(Package.objects.filter(name='gcc-4.6')[0].name, 'gcc-4.6')
134150

151+
def test_ubuntu_data_dump_twice(self):
152+
"""
153+
Scrape data from Ubuntu's main tracker twice, save it
153+
in the database and verify that each entry is stored only once.
155+
"""
156+
ubuntu_dump(self.data)
157+
count = Package.objects.all().count()
158+
reference = VulnerabilityReference.objects.filter(
159+
reference_id='CVE-2002-2439')
160+
self.assertEqual(reference[0].reference_id, 'CVE-2002-2439')
161+
self.assertTrue(Package.objects.filter(name='gcc-4.6')[0].name,
162+
'gcc-4.6')
163+
164+
# Dumping the data twice doesn't create new objects.
165+
ubuntu_dump(self.data)
166+
self.assertEqual(count, Package.objects.all().count())
167+
135168

136169
class TestArchLinuxDataDump(TestCase):
137170

138171
@classmethod
139172
def setUpTestData(self):
140173
with open(os.path.join(TEST_DATA, 'archlinux.json')) as f:
141-
test_data = json.load(f)
174+
self.test_data = json.load(f)
142175

143-
archlinux_dump(test_data)
176+
archlinux_dump(self.test_data)
144177

145178
def test_Vulnerability(self):
146179
"""
@@ -190,3 +223,25 @@ def test_ResolvedPackage(self):
190223

191224
self.assertEqual(4, len(resolved_pkgs))
192225
self.assertEqual('2.6.1-1', resolved_pkg.package.version)
226+
227+
def test_archlinux_data_dump_twice(self):
228+
"""
229+
Scrape data from Arch Linux's main tracker twice, save it
229+
in the database and verify that no duplicate entries are created.
231+
"""
232+
archlinux_dump(self.test_data)
233+
self.assertEqual(1, Vulnerability.objects.count())
234+
self.assertEqual(14, VulnerabilityReference.objects.count())
235+
self.assertEqual(8, Package.objects.count())
236+
self.assertEqual(8, PackageReference.objects.count())
237+
self.assertEqual(4, ImpactedPackage.objects.count())
238+
self.assertEqual(4, ResolvedPackage.objects.count())
239+
240+
# Dumping the data twice doesn't create new objects.
241+
archlinux_dump(self.test_data)
242+
self.assertEqual(1, Vulnerability.objects.count())
243+
self.assertEqual(14, VulnerabilityReference.objects.count())
244+
self.assertEqual(8, Package.objects.count())
245+
self.assertEqual(8, PackageReference.objects.count())
246+
self.assertEqual(4, ImpactedPackage.objects.count())
247+
self.assertEqual(4, ResolvedPackage.objects.count())

0 commit comments

Comments
 (0)