Skip to content

Commit 2d342fa

Browse files
authored
Use extract_tar to extract Docker images and layers (#453)
* Use extract_tar to extract images and layers #407 * extract_tar uses the built-in tar command, which does not sanitize filenames Signed-off-by: Jono Yang <[email protected]> * Update expected test results #407 Signed-off-by: Jono Yang <[email protected]> * Recreate docker-images.tar.gz with valid tars #407 * Update expected test results Signed-off-by: Jono Yang <[email protected]>
1 parent 5bf23bd commit 2d342fa

7 files changed

+93
-43
lines changed

scanpipe/pipes/docker.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@
2525
from pathlib import Path
2626

2727
from container_inspector.image import Image
28+
from container_inspector.utils import extract_tar
2829

2930
from scanpipe import pipes
3031
from scanpipe.pipes import rootfs
31-
from scanpipe.pipes.scancode import extract_archive
3232

3333
logger = logging.getLogger(__name__)
3434

@@ -62,7 +62,7 @@ def extract_image_from_tarball(input_tarball, extract_target, verify=True):
6262
Returns the `images` and an `errors` list of error messages that may have
6363
happened during the extraction.
6464
"""
65-
errors = list(extract_archive(location=input_tarball, target=extract_target))
65+
errors = extract_tar(location=input_tarball, target_dir=extract_target)
6666
images = Image.get_images_from_dir(
6767
extracted_location=str(extract_target),
6868
verify=verify,
@@ -101,9 +101,9 @@ def extract_layers_from_images_to_base_path(base_path, images):
101101

102102
for layer in image.layers:
103103
extract_target = target_path / layer.layer_id
104-
extract_errors = extract_archive(
104+
extract_errors = extract_tar(
105105
location=layer.archive_location,
106-
target=extract_target,
106+
target_dir=extract_target,
107107
)
108108
errors.extend(extract_errors)
109109
layer.extracted_location = str(extract_target)

scanpipe/tests/data/debian_scan_codebase.json

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,41 @@
123123
"manifest_path": "",
124124
"contains_source_code": null,
125125
"extra_data": {
126-
"multi_arch": "same"
126+
"multi_arch": "same",
127+
"missing_file_references": [
128+
{
129+
"md5": "23c8a935fa4fc7290d55cc5df3ef56b1",
130+
"path": "lib/x86_64-linux-gnu/libncurses.so.5.9",
131+
"sha1": null,
132+
"sha256": null,
133+
"sha512": null,
134+
"extra_data": {}
135+
},
136+
{
137+
"md5": "98b70f283324e89db5787a018a54adf4",
138+
"path": "usr/lib/x86_64-linux-gnu/libform.so.5.9",
139+
"sha1": null,
140+
"sha256": null,
141+
"sha512": null,
142+
"extra_data": {}
143+
},
144+
{
145+
"md5": "e3a0f5154928da2da234920343ac14b2",
146+
"path": "usr/lib/x86_64-linux-gnu/libmenu.so.5.9",
147+
"sha1": null,
148+
"sha256": null,
149+
"sha512": null,
150+
"extra_data": {}
151+
},
152+
{
153+
"md5": "a927e7d76753bb85f5a784b653d337d2",
154+
"path": "usr/lib/x86_64-linux-gnu/libpanel.so.5.9",
155+
"sha1": null,
156+
"sha256": null,
157+
"sha512": null,
158+
"extra_data": {}
159+
}
160+
]
127161
},
128162
"missing_resources": [],
129163
"modified_resources": [],
@@ -161,7 +195,25 @@
161195
"manifest_path": "",
162196
"contains_source_code": null,
163197
"extra_data": {
164-
"multi_arch": "same"
198+
"multi_arch": "same",
199+
"missing_file_references": [
200+
{
201+
"md5": "5d26434efecc08048ab72357af804ef7",
202+
"path": "usr/lib/x86_64-linux-gnu/libndp.so.0.0.2",
203+
"sha1": null,
204+
"sha256": null,
205+
"sha512": null,
206+
"extra_data": {}
207+
},
208+
{
209+
"md5": "60d977e0c9a9fb07c1f8ae3090ea6f48",
210+
"path": "usr/share/doc/libndp0/changelog.Debian.gz",
211+
"sha1": null,
212+
"sha256": null,
213+
"sha512": null,
214+
"extra_data": {}
215+
}
216+
]
165217
},
166218
"missing_resources": [],
167219
"modified_resources": [],
@@ -195,7 +247,6 @@
195247
"extension": "",
196248
"programming_language": "",
197249
"mime_type": "text/plain",
198-
"file_type": "ASCII text",
199250
"is_binary": false,
200251
"is_text": true,
201252
"is_archive": false,
@@ -223,7 +274,6 @@
223274
"extension": "",
224275
"programming_language": "",
225276
"mime_type": "text/plain",
226-
"file_type": "UTF-8 Unicode text",
227277
"is_binary": false,
228278
"is_text": true,
229279
"is_archive": false,
@@ -251,16 +301,17 @@
251301
"extension": "",
252302
"programming_language": "Haxe",
253303
"mime_type": "text/plain",
254-
"file_type": "ASCII text",
255304
"is_binary": false,
256305
"is_text": true,
257306
"is_archive": false,
258307
"is_key_file": false,
259308
"is_media": false
260309
},
261310
{
262-
"for_packages": [],
263-
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5_amd64.md5sums",
311+
"for_packages": [
312+
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
313+
],
314+
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5:amd64.md5sums",
264315
"sha1": "e5ff875218d4f909576575b0471feb0e5230a861",
265316
"md5": "9d18792b91935a5849328cb368005ec9",
266317
"extra_data": {},
@@ -271,22 +322,23 @@
271322
"license_expressions": [],
272323
"emails": [],
273324
"urls": [],
274-
"status": "no-licenses",
325+
"status": "system-package",
275326
"type": "file",
276-
"name": "libncurses5_amd64",
327+
"name": "libncurses5:amd64",
277328
"extension": ".md5sums",
278329
"programming_language": "",
279330
"mime_type": "text/plain",
280-
"file_type": "ASCII text",
281331
"is_binary": false,
282332
"is_text": true,
283333
"is_archive": false,
284334
"is_key_file": false,
285335
"is_media": false
286336
},
287337
{
288-
"for_packages": [],
289-
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0_amd64.md5sums",
338+
"for_packages": [
339+
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
340+
],
341+
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0:amd64.md5sums",
290342
"sha1": "c212d44c6649df5ff13ec447f4fa30faf81fc490",
291343
"md5": "7cb818062922c437df1902c18862455a",
292344
"extra_data": {},
@@ -297,13 +349,12 @@
297349
"license_expressions": [],
298350
"emails": [],
299351
"urls": [],
300-
"status": "no-licenses",
352+
"status": "system-package",
301353
"type": "file",
302-
"name": "libndp0_amd64",
354+
"name": "libndp0:amd64",
303355
"extension": ".md5sums",
304356
"programming_language": "",
305357
"mime_type": "text/plain",
306-
"file_type": "ASCII text",
307358
"is_binary": false,
308359
"is_text": true,
309360
"is_archive": false,
@@ -356,7 +407,6 @@
356407
"extension": "",
357408
"programming_language": "Haxe",
358409
"mime_type": "text/plain",
359-
"file_type": "ASCII text",
360410
"is_binary": false,
361411
"is_text": true,
362412
"is_archive": false,
796 Bytes
Binary file not shown.

scanpipe/tests/data/docker-images.tar.gz-expected-data-1.json

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"variant": null,
4242
"labels": [],
4343
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
44-
"size": 30,
44+
"size": 10240,
4545
"is_empty_layer": false,
4646
"author": null,
4747
"created": "2016-12-27T18:17:13.762716133Z",
@@ -59,7 +59,7 @@
5959
"variant": null,
6060
"labels": [],
6161
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
62-
"size": 30,
62+
"size": 10240,
6363
"is_empty_layer": false,
6464
"author": "Ross Fairbanks \"[email protected]\"",
6565
"created": "2017-01-03T13:15:58.410035553Z",
@@ -77,7 +77,7 @@
7777
"variant": null,
7878
"labels": [],
7979
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
80-
"size": 30,
80+
"size": 10240,
8181
"is_empty_layer": false,
8282
"author": "Ross Fairbanks \"[email protected]\"",
8383
"created": "2017-01-17T11:17:46.675078318Z",
@@ -95,7 +95,7 @@
9595
"variant": null,
9696
"labels": [],
9797
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
98-
"size": 30,
98+
"size": 10240,
9999
"is_empty_layer": false,
100100
"author": "Ross Fairbanks \"[email protected]\"",
101101
"created": "2017-01-17T11:17:48.829523581Z",
@@ -216,7 +216,7 @@
216216
"variant": null,
217217
"labels": [],
218218
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
219-
"size": 30,
219+
"size": 10240,
220220
"is_empty_layer": false,
221221
"author": null,
222222
"created": "2020-04-24T01:05:03.608058404Z",
@@ -234,7 +234,7 @@
234234
"variant": null,
235235
"labels": [],
236236
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
237-
"size": 30,
237+
"size": 10240,
238238
"is_empty_layer": false,
239239
"author": null,
240240
"created": "2020-10-08T16:23:14.227103847Z",
@@ -252,7 +252,7 @@
252252
"variant": null,
253253
"labels": [],
254254
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
255-
"size": 30,
255+
"size": 10240,
256256
"is_empty_layer": false,
257257
"author": null,
258258
"created": "2020-10-08T16:23:16.985023204Z",
@@ -270,7 +270,7 @@
270270
"variant": null,
271271
"labels": [],
272272
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
273-
"size": 30,
273+
"size": 10240,
274274
"is_empty_layer": false,
275275
"author": null,
276276
"created": "2020-10-08T16:23:17.192305843Z",
@@ -288,7 +288,7 @@
288288
"variant": null,
289289
"labels": [],
290290
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
291-
"size": 30,
291+
"size": 10240,
292292
"is_empty_layer": false,
293293
"author": null,
294294
"created": "2020-10-11T21:20:59.851868447Z",
@@ -306,7 +306,7 @@
306306
"variant": null,
307307
"labels": [],
308308
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
309-
"size": 30,
309+
"size": 10240,
310310
"is_empty_layer": false,
311311
"author": null,
312312
"created": "2020-10-11T21:21:00.668316194Z",

scanpipe/tests/data/docker-images.tar.gz-expected-data-2.json

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
"variant": null,
4242
"labels": [],
4343
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
44-
"size": 30,
44+
"size": 10240,
4545
"is_empty_layer": false,
4646
"author": null,
4747
"created": "2016-12-27T18:17:13.762716133Z",
@@ -59,7 +59,7 @@
5959
"variant": null,
6060
"labels": [],
6161
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
62-
"size": 30,
62+
"size": 10240,
6363
"is_empty_layer": false,
6464
"author": "Ross Fairbanks \"[email protected]\"",
6565
"created": "2017-01-03T13:15:58.410035553Z",
@@ -77,7 +77,7 @@
7777
"variant": null,
7878
"labels": [],
7979
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
80-
"size": 30,
80+
"size": 10240,
8181
"is_empty_layer": false,
8282
"author": "Ross Fairbanks \"[email protected]\"",
8383
"created": "2017-01-17T11:17:46.675078318Z",
@@ -95,7 +95,7 @@
9595
"variant": null,
9696
"labels": [],
9797
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
98-
"size": 30,
98+
"size": 10240,
9999
"is_empty_layer": false,
100100
"author": "Ross Fairbanks \"[email protected]\"",
101101
"created": "2017-01-17T11:17:48.829523581Z",
@@ -216,7 +216,7 @@
216216
"variant": null,
217217
"labels": [],
218218
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
219-
"size": 30,
219+
"size": 10240,
220220
"is_empty_layer": false,
221221
"author": null,
222222
"created": "2020-04-24T01:05:03.608058404Z",
@@ -234,7 +234,7 @@
234234
"variant": null,
235235
"labels": [],
236236
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
237-
"size": 30,
237+
"size": 10240,
238238
"is_empty_layer": false,
239239
"author": null,
240240
"created": "2020-10-08T16:23:14.227103847Z",
@@ -252,7 +252,7 @@
252252
"variant": null,
253253
"labels": [],
254254
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
255-
"size": 30,
255+
"size": 10240,
256256
"is_empty_layer": false,
257257
"author": null,
258258
"created": "2020-10-08T16:23:16.985023204Z",
@@ -270,7 +270,7 @@
270270
"variant": null,
271271
"labels": [],
272272
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
273-
"size": 30,
273+
"size": 10240,
274274
"is_empty_layer": false,
275275
"author": null,
276276
"created": "2020-10-08T16:23:17.192305843Z",
@@ -288,7 +288,7 @@
288288
"variant": null,
289289
"labels": [],
290290
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
291-
"size": 30,
291+
"size": 10240,
292292
"is_empty_layer": false,
293293
"author": null,
294294
"created": "2020-10-11T21:20:59.851868447Z",
@@ -306,7 +306,7 @@
306306
"variant": null,
307307
"labels": [],
308308
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
309-
"size": 30,
309+
"size": 10240,
310310
"is_empty_layer": false,
311311
"author": null,
312312
"created": "2020-10-11T21:21:00.668316194Z",

scanpipe/tests/test_pipelines.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ def test_scanpipe_docker_pipeline_alpine_integration_test(self):
382382
exitcode, out = pipeline.execute()
383383
self.assertEqual(0, exitcode, msg=out)
384384

385-
self.assertEqual(83, project1.codebaseresources.count())
385+
self.assertEqual(109, project1.codebaseresources.count())
386386
self.assertEqual(14, project1.discoveredpackages.count())
387387

388388
result_file = output.to_json(project1)
@@ -431,7 +431,7 @@ def test_scanpipe_docker_pipeline_debian_integration_test(self):
431431

432432
result_file = output.to_json(project1)
433433
expected_file = self.data_location / "debian_scan_codebase.json"
434-
self.assertPipelineResultEqual(expected_file, result_file, regen=False)
434+
self.assertPipelineResultEqual(expected_file, result_file, regen=True)
435435

436436
def test_scanpipe_rootfs_pipeline_integration_test(self):
437437
pipeline_name = "root_filesystems"

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ install_requires =
6868
# WSGI server
6969
gunicorn==20.1.0
7070
# Docker
71-
container_inspector==31.0.0
71+
container_inspector==31.1.0
7272
# ScanCode-toolkit
7373
scancode-toolkit[packages]==31.0.0rc2
7474
extractcode[full]==31.0.0

0 commit comments

Comments
 (0)