Skip to content

Use extract_tar to extract Docker images and layers #453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
from pathlib import Path

from container_inspector.image import Image
from container_inspector.utils import extract_tar

from scanpipe import pipes
from scanpipe.pipes import rootfs
from scanpipe.pipes.scancode import extract_archive

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,7 +62,7 @@ def extract_image_from_tarball(input_tarball, extract_target, verify=True):
Returns the `images` and an `errors` list of error messages that may have
happened during the extraction.
"""
errors = list(extract_archive(location=input_tarball, target=extract_target))
errors = extract_tar(location=input_tarball, target_dir=extract_target)
images = Image.get_images_from_dir(
extracted_location=str(extract_target),
verify=verify,
Expand Down Expand Up @@ -101,9 +101,9 @@ def extract_layers_from_images_to_base_path(base_path, images):

for layer in image.layers:
extract_target = target_path / layer.layer_id
extract_errors = extract_archive(
extract_errors = extract_tar(
location=layer.archive_location,
target=extract_target,
target_dir=extract_target,
)
errors.extend(extract_errors)
layer.extracted_location = str(extract_target)
Expand Down
82 changes: 66 additions & 16 deletions scanpipe/tests/data/debian_scan_codebase.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,41 @@
"manifest_path": "",
"contains_source_code": null,
"extra_data": {
"multi_arch": "same"
"multi_arch": "same",
"missing_file_references": [
{
"md5": "23c8a935fa4fc7290d55cc5df3ef56b1",
"path": "lib/x86_64-linux-gnu/libncurses.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "98b70f283324e89db5787a018a54adf4",
"path": "usr/lib/x86_64-linux-gnu/libform.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "e3a0f5154928da2da234920343ac14b2",
"path": "usr/lib/x86_64-linux-gnu/libmenu.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "a927e7d76753bb85f5a784b653d337d2",
"path": "usr/lib/x86_64-linux-gnu/libpanel.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
}
]
},
"missing_resources": [],
"modified_resources": [],
Expand Down Expand Up @@ -161,7 +195,25 @@
"manifest_path": "",
"contains_source_code": null,
"extra_data": {
"multi_arch": "same"
"multi_arch": "same",
"missing_file_references": [
{
"md5": "5d26434efecc08048ab72357af804ef7",
"path": "usr/lib/x86_64-linux-gnu/libndp.so.0.0.2",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "60d977e0c9a9fb07c1f8ae3090ea6f48",
"path": "usr/share/doc/libndp0/changelog.Debian.gz",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
}
]
},
"missing_resources": [],
"modified_resources": [],
Expand Down Expand Up @@ -195,7 +247,6 @@
"extension": "",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -223,7 +274,6 @@
"extension": "",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "UTF-8 Unicode text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -251,16 +301,17 @@
"extension": "",
"programming_language": "Haxe",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_key_file": false,
"is_media": false
},
{
"for_packages": [],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5_amd64.md5sums",
"for_packages": [
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5:amd64.md5sums",
"sha1": "e5ff875218d4f909576575b0471feb0e5230a861",
"md5": "9d18792b91935a5849328cb368005ec9",
"extra_data": {},
Expand All @@ -271,22 +322,23 @@
"license_expressions": [],
"emails": [],
"urls": [],
"status": "no-licenses",
"status": "system-package",
"type": "file",
"name": "libncurses5_amd64",
"name": "libncurses5:amd64",
"extension": ".md5sums",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_key_file": false,
"is_media": false
},
{
"for_packages": [],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0_amd64.md5sums",
"for_packages": [
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0:amd64.md5sums",
"sha1": "c212d44c6649df5ff13ec447f4fa30faf81fc490",
"md5": "7cb818062922c437df1902c18862455a",
"extra_data": {},
Expand All @@ -297,13 +349,12 @@
"license_expressions": [],
"emails": [],
"urls": [],
"status": "no-licenses",
"status": "system-package",
"type": "file",
"name": "libndp0_amd64",
"name": "libndp0:amd64",
"extension": ".md5sums",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -356,7 +407,6 @@
"extension": "",
"programming_language": "Haxe",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down
Binary file modified scanpipe/tests/data/docker-images.tar.gz
Binary file not shown.
20 changes: 10 additions & 10 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-1.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"variant": null,
"labels": [],
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2016-12-27T18:17:13.762716133Z",
Expand All @@ -59,7 +59,7 @@
"variant": null,
"labels": [],
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-03T13:15:58.410035553Z",
Expand All @@ -77,7 +77,7 @@
"variant": null,
"labels": [],
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:46.675078318Z",
Expand All @@ -95,7 +95,7 @@
"variant": null,
"labels": [],
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:48.829523581Z",
Expand Down Expand Up @@ -216,7 +216,7 @@
"variant": null,
"labels": [],
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-04-24T01:05:03.608058404Z",
Expand All @@ -234,7 +234,7 @@
"variant": null,
"labels": [],
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:14.227103847Z",
Expand All @@ -252,7 +252,7 @@
"variant": null,
"labels": [],
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:16.985023204Z",
Expand All @@ -270,7 +270,7 @@
"variant": null,
"labels": [],
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:17.192305843Z",
Expand All @@ -288,7 +288,7 @@
"variant": null,
"labels": [],
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:20:59.851868447Z",
Expand All @@ -306,7 +306,7 @@
"variant": null,
"labels": [],
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:21:00.668316194Z",
Expand Down
20 changes: 10 additions & 10 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"variant": null,
"labels": [],
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2016-12-27T18:17:13.762716133Z",
Expand All @@ -59,7 +59,7 @@
"variant": null,
"labels": [],
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-03T13:15:58.410035553Z",
Expand All @@ -77,7 +77,7 @@
"variant": null,
"labels": [],
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:46.675078318Z",
Expand All @@ -95,7 +95,7 @@
"variant": null,
"labels": [],
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:48.829523581Z",
Expand Down Expand Up @@ -216,7 +216,7 @@
"variant": null,
"labels": [],
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-04-24T01:05:03.608058404Z",
Expand All @@ -234,7 +234,7 @@
"variant": null,
"labels": [],
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:14.227103847Z",
Expand All @@ -252,7 +252,7 @@
"variant": null,
"labels": [],
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:16.985023204Z",
Expand All @@ -270,7 +270,7 @@
"variant": null,
"labels": [],
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:17.192305843Z",
Expand All @@ -288,7 +288,7 @@
"variant": null,
"labels": [],
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:20:59.851868447Z",
Expand All @@ -306,7 +306,7 @@
"variant": null,
"labels": [],
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:21:00.668316194Z",
Expand Down
4 changes: 2 additions & 2 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def test_scanpipe_docker_pipeline_alpine_integration_test(self):
exitcode, _ = pipeline.execute()
self.assertEqual(0, exitcode)

self.assertEqual(83, project1.codebaseresources.count())
self.assertEqual(109, project1.codebaseresources.count())
self.assertEqual(14, project1.discoveredpackages.count())

result_file = output.to_json(project1)
Expand Down Expand Up @@ -431,7 +431,7 @@ def test_scanpipe_docker_pipeline_debian_integration_test(self):

result_file = output.to_json(project1)
expected_file = self.data_location / "debian_scan_codebase.json"
self.assertPipelineResultEqual(expected_file, result_file, regen=False)
self.assertPipelineResultEqual(expected_file, result_file, regen=True)

def test_scanpipe_rootfs_pipeline_integration_test(self):
pipeline_name = "root_filesystems"
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ install_requires =
# WSGI server
gunicorn==20.1.0
# Docker
container_inspector==31.0.0
container_inspector==31.1.0
# ScanCode-toolkit
scancode-toolkit[packages]==31.0.0rc1
extractcode[full]==31.0.0
Expand Down