From 53b524a0521ec179608637f09860a4aec5349d34 Mon Sep 17 00:00:00 2001 From: mkosiarc Date: Thu, 28 Nov 2024 09:52:15 +0100 Subject: [PATCH] Improve generation of base images SBOMs The process how the base image SBOM is generated was slightly changed. The most important change is due to requiring different inputs. The script now requires the full json of parsed Dockerfile, so we can extract the base images better. This allows us to extract base images data even for stage aliases. Another advantage is that we are counting the stages/layers more carefully, because we have more information about which stage was from scratch or from oci-archive. KFLUXBUGS-1718 Signed-off-by: mkosiarc --- .../scripts/base-images-sbom-script/README.md | 6 +- .../app/base_images_sbom_script.py | 126 ++++- .../app/test_base_images_sbom_script.py | 492 +++++++++++++++--- 3 files changed, 538 insertions(+), 86 deletions(-) diff --git a/sbom-utility-scripts/scripts/base-images-sbom-script/README.md b/sbom-utility-scripts/scripts/base-images-sbom-script/README.md index 7a416de1..72f8eb53 100644 --- a/sbom-utility-scripts/scripts/base-images-sbom-script/README.md +++ b/sbom-utility-scripts/scripts/base-images-sbom-script/README.md @@ -5,8 +5,10 @@ This is a script that creates sbom data for base images. It is used in It takes several inputs: 1. path to the sbom file, that will be updated in place with the base image data -2. path to a file containing base images as taken from from the dockerfile (with preserved order) -3. path to a file containing base images with digests, generated from the output of **buildah images --format '{{ .Name }}:{{ .Tag }}@{{ .Digest }}'**. The dockerfile order must be preserved as well +2. path to a json file containing parsed Dockerfile via dockerfile-json +3. path to a file containing base image references as used in the Dockerfile mapped to the full image references +with digests. This mapping is expected to be in the format **DOCKERFILE_IMAGE_REFERENCE|FULL_IMAGE_REFERENCE_WITH_DIGEST**, one mapping per line. 
+The full image reference with digest is generated from the output of **buildah images --format '{{ .Name }}:{{ .Tag }}@{{ .Digest }}'**. diff --git a/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py b/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py index ce2221ff..b1c4b129 100644 --- a/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py +++ b/sbom-utility-scripts/scripts/base-images-sbom-script/app/base_images_sbom_script.py @@ -33,29 +33,48 @@ def parse_image_reference_to_parts(image): return ParsedImage(repository=repository, digest=digest, name=name) -def get_base_images_sbom_components(base_images_digests, is_last_from_scratch): +def get_base_images_sbom_components(base_images, base_images_digests): """ Creates the base images sbom data - :param base_images_digests: (List) - list of base images digests, same as BASE_IMAGE_DIGESTS tekton result - :param is_last_from_scratch: (Boolean) - Is the last stage/base image from scratch? + :param base_images: (List) - List of base images used during build, in the order they were used. + :param base_images_digests: (Dict) - Dict of base images digests, where the key is the image reference as + used in the original Dockerfile and the values are the full image reference + with digests that was actually used by buildah during build time. :return: components (List) - List of dict items in which each item contains sbom data about each base image """ components = [] already_used_base_images = set() - # property_name shows whether the image was used only in the building process - # or if it is the final base image. If the final base image is scratch, then - # this is omitted, because we aren't including scratch in the sbom. - for index, image in enumerate(base_images_digests): + for index, image in enumerate(base_images): + # flatpak archive and scratch are not real base images. 
So we skip them, but + # in a way that allows us to keep the correct track of index variable that + # refers to stage number. + if image.startswith("oci-archive") or image == "scratch": + continue + + # property_name shows whether the image was used only in the building process + # or if it is the final base image. property_name = "konflux:container:is_builder_image:for_stage" property_value = str(index) - if index == len(base_images_digests) - 1 and not is_last_from_scratch: + + # This is not reached if the last "image" was scratch or oci-archive. + # That is because we don't consider them base images, and we aren't putting + # them in SBOM + if index == len(base_images) - 1: property_name = "konflux:container:is_base_image" property_value = "true" - parsed_image = parse_image_reference_to_parts(image) + # It could happen that we have a base image from the parsed Dockerfile, but we don't have + # a digest reference for it. This could happen when buildah skipped the stage, due to optimization + # when it is unreachable, or redundant. Since in this case, it was not used in the actual build, + # it is ok to just skip these stages + base_image_digest = base_images_digests.get(image) + if not base_image_digest: + continue + + parsed_image = parse_image_reference_to_parts(base_images_digests[image]) purl = PackageURL( type="oci", @@ -87,23 +106,77 @@ def get_base_images_sbom_components(base_images_digests, is_last_from_scratch): return components +def get_base_images_from_dockerfile(parsed_dockerfile): + """ + Reads the base images from provided parsed dockerfile + + :param parsed_dockerfile: (Dict) - Contents of the parsed dockerfile + :return: base_images (List) - List of base images used during build as extracted + from the dockerfile in the order they were used. + + Example: + If the Dockerfile looks like + FROM registry.access.redhat.com/ubi8/ubi:latest as builder + ... + FROM builder + ... 
+ + Then the relevant part of parsed_dockerfile look like + { + "Stages": [ + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "As": "builder", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + { + "BaseName": "builder", + "From": {"Stage": {"Named": "builder", "Index": 0}}, + }, + ] + }, + """ + base_images = [] + + # this part of the json is the relevant one that contains the + # info about base images + stages = parsed_dockerfile["Stages"] + + for stage in stages: + if "Image" in stage["From"]: + base_images.append(stage["From"]["Image"]) + elif "Scratch" in stage["From"]: + base_images.append("scratch") + elif "Stage" in stage["From"]: + stage_refering_to_index = stage["From"]["Stage"]["Index"] + # Find the original stage/image. Named stage can refer to another named stage, + # so continue looking until we find the image those stages refer to. + while stage_refering_to_index is not None and stage_refering_to_index != {}: + refered_stage = stages[stage_refering_to_index] + stage_refering_to_index = refered_stage.get("From").get("Stage", {}).get("Index", None) + if stage_refering_to_index is None or stage_refering_to_index == {}: + base_images.append(refered_stage["From"]["Image"]) + + return base_images + + def parse_args(): parser = argparse.ArgumentParser( description="Updates the sbom file with base images data based on the provided files" ) parser.add_argument("--sbom", type=pathlib.Path, help="Path to the sbom file", required=True) parser.add_argument( - "--base-images-from-dockerfile", + "--parsed-dockerfile", type=pathlib.Path, - help="Path to the file containing base images extracted from Dockerfile via grep, sed and awk in the buildah " - "task", + help="Path to the file containing parsed Dockerfile in json format extracted " + "from dockerfile-json in buildah task", required=True, ) parser.add_argument( "--base-images-digests", type=pathlib.Path, help="Path to the file containing base images digests." 
- " This is taken from the BASE_IMAGES_DIGEST tekton result that was generated from" + " This is taken from the base_images_digests file that was generated from" "the output of 'buildah images'", required=True, ) @@ -116,21 +189,28 @@ def main(): args = parse_args() - base_images_from_dockerfile = args.base_images_from_dockerfile.read_text().splitlines() - base_images_digests = args.base_images_digests.read_text().splitlines() + with args.parsed_dockerfile.open("r") as f: + parsed_dockerfile = json.load(f) - is_last_from_scratch = False - if base_images_from_dockerfile[-1] == "scratch": - is_last_from_scratch = True + base_images = get_base_images_from_dockerfile(parsed_dockerfile) + + base_images_digests_raw = args.base_images_digests.read_text().splitlines() + base_images_digests = {} + for item in base_images_digests_raw: + image_reference, digest = item.split("|") + base_images_digests[image_reference] = digest with args.sbom.open("r") as f: sbom = json.load(f) - base_images_sbom_components = get_base_images_sbom_components(base_images_digests, is_last_from_scratch) - if "formulation" in sbom: - sbom["formulation"].append({"components": base_images_sbom_components}) - else: - sbom.update({"formulation": [{"components": base_images_sbom_components}]}) + base_images_sbom_components = get_base_images_sbom_components(base_images, base_images_digests) + + # base_images_sbom_components could be empty, when having just one stage FROM scratch + if base_images_sbom_components: + if "formulation" in sbom: + sbom["formulation"].append({"components": base_images_sbom_components}) + else: + sbom.update({"formulation": [{"components": base_images_sbom_components}]}) with args.sbom.open("w") as f: json.dump(sbom, f, indent=4) diff --git a/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py b/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py index 6d36dcc3..ebe39a54 100644 --- 
a/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py +++ b/sbom-utility-scripts/scripts/base-images-sbom-script/app/test_base_images_sbom_script.py @@ -3,21 +3,29 @@ from unittest.mock import MagicMock -from base_images_sbom_script import get_base_images_sbom_components, main, parse_image_reference_to_parts, ParsedImage +from base_images_sbom_script import ( + get_base_images_sbom_components, + get_base_images_from_dockerfile, + main, + parse_image_reference_to_parts, + ParsedImage, +) @pytest.mark.parametrize( - "base_images_digests, is_last_from_scratch, expected_result", + "base_images, base_images_digests, expected_result", [ - # two builder images, last base image is from scratch + # two builder images, last stage is from scratch ( [ - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", - "registry.access.redhat.com/ubi8/ubi:latest@sha256" - ":627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "registry.access.redhat.com/ubi8/ubi:latest", + "scratch", ], - True, + { + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256:8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "registry.access.redhat.com/ubi8/ubi:latest": "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", # noqa + }, [ { "type": "container", @@ -49,12 +57,13 @@ # one builder image, one parent image ( [ - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", - "registry.access.redhat.com/ubi8/ubi:latest@sha256" - ":627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "registry.access.redhat.com/ubi8/ubi:latest", 
], - False, + { + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256:8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "registry.access.redhat.com/ubi8/ubi:latest": "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", # noqa + }, [ { "type": "container", @@ -80,11 +89,10 @@ ), # just one parent image ( - [ - "registry.access.redhat.com/ubi8/ubi:latest@sha256" - ":627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", - ], - False, + ["registry.access.redhat.com/ubi8/ubi:latest"], + { + "registry.access.redhat.com/ubi8/ubi:latest": "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", # noqa + }, [ { "type": "container", @@ -95,13 +103,12 @@ }, ], ), - # one builder, last base image from scratch + # one builder, last stage from scratch ( - [ - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", - ], - True, + ["quay.io/mkosiarc_rhtap/single-container-app:f2566ab", "scratch"], + { + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256:8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + }, [ { "type": "container", @@ -118,7 +125,7 @@ }, ], ), - # four builder images, and from scratch base image + # four builder images, and from scratch in last stage ( [ "quay.io/builder1/builder1:aaaaaaa@sha256" @@ -129,8 +136,14 @@ ":3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", "quay.io/builder4/builder4:ddddddd@sha256" ":4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944", + "scratch", ], - True, + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": 
"quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943": "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", # noqa + "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944": "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944", # noqa + }, [ { "type": "container", @@ -196,7 +209,13 @@ "registry.access.redhat.com/ubi8/ubi:latest@sha256" ":627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", ], - False, + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943": "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", # noqa + "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944": "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944", # noqa + 
"registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac": "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", # noqa + }, [ { "type": "container", @@ -255,7 +274,7 @@ }, ], ), - # 3 builders and one final base image. builder 1 is reused twice, resulting in multiple properties + # 3 builders and one final base image. builder 1 is reused three times, resulting in multiple properties ( [ "quay.io/builder1/builder1:aaaaaaa@sha256" @@ -271,7 +290,13 @@ "registry.access.redhat.com/ubi8/ubi:latest@sha256" ":627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", ], - False, + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943": "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", # noqa + "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944": "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944", # noqa + "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac": "registry.access.redhat.com/ubi8/ubi:latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac", # noqa + }, [ { "type": "container", @@ -331,7 +356,7 @@ }, ], ), - # 3 builders and final base image is scratch. 
builder 1 is reused twice, resulting in multiple properties + # 3 builders and final base image is scratch. builder 1 is reused three times, resulting in multiple properties ( [ "quay.io/builder1/builder1:aaaaaaa@sha256" @@ -344,8 +369,14 @@ ":3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", "quay.io/builder1/builder1:aaaaaaa@sha256" ":1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", + "scratch", ], - True, + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943": "quay.io/builder3/builder3:ccccccc@sha256:3f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420943", # noqa + "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944": "quay.io/builder4/builder4:ddddddd@sha256:4f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420944", # noqa + }, [ { "type": "container", @@ -403,7 +434,10 @@ "quay.io/builder1/builder1:aaaaaaa@sha256" ":1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", ], - False, + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + }, [ 
{ "type": "container", @@ -435,17 +469,109 @@ }, ], ), + # Two images, both reused and several oci-archives and from scratch layers + ( + [ + "quay.io/builder1/builder1:aaaaaaa@sha256" + ":1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", + "scratch", + "quay.io/builder2/builder2:bbbbbbb@sha256" + ":2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", + "scratch", + "quay.io/builder1/builder1:aaaaaaa@sha256" + ":1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", + "oci-archive:export/out.ociarchive", + "quay.io/builder2/builder2:bbbbbbb@sha256" + ":2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", + "oci-archive:export/out.ociarchive", + "quay.io/builder1/builder1:aaaaaaa@sha256" + ":1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", + ], + { + "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941": "quay.io/builder1/builder1:aaaaaaa@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942": "quay.io/builder2/builder2:bbbbbbb@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942", # noqa + }, + [ + { + "type": "container", + "name": "quay.io/builder1/builder1", + "purl": "pkg:oci/builder1@sha256:1f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941" + "?repository_url=quay.io/builder1/builder1", + "properties": [ + { + "name": "konflux:container:is_builder_image:for_stage", + "value": "0", + }, + { + "name": "konflux:container:is_builder_image:for_stage", + "value": "4", + }, + { + "name": "konflux:container:is_base_image", + "value": "true", + }, + ], + }, + { + "type": "container", + "name": "quay.io/builder2/builder2", + "purl": "pkg:oci/builder2@sha256:2f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420942" + "?repository_url=quay.io/builder2/builder2", + 
"properties": [ + { + "name": "konflux:container:is_builder_image:for_stage", + "value": "2", + }, + { + "name": "konflux:container:is_builder_image:for_stage", + "value": "6", + }, + ], + }, + ], + ), + # one builder, last stage from oci-archive + ( + [ + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "oci-archive:export/out.ociarchive", + ], + { + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256:8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941", # noqa + }, + [ + { + "type": "container", + "name": "quay.io/mkosiarc_rhtap/single-container-app", + "purl": "pkg:oci/single-container-app@sha256" + ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941?repository_url=quay.io" + "/mkosiarc_rhtap/single-container-app", + "properties": [ + { + "name": "konflux:container:is_builder_image:for_stage", + "value": "0", + } + ], + }, + ], + ), + # just from scratch + ( + ["scratch"], + {}, # empty base_images_digests + [], # SBOM not created + ), ], ) -def test_get_base_images_sbom_components(base_images_digests, is_last_from_scratch, expected_result): - result = get_base_images_sbom_components(base_images_digests, is_last_from_scratch) +def test_get_base_images_sbom_components(base_images, base_images_digests, expected_result): + result = get_base_images_sbom_components(base_images, base_images_digests) assert result == expected_result def test_main_input_sbom_does_not_contain_formulation(tmp_path, mocker): sbom_file = tmp_path / "sbom.json" - base_images_from_dockerfile_file = tmp_path / "base_images_from_dockerfile.txt" - base_images_digests_file = tmp_path / "base_images_digests.txt" + parsed_dockerfile = tmp_path / "parsed_dockerfile.json" + base_images_digests_raw_file = tmp_path / "base_images_digests.txt" # minimal input sbom file sbom_file.write_text( @@ -457,20 +583,38 @@ def test_main_input_sbom_does_not_contain_formulation(tmp_path, mocker): ) # one builder images 
and one base image - base_images_from_dockerfile_file.write_text( - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab\nregistry.access.redhat.com/ubi8/ubi:latest" + parsed_dockerfile.write_text( + """ + { + "Stages": [ + { + "From": { + "Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab" + } + }, + { + "From": { + "Image": "registry.access.redhat.com/ubi8/ubi:latest" + } + } + ] + } + """ ) - base_images_digests_file.write_text( + base_images_digests_raw_file.write_text( + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab|" "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\nregistry.access.redhat.com/ubi8/ubi" - ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac " + ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\n" + "registry.access.redhat.com/ubi8/ubi:latest|" + "registry.access.redhat.com/ubi8/ubi" + ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac" ) # mock the parsed args, to avoid testing parse_args function mock_args = MagicMock() mock_args.sbom = sbom_file - mock_args.base_images_from_dockerfile = base_images_from_dockerfile_file - mock_args.base_images_digests = base_images_digests_file + mock_args.parsed_dockerfile = parsed_dockerfile + mock_args.base_images_digests = base_images_digests_raw_file mocker.patch("base_images_sbom_script.parse_args", return_value=mock_args) main() @@ -518,8 +662,8 @@ def test_main_input_sbom_does_not_contain_formulation(tmp_path, mocker): def test_main_input_sbom_does_not_contain_formulation_and_base_image_from_scratch(tmp_path, mocker): sbom_file = tmp_path / "sbom.json" - base_images_from_dockerfile_file = tmp_path / "base_images_from_dockerfile.txt" - base_images_digests_file = tmp_path / "base_images_digests.txt" + parsed_dockerfile = tmp_path / "parsed_dockerfile.json" + base_images_digests_raw_file = tmp_path / "base_images_digests.txt" # 
minimal input sbom file sbom_file.write_text( @@ -531,20 +675,43 @@ def test_main_input_sbom_does_not_contain_formulation_and_base_image_from_scratc ) # two builder images and the last one is from scratch - base_images_from_dockerfile_file.write_text( - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab\nregistry.access.redhat.com/ubi8/ubi:latest\nscratch" + parsed_dockerfile.write_text( + """ + { + "Stages": [ + { + "From": { + "Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab" + } + }, + { + "From": { + "Image": "registry.access.redhat.com/ubi8/ubi:latest" + } + }, + { + "From": { + "Scratch": true + } + } + ] + } + """ ) - base_images_digests_file.write_text( + base_images_digests_raw_file.write_text( + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab|" "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\nregistry.access.redhat.com/ubi8/ubi" - ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac " + ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\n" + "registry.access.redhat.com/ubi8/ubi:latest|" + "registry.access.redhat.com/ubi8/ubi" + ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac" ) # mock the parsed args, to avoid testing parse_args function mock_args = MagicMock() mock_args.sbom = sbom_file - mock_args.base_images_from_dockerfile = base_images_from_dockerfile_file - mock_args.base_images_digests = base_images_digests_file + mock_args.parsed_dockerfile = parsed_dockerfile + mock_args.base_images_digests = base_images_digests_raw_file mocker.patch("base_images_sbom_script.parse_args", return_value=mock_args) main() @@ -592,8 +759,8 @@ def test_main_input_sbom_does_not_contain_formulation_and_base_image_from_scratc def test_main_input_sbom_contains_formulation(tmp_path, mocker): sbom_file = tmp_path / "sbom.json" - base_images_from_dockerfile_file = tmp_path / 
"base_images_from_dockerfile.txt" - base_images_digests_file = tmp_path / "base_images_digests.txt" + parsed_dockerfile = tmp_path / "parsed_dockerfile.json" + base_images_digests_raw_file = tmp_path / "base_images_digests.txt" # minimal sbom with existing formulation that contains components item sbom_file.write_text( @@ -619,20 +786,43 @@ def test_main_input_sbom_contains_formulation(tmp_path, mocker): ) # two builder images and the last one is from scratch - base_images_from_dockerfile_file.write_text( - "quay.io/mkosiarc_rhtap/single-container-app:f2566ab\nregistry.access.redhat.com/ubi8/ubi:latest\nscratch" + parsed_dockerfile.write_text( + """ + { + "Stages": [ + { + "From": { + "Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab" + } + }, + { + "From": { + "Image": "registry.access.redhat.com/ubi8/ubi:latest" + } + }, + { + "From": { + "Scratch": true + } + } + ] + } + """ ) - base_images_digests_file.write_text( + base_images_digests_raw_file.write_text( + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab|" "quay.io/mkosiarc_rhtap/single-container-app:f2566ab@sha256" - ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\nregistry.access.redhat.com/ubi8/ubi" - ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac " + ":8f99627e843e931846855c5d899901bf093f5093e613a92745696a26b5420941\n" + "registry.access.redhat.com/ubi8/ubi:latest|" + "registry.access.redhat.com/ubi8/ubi" + ":latest@sha256:627867e53ad6846afba2dfbf5cef1d54c868a9025633ef0afd546278d4654eac" ) # mock the parsed args, to avoid testing parse_args function mock_args = MagicMock() mock_args.sbom = sbom_file - mock_args.base_images_from_dockerfile = base_images_from_dockerfile_file - mock_args.base_images_digests = base_images_digests_file + mock_args.parsed_dockerfile = parsed_dockerfile + mock_args.base_images_digests = base_images_digests_raw_file mocker.patch("base_images_sbom_script.parse_args", return_value=mock_args) main() @@ -722,3 
+912,183 @@ def test_main_input_sbom_contains_formulation(tmp_path, mocker): def test_parse_image_reference_to_parts(image, expected_parsed_image): parsed_image = parse_image_reference_to_parts(image) assert parsed_image == expected_parsed_image + + +@pytest.mark.parametrize( + "parsed_dockerfile, expected_base_images", + [ + # basic example + # FROM quay.io/mkosiarc_rhtap/single-container-app:f2566ab + # ... + # FROM registry.access.redhat.com/ubi8/ubi:latest + # ... + ( + { + "Stages": [ + { + "BaseName": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "From": {"Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab"}, + }, + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + ] + }, + [ + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "registry.access.redhat.com/ubi8/ubi:latest", + ], + ), + # basic example with scratch stage + # FROM quay.io/mkosiarc_rhtap/single-container-app:f2566ab + # ... + # FROM registry.access.redhat.com/ubi8/ubi:latest + # ... + # FROM scratch + # ... + ( + { + "Stages": [ + { + "BaseName": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "From": {"Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab"}, + }, + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + {"BaseName": "scratch", "From": {"Scratch": True}}, + ] + }, + [ + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "registry.access.redhat.com/ubi8/ubi:latest", + "scratch", + ], + ), + # just from scratch + ( + { + "Stages": [ + {"BaseName": "scratch", "From": {"Scratch": True}}, + ] + }, + [ + "scratch", + ], + ), + # Multiple images which are reused, including two scratch stages and two oci-archive stages + # FROM quay.io/mkosiarc_rhtap/single-container-app:f2566ab + # ... + # FROM scratch + # ... 
+ # FROM quay.io/mkosiarc_rhtap/single-container-app:f2566ab + # ... + # FROM oci-archive:export/out.ociarchive + # ... + # FROM registry.access.redhat.com/ubi8/ubi:latest + # ... + # FROM scratch + # ... + # FROM oci-archive:export/out.ociarchive + # ... + ( + { + "Stages": [ + { + "BaseName": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "From": {"Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab"}, + }, + {"BaseName": "scratch", "From": {"Scratch": True}}, + { + "BaseName": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "From": {"Image": "quay.io/mkosiarc_rhtap/single-container-app:f2566ab"}, + }, + { + "BaseName": "oci-archive:export/out.ociarchive", + "From": {"Image": "oci-archive:export/out.ociarchive"}, + }, + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + {"BaseName": "scratch", "From": {"Scratch": True}}, + { + "BaseName": "oci-archive:export/out.ociarchive", + "From": {"Image": "oci-archive:export/out.ociarchive"}, + }, + ] + }, + [ + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "scratch", + "quay.io/mkosiarc_rhtap/single-container-app:f2566ab", + "oci-archive:export/out.ociarchive", + "registry.access.redhat.com/ubi8/ubi:latest", + "scratch", + "oci-archive:export/out.ociarchive", + ], + ), + # alias/named stage, so something like + # FROM registry.access.redhat.com/ubi8/ubi:latest as builder + # ... + # FROM builder + # ... + ( + { + "Stages": [ + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "As": "builder", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + { + "BaseName": "builder", + "From": {"Stage": {"Named": "builder", "Index": 0}}, + }, + ] + }, + [ + "registry.access.redhat.com/ubi8/ubi:latest", + "registry.access.redhat.com/ubi8/ubi:latest", + ], + ), + # alias to an alias, so something like + # FROM registry.access.redhat.com/ubi8/ubi:latest as builder + # ... 
+ # FROM builder as stage1 + # ... + # FROM stage1 as stage2 + # ... + ( + { + "Stages": [ + { + "BaseName": "registry.access.redhat.com/ubi8/ubi:latest", + "As": "builder", + "From": {"Image": "registry.access.redhat.com/ubi8/ubi:latest"}, + }, + { + "BaseName": "builder", + "As": "stage1", + "From": {"Stage": {"Named": "builder", "Index": 0}}, + }, + { + "BaseName": "stage1", + "As": "stage2", + "From": {"Stage": {"Named": "stage1", "Index": 1}}, + }, + ] + }, + [ + "registry.access.redhat.com/ubi8/ubi:latest", + "registry.access.redhat.com/ubi8/ubi:latest", + "registry.access.redhat.com/ubi8/ubi:latest", + ], + ), + ], +) +def test_get_base_images_from_dockerfile(parsed_dockerfile, expected_base_images): + actual_base_images = get_base_images_from_dockerfile(parsed_dockerfile) + assert actual_base_images == expected_base_images