From 4da3be864fab86dec963c231366fdad11eaef21c Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Fri, 30 Jun 2023 10:19:16 -0400 Subject: [PATCH] Refactor source API (#1846) * refactor source API and syft json source block Signed-off-by: Alex Goodman * update source detection and format test utils Signed-off-by: Alex Goodman * generate list of all source metadata types Signed-off-by: Alex Goodman * extract base and root normalization into helper functions Signed-off-by: Alex Goodman * preserve syftjson model package name import ref Signed-off-by: Alex Goodman * alias should not be a pointer Signed-off-by: Alex Goodman --------- Signed-off-by: Alex Goodman Signed-off-by: Alex Goodman --- .gitignore | 1 + Makefile | 2 +- cmd/syft/cli/attest/attest.go | 67 +- cmd/syft/cli/eventloop/tasks.go | 14 +- cmd/syft/cli/packages/packages.go | 60 +- cmd/syft/cli/poweruser/poweruser.go | 55 +- internal/constants.go | 2 +- schema/json/README.md | 12 +- schema/json/generate/main.go | 50 - schema/json/internal/generated.go | 39 - schema/json/main_test.go | 39 - schema/json/schema-9.0.0.json | 1881 +++++++++++++++++ .../cataloger/filedigest/cataloger_test.go | 4 +- .../cataloger/filemetadata/cataloger_test.go | 2 +- .../internal/all_regular_files_test.go | 4 +- syft/file/test-fixtures/req-resp/.gitignore | 2 + .../req-resp/path/to/rel-inside.txt | 1 + .../req-resp/path/to/the/file.txt | 1 + .../req-resp/path/to/the/rel-outside.txt | 1 + syft/file/test-fixtures/req-resp/root-link | 1 + .../req-resp/somewhere/outside.txt | 1 + .../common/cyclonedxhelpers/decoder.go | 34 +- .../formats/common/cyclonedxhelpers/format.go | 49 +- .../common/spdxhelpers/document_name.go | 14 +- .../common/spdxhelpers/document_name_test.go | 31 +- .../common/spdxhelpers/document_namespace.go | 16 +- .../spdxhelpers/document_namespace_test.go | 42 +- .../common/spdxhelpers/to_syft_model.go | 23 +- .../common/spdxhelpers/to_syft_model_test.go | 27 +- syft/formats/cyclonedxjson/encoder_test.go | 69 +- 
.../TestCycloneDxDirectoryEncoder.golden | 12 +- .../snapshot/TestCycloneDxImageEncoder.golden | 16 +- syft/formats/cyclonedxxml/encoder_test.go | 65 +- .../TestCycloneDxDirectoryEncoder.golden | 10 +- .../snapshot/TestCycloneDxImageEncoder.golden | 14 +- syft/formats/github/encoder.go | 107 +- syft/formats/github/encoder_test.go | 182 +- .../internal/testutils/directory_input.go | 204 ++ .../internal/testutils/file_relationships.go | 32 + .../formats/internal/testutils/image_input.go | 113 + syft/formats/internal/testutils/redactor.go | 142 ++ syft/formats/internal/testutils/snapshot.go | 88 + syft/formats/internal/testutils/utils.go | 396 ---- syft/formats/spdxjson/encoder_test.go | 76 +- .../TestSPDXJSONDirectoryEncoder.golden | 8 +- .../snapshot/TestSPDXJSONImageEncoder.golden | 6 +- .../snapshot/TestSPDXRelationshipOrder.golden | 6 +- syft/formats/spdxtagvalue/encoder_test.go | 121 +- .../snapshot/TestSPDXJSONSPDXIDs.golden | 6 +- .../snapshot/TestSPDXRelationshipOrder.golden | 6 +- .../TestSPDXTagValueDirectoryEncoder.golden | 8 +- .../TestSPDXTagValueImageEncoder.golden | 6 +- syft/formats/syftjson/encoder_test.go | 75 +- syft/formats/syftjson/model/package.go | 4 +- syft/formats/syftjson/model/package_test.go | 2 +- syft/formats/syftjson/model/source.go | 101 +- syft/formats/syftjson/model/source_test.go | 203 +- .../snapshot/TestDirectoryEncoder.golden | 12 +- .../TestEncodeFullJSONDocument.golden | 8 +- .../snapshot/TestImageEncoder.golden | 8 +- syft/formats/syftjson/to_format_model.go | 44 +- syft/formats/syftjson/to_format_model_test.go | 176 +- syft/formats/syftjson/to_syft_model.go | 52 +- syft/formats/syftjson/to_syft_model_test.go | 154 +- syft/formats/table/encoder_test.go | 13 +- syft/formats/template/encoder_test.go | 27 +- syft/formats/text/encoder.go | 14 +- syft/formats/text/encoder_test.go | 38 +- .../snapshot/TestTextDirectoryEncoder.golden | 2 +- syft/internal/fileresolver/chroot_context.go | 165 ++ .../fileresolver/chroot_context_test.go 
| 481 +++++ .../container_image_squash_test.go | 21 + syft/internal/fileresolver/directory.go | 135 +- .../fileresolver/directory_indexer.go | 6 +- syft/internal/fileresolver/excluding_file.go | 4 +- .../fileresolver/excluding_file_test.go | 2 +- .../test-fixtures/req-resp/.gitignore | 2 + .../fileresolver/unindexed_directory_test.go | 21 - syft/internal/generate.go | 4 + syft/internal/jsonschema/README.md | 1 + .../json => syft/internal/jsonschema}/main.go | 71 +- .../packagemetadata/discover_type_names.go | 8 +- .../internal/packagemetadata/generate/main.go | 55 + syft/internal/packagemetadata/generated.go | 10 + syft/internal/packagemetadata/names.go | 13 + syft/internal/packagemetadata/names_test.go | 25 + .../sourcemetadata/completion_tester.go | 69 + .../sourcemetadata/discover_type_names.go | 148 ++ syft/internal/sourcemetadata/generate/main.go | 55 + syft/internal/sourcemetadata/generated.go | 10 + syft/internal/sourcemetadata/names.go | 41 + syft/internal/sourcemetadata/names_test.go | 29 + syft/internal/windows/path.go | 41 + syft/lib.go | 17 +- syft/linux/identify_release_test.go | 2 +- syft/pkg/cataloger/binary/cataloger_test.go | 6 +- .../internal/pkgtest/test_generic_parser.go | 4 +- syft/pkg/cataloger/search_config.go | 4 +- syft/sbom/sbom.go | 2 +- syft/source/alias.go | 13 + syft/source/description.go | 9 + syft/source/detection.go | 200 ++ .../{scheme_test.go => detection_test.go} | 50 +- syft/source/digest_utils.go | 11 + syft/source/directory_source.go | 215 ++ syft/source/directory_source_test.go | 560 +++++ syft/source/directory_source_win_test.go | 65 + syft/source/exclude.go | 5 + syft/source/file_source.go | 280 +++ syft/source/file_source_test.go | 278 +++ syft/source/image_metadata.go | 62 - syft/source/metadata.go | 12 - syft/source/scheme.go | 74 - syft/source/scope.go | 2 +- syft/source/source.go | 626 +----- syft/source/source_test.go | 920 -------- syft/source/source_win_test.go | 54 - syft/source/stereoscope_image_metadata.go | 62 + 
syft/source/stereoscope_image_source.go | 245 +++ syft/source/stereoscope_image_source_test.go | 243 +++ .../file-index-filter/.1/something | 1 + .../source/test-fixtures/file-index-filter/.2 | 1 + .../test-fixtures/file-index-filter/.vimrc | 1 + .../test-fixtures/file-index-filter/empty | 0 test/integration/catalog_packages_test.go | 8 +- test/integration/utils_test.go | 24 +- 126 files changed, 7384 insertions(+), 3190 deletions(-) delete mode 100644 schema/json/generate/main.go delete mode 100644 schema/json/internal/generated.go delete mode 100644 schema/json/main_test.go create mode 100644 schema/json/schema-9.0.0.json create mode 100644 syft/file/test-fixtures/req-resp/.gitignore create mode 120000 syft/file/test-fixtures/req-resp/path/to/rel-inside.txt create mode 100644 syft/file/test-fixtures/req-resp/path/to/the/file.txt create mode 120000 syft/file/test-fixtures/req-resp/path/to/the/rel-outside.txt create mode 120000 syft/file/test-fixtures/req-resp/root-link create mode 100644 syft/file/test-fixtures/req-resp/somewhere/outside.txt create mode 100644 syft/formats/internal/testutils/directory_input.go create mode 100644 syft/formats/internal/testutils/file_relationships.go create mode 100644 syft/formats/internal/testutils/image_input.go create mode 100644 syft/formats/internal/testutils/redactor.go create mode 100644 syft/formats/internal/testutils/snapshot.go delete mode 100644 syft/formats/internal/testutils/utils.go create mode 100644 syft/internal/fileresolver/chroot_context.go create mode 100644 syft/internal/fileresolver/chroot_context_test.go create mode 100644 syft/internal/fileresolver/test-fixtures/req-resp/.gitignore create mode 100644 syft/internal/generate.go create mode 100644 syft/internal/jsonschema/README.md rename {schema/json => syft/internal/jsonschema}/main.go (56%) rename schema/json/internal/metadata_types.go => syft/internal/packagemetadata/discover_type_names.go (96%) create mode 100644 
syft/internal/packagemetadata/generate/main.go create mode 100644 syft/internal/packagemetadata/generated.go create mode 100644 syft/internal/packagemetadata/names.go create mode 100644 syft/internal/packagemetadata/names_test.go create mode 100644 syft/internal/sourcemetadata/completion_tester.go create mode 100644 syft/internal/sourcemetadata/discover_type_names.go create mode 100644 syft/internal/sourcemetadata/generate/main.go create mode 100644 syft/internal/sourcemetadata/generated.go create mode 100644 syft/internal/sourcemetadata/names.go create mode 100644 syft/internal/sourcemetadata/names_test.go create mode 100644 syft/internal/windows/path.go create mode 100644 syft/source/alias.go create mode 100644 syft/source/description.go create mode 100644 syft/source/detection.go rename syft/source/{scheme_test.go => detection_test.go} (88%) create mode 100644 syft/source/digest_utils.go create mode 100644 syft/source/directory_source.go create mode 100644 syft/source/directory_source_test.go create mode 100644 syft/source/directory_source_win_test.go create mode 100644 syft/source/exclude.go create mode 100644 syft/source/file_source.go create mode 100644 syft/source/file_source_test.go delete mode 100644 syft/source/image_metadata.go delete mode 100644 syft/source/metadata.go delete mode 100644 syft/source/scheme.go delete mode 100644 syft/source/source_test.go delete mode 100644 syft/source/source_win_test.go create mode 100644 syft/source/stereoscope_image_metadata.go create mode 100644 syft/source/stereoscope_image_source.go create mode 100644 syft/source/stereoscope_image_source_test.go create mode 100644 syft/source/test-fixtures/file-index-filter/.1/something create mode 100644 syft/source/test-fixtures/file-index-filter/.2 create mode 100644 syft/source/test-fixtures/file-index-filter/.vimrc create mode 100644 syft/source/test-fixtures/file-index-filter/empty diff --git a/.gitignore b/.gitignore index 423be3f90..ab7501da4 100644 --- a/.gitignore +++ 
b/.gitignore @@ -1,3 +1,4 @@ +/.bin CHANGELOG.md VERSION /test/results diff --git a/Makefile b/Makefile index fd1ee9759..f04590d80 100644 --- a/Makefile +++ b/Makefile @@ -302,7 +302,7 @@ compare-test-rpm-package-install: $(TEMP_DIR) $(SNAPSHOT_DIR) .PHONY: generate-json-schema generate-json-schema: ## Generate a new json schema - cd schema/json && go generate . && go run . + cd syft/internal && go generate . && cd jsonschema && go run . .PHONY: generate-license-list generate-license-list: ## Generate an updated spdx license list diff --git a/cmd/syft/cli/attest/attest.go b/cmd/syft/cli/attest/attest.go index 9264c8e8b..9ed452b35 100644 --- a/cmd/syft/cli/attest/attest.go +++ b/cmd/syft/cli/attest/attest.go @@ -12,6 +12,7 @@ import ( "golang.org/x/exp/slices" "github.com/anchore/stereoscope" + "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/cmd/syft/cli/eventloop" "github.com/anchore/syft/cmd/syft/cli/options" "github.com/anchore/syft/cmd/syft/cli/packages" @@ -34,17 +35,8 @@ func Run(_ context.Context, app *config.Application, args []string) error { return err } - // could be an image or a directory, with or without a scheme - // TODO: validate that source is image + // note: must be a container image userInput := args[0] - si, err := source.ParseInputWithNameVersion(userInput, app.Platform, app.SourceName, app.SourceVersion, app.DefaultImagePullSource) - if err != nil { - return fmt.Errorf("could not generate source input for packages command: %w", err) - } - - if si.Scheme != source.ImageScheme { - return fmt.Errorf("attestations are only supported for oci images at this time") - } eventBus := partybus.NewBus() stereoscope.SetBus(eventBus) @@ -52,7 +44,7 @@ func Run(_ context.Context, app *config.Application, args []string) error { subscription := eventBus.Subscribe() return eventloop.EventLoop( - execWorker(app, *si), + execWorker(app, userInput), eventloop.SetupSignals(), subscription, stereoscope.Cleanup, @@ -60,13 +52,48 @@ func Run(_ 
context.Context, app *config.Application, args []string) error { ) } -func buildSBOM(app *config.Application, si source.Input, errs chan error) (*sbom.SBOM, error) { - src, cleanup, err := source.New(si, app.Registry.ToOptions(), app.Exclusions) - if cleanup != nil { - defer cleanup() +func buildSBOM(app *config.Application, userInput string, errs chan error) (*sbom.SBOM, error) { + cfg := source.DetectConfig{ + DefaultImageSource: app.DefaultImagePullSource, + } + detection, err := source.Detect(userInput, cfg) + if err != nil { + return nil, fmt.Errorf("could not deteremine source: %w", err) + } + + if !detection.IsContainerImage() { + return nil, fmt.Errorf("attestations are only supported for oci images at this time") + } + + var platform *image.Platform + + if app.Platform != "" { + platform, err = image.NewPlatform(app.Platform) + if err != nil { + return nil, fmt.Errorf("invalid platform: %w", err) + } + } + + src, err := detection.NewSource( + source.DetectionSourceConfig{ + Alias: source.Alias{ + Name: app.SourceName, + Version: app.SourceVersion, + }, + RegistryOptions: app.Registry.ToOptions(), + Platform: platform, + Exclude: source.ExcludeConfig{ + Paths: app.Exclusions, + }, + DigestAlgorithms: nil, + }, + ) + + if src != nil { + defer src.Close() } if err != nil { - return nil, fmt.Errorf("failed to construct source from user input %q: %w", si.UserInput, err) + return nil, fmt.Errorf("failed to construct source from user input %q: %w", userInput, err) } s, err := packages.GenerateSBOM(src, errs, app) @@ -75,20 +102,20 @@ func buildSBOM(app *config.Application, si source.Input, errs chan error) (*sbom } if s == nil { - return nil, fmt.Errorf("no SBOM produced for %q", si.UserInput) + return nil, fmt.Errorf("no SBOM produced for %q", userInput) } return s, nil } //nolint:funlen -func execWorker(app *config.Application, si source.Input) <-chan error { +func execWorker(app *config.Application, userInput string) <-chan error { errs := make(chan error) go
func() { defer close(errs) defer bus.Publish(partybus.Event{Type: event.Exit}) - s, err := buildSBOM(app, si, errs) + s, err := buildSBOM(app, userInput, errs) if err != nil { errs <- fmt.Errorf("unable to build SBOM: %w", err) return @@ -136,7 +163,7 @@ func execWorker(app *config.Application, si source.Input) <-chan error { predicateType = "custom" } - args := []string{"attest", si.UserInput, "--predicate", f.Name(), "--type", predicateType} + args := []string{"attest", userInput, "--predicate", f.Name(), "--type", predicateType} if app.Attest.Key != "" { args = append(args, "--key", app.Attest.Key) } diff --git a/cmd/syft/cli/eventloop/tasks.go b/cmd/syft/cli/eventloop/tasks.go index 536a39ee6..4c0456542 100644 --- a/cmd/syft/cli/eventloop/tasks.go +++ b/cmd/syft/cli/eventloop/tasks.go @@ -16,7 +16,7 @@ import ( "github.com/anchore/syft/syft/source" ) -type Task func(*sbom.Artifacts, *source.Source) ([]artifact.Relationship, error) +type Task func(*sbom.Artifacts, source.Source) ([]artifact.Relationship, error) func Tasks(app *config.Application) ([]Task, error) { var tasks []Task @@ -48,7 +48,7 @@ func generateCatalogPackagesTask(app *config.Application) (Task, error) { return nil, nil } - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { packageCatalog, relationships, theDistro, err := syft.CatalogPackages(src, app.ToCatalogerConfig()) results.Packages = packageCatalog @@ -67,7 +67,7 @@ func generateCatalogFileMetadataTask(app *config.Application) (Task, error) { metadataCataloger := filemetadata.NewCataloger() - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { resolver, err := src.FileResolver(app.FileMetadata.Cataloger.ScopeOpt) if err != nil { return nil, err @@ -110,7 +110,7 @@ func 
generateCatalogFileDigestsTask(app *config.Application) (Task, error) { digestsCataloger := filedigest.NewCataloger(hashes) - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { resolver, err := src.FileResolver(app.FileMetadata.Cataloger.ScopeOpt) if err != nil { return nil, err @@ -142,7 +142,7 @@ func generateCatalogSecretsTask(app *config.Application) (Task, error) { return nil, err } - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { resolver, err := src.FileResolver(app.Secrets.Cataloger.ScopeOpt) if err != nil { return nil, err @@ -169,7 +169,7 @@ func generateCatalogContentsTask(app *config.Application) (Task, error) { return nil, err } - task := func(results *sbom.Artifacts, src *source.Source) ([]artifact.Relationship, error) { + task := func(results *sbom.Artifacts, src source.Source) ([]artifact.Relationship, error) { resolver, err := src.FileResolver(app.FileContents.Cataloger.ScopeOpt) if err != nil { return nil, err @@ -186,7 +186,7 @@ func generateCatalogContentsTask(app *config.Application) (Task, error) { return task, nil } -func RunTask(t Task, a *sbom.Artifacts, src *source.Source, c chan<- artifact.Relationship, errs chan<- error) { +func RunTask(t Task, a *sbom.Artifacts, src source.Source, c chan<- artifact.Relationship, errs chan<- error) { defer close(c) relationships, err := t(a, src) diff --git a/cmd/syft/cli/packages/packages.go b/cmd/syft/cli/packages/packages.go index 544a1b502..da5258193 100644 --- a/cmd/syft/cli/packages/packages.go +++ b/cmd/syft/cli/packages/packages.go @@ -7,6 +7,7 @@ import ( "github.com/wagoodman/go-partybus" "github.com/anchore/stereoscope" + "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/cmd/syft/cli/eventloop" 
"github.com/anchore/syft/cmd/syft/cli/options" "github.com/anchore/syft/internal" @@ -35,10 +36,6 @@ func Run(_ context.Context, app *config.Application, args []string) error { // could be an image or a directory, with or without a scheme userInput := args[0] - si, err := source.ParseInputWithNameVersion(userInput, app.Platform, app.SourceName, app.SourceVersion, app.DefaultImagePullSource) - if err != nil { - return fmt.Errorf("could not generate source input for packages command: %w", err) - } eventBus := partybus.NewBus() stereoscope.SetBus(eventBus) @@ -46,7 +43,7 @@ func Run(_ context.Context, app *config.Application, args []string) error { subscription := eventBus.Subscribe() return eventloop.EventLoop( - execWorker(app, *si, writer), + execWorker(app, userInput, writer), eventloop.SetupSignals(), subscription, stereoscope.Cleanup, @@ -54,17 +51,52 @@ func Run(_ context.Context, app *config.Application, args []string) error { ) } -func execWorker(app *config.Application, si source.Input, writer sbom.Writer) <-chan error { +func execWorker(app *config.Application, userInput string, writer sbom.Writer) <-chan error { errs := make(chan error) go func() { defer close(errs) - src, cleanup, err := source.New(si, app.Registry.ToOptions(), app.Exclusions) - if cleanup != nil { - defer cleanup() + detection, err := source.Detect( + userInput, + source.DetectConfig{ + DefaultImageSource: app.DefaultImagePullSource, + }, + ) + if err != nil { + errs <- fmt.Errorf("could not deteremine source: %w", err) + return + } + + var platform *image.Platform + + if app.Platform != "" { + platform, err = image.NewPlatform(app.Platform) + if err != nil { + errs <- fmt.Errorf("invalid platform: %w", err) + return + } + } + + src, err := detection.NewSource( + source.DetectionSourceConfig{ + Alias: source.Alias{ + Name: app.SourceName, + Version: app.SourceVersion, + }, + RegistryOptions: app.Registry.ToOptions(), + Platform: platform, + Exclude: source.ExcludeConfig{ + Paths: 
app.Exclusions, + }, + DigestAlgorithms: nil, + }, + ) + + if src != nil { + defer src.Close() } if err != nil { - errs <- fmt.Errorf("failed to construct source from user input %q: %w", si.UserInput, err) + errs <- fmt.Errorf("failed to construct source from user input %q: %w", userInput, err) return } @@ -75,7 +107,7 @@ func execWorker(app *config.Application, si source.Input, writer sbom.Writer) <- } if s == nil { - errs <- fmt.Errorf("no SBOM produced for %q", si.UserInput) + errs <- fmt.Errorf("no SBOM produced for %q", userInput) } bus.Publish(partybus.Event{ @@ -86,14 +118,14 @@ func execWorker(app *config.Application, si source.Input, writer sbom.Writer) <- return errs } -func GenerateSBOM(src *source.Source, errs chan error, app *config.Application) (*sbom.SBOM, error) { +func GenerateSBOM(src source.Source, errs chan error, app *config.Application) (*sbom.SBOM, error) { tasks, err := eventloop.Tasks(app) if err != nil { return nil, err } s := sbom.SBOM{ - Source: src.Metadata, + Source: src.Describe(), Descriptor: sbom.Descriptor{ Name: internal.ApplicationName, Version: version.FromBuild().Version, @@ -106,7 +138,7 @@ func GenerateSBOM(src *source.Source, errs chan error, app *config.Application) return &s, nil } -func buildRelationships(s *sbom.SBOM, src *source.Source, tasks []eventloop.Task, errs chan error) { +func buildRelationships(s *sbom.SBOM, src source.Source, tasks []eventloop.Task, errs chan error) { var relationships []<-chan artifact.Relationship for _, task := range tasks { c := make(chan artifact.Relationship) diff --git a/cmd/syft/cli/poweruser/poweruser.go b/cmd/syft/cli/poweruser/poweruser.go index 724f9a81f..dd1b758fe 100644 --- a/cmd/syft/cli/poweruser/poweruser.go +++ b/cmd/syft/cli/poweruser/poweruser.go @@ -9,6 +9,7 @@ import ( "github.com/wagoodman/go-partybus" "github.com/anchore/stereoscope" + "github.com/anchore/stereoscope/pkg/image" "github.com/anchore/syft/cmd/syft/cli/eventloop" 
"github.com/anchore/syft/cmd/syft/cli/options" "github.com/anchore/syft/cmd/syft/cli/packages" @@ -38,10 +39,6 @@ func Run(_ context.Context, app *config.Application, args []string) error { }() userInput := args[0] - si, err := source.ParseInputWithNameVersion(userInput, app.Platform, app.SourceName, app.SourceVersion, app.DefaultImagePullSource) - if err != nil { - return fmt.Errorf("could not generate source input for packages command: %w", err) - } eventBus := partybus.NewBus() stereoscope.SetBus(eventBus) @@ -49,7 +46,7 @@ func Run(_ context.Context, app *config.Application, args []string) error { subscription := eventBus.Subscribe() return eventloop.EventLoop( - execWorker(app, *si, writer), + execWorker(app, userInput, writer), eventloop.SetupSignals(), subscription, stereoscope.Cleanup, @@ -57,7 +54,8 @@ func Run(_ context.Context, app *config.Application, args []string) error { ) } -func execWorker(app *config.Application, si source.Input, writer sbom.Writer) <-chan error { +//nolint:funlen +func execWorker(app *config.Application, userInput string, writer sbom.Writer) <-chan error { errs := make(chan error) go func() { defer close(errs) @@ -72,17 +70,52 @@ func execWorker(app *config.Application, si source.Input, writer sbom.Writer) <- return } - src, cleanup, err := source.New(si, app.Registry.ToOptions(), app.Exclusions) + detection, err := source.Detect( + userInput, + source.DetectConfig{ + DefaultImageSource: app.DefaultImagePullSource, + }, + ) if err != nil { - errs <- err + errs <- fmt.Errorf("could not deteremine source: %w", err) return } - if cleanup != nil { - defer cleanup() + + var platform *image.Platform + + if app.Platform != "" { + platform, err = image.NewPlatform(app.Platform) + if err != nil { + errs <- fmt.Errorf("invalid platform: %w", err) + return + } + } + + src, err := detection.NewSource( + source.DetectionSourceConfig{ + Alias: source.Alias{ + Name: app.SourceName, + Version: app.SourceVersion, + }, + RegistryOptions: 
app.Registry.ToOptions(), + Platform: platform, + Exclude: source.ExcludeConfig{ + Paths: app.Exclusions, + }, + DigestAlgorithms: nil, + }, + ) + + if src != nil { + defer src.Close() + } + if err != nil { + errs <- fmt.Errorf("failed to construct source from user input %q: %w", userInput, err) + return } s := sbom.SBOM{ - Source: src.Metadata, + Source: src.Describe(), Descriptor: sbom.Descriptor{ Name: internal.ApplicationName, Version: version.FromBuild().Version, diff --git a/internal/constants.go b/internal/constants.go index 73bd40a41..bf520ab90 100644 --- a/internal/constants.go +++ b/internal/constants.go @@ -6,5 +6,5 @@ const ( // JSONSchemaVersion is the current schema version output by the JSON encoder // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. - JSONSchemaVersion = "8.0.1" + JSONSchemaVersion = "9.0.0" ) diff --git a/schema/json/README.md b/schema/json/README.md index 40af8b4cd..7de19b64d 100644 --- a/schema/json/README.md +++ b/schema/json/README.md @@ -3,8 +3,8 @@ This is the JSON schema for output from the JSON presenters (`syft packages -o json` and `syft power-user `). 
The required inputs for defining the JSON schema are as follows: - the value of `internal.JSONSchemaVersion` that governs the schema filename -- the `Document` struct definition within `internal/presenters/poweruser/json_document.go` that governs the overall document shape -- the `artifactMetadataContainer` struct definition within `schema/json/generate.go` that governs the allowable shapes of `pkg.Package.Metadata` +- the `Document` struct definition within `github.com/anchore/syft/syft/formats/syftjson/model/document.go` that governs the overall document shape +- generated `AllTypes()` helper function within the `syft/internal/sourcemetadata` and `syft/internal/packagemetadata` packages With regard to testing the JSON schema, integration test cases provided by the developer are used as examples to validate that JSON output from Syft is always valid relative to the `schema/json/schema-$VERSION.json` file. @@ -22,15 +22,13 @@ Given a version number format `MODEL.REVISION.ADDITION`: ## Adding a New `pkg.*Metadata` Type -When adding a new `pkg.*Metadata` that is assigned to the `pkg.Package.Metadata` struct field it is important that a few things -are done: +When adding a new `pkg.*Metadata` that is assigned to the `pkg.Package.Metadata` struct field you must add a test case to `test/integration/catalog_packages_cases_test.go` that exercises the new package type with the new metadata. -- a new integration test case is added to `test/integration/catalog_packages_cases_test.go` that exercises the new package type with the new metadata -- the new metadata struct is added to the `artifactMetadataContainer` struct within `schema/json/generate.go` +Additionally it is important to generate a new JSON schema since the `pkg.Package.Metadata` field is covered by the schema. 
## Generating a New Schema -Create the new schema by running `cd schema/json && go run generate.go` (note you must be in the `schema/json` dir while running this): +Create the new schema by running `make generate-json-schema` from the root of the repo: - If there is **not** an existing schema for the given version, then the new schema file will be written to `schema/json/schema-$VERSION.json` - If there is an existing schema for the given version and the new schema matches the existing schema, no action is taken diff --git a/schema/json/generate/main.go b/schema/json/generate/main.go deleted file mode 100644 index fc8dc120a..000000000 --- a/schema/json/generate/main.go +++ /dev/null @@ -1,50 +0,0 @@ -package main - -import ( - "fmt" - "os" - - "github.com/dave/jennifer/jen" - - "github.com/anchore/syft/schema/json/internal" -) - -// This program generates internal/generated.go. - -const ( - pkgImport = "github.com/anchore/syft/syft/pkg" - path = "internal/generated.go" -) - -func main() { - typeNames, err := internal.AllSyftMetadataTypeNames() - if err != nil { - panic(fmt.Errorf("unable to get all metadata type names: %w", err)) - } - - fmt.Printf("updating metadata container object with %+v types\n", len(typeNames)) - - f := jen.NewFile("internal") - f.HeaderComment("DO NOT EDIT: generated by schema/json/generate/main.go") - f.ImportName(pkgImport, "pkg") - f.Comment("ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field.") - f.Type().Id("ArtifactMetadataContainer").StructFunc(func(g *jen.Group) { - for _, typeName := range typeNames { - g.Id(typeName).Qual(pkgImport, typeName) - } - }) - - rendered := fmt.Sprintf("%#v", f) - - fh, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) - if err != nil { - panic(fmt.Errorf("unable to open file: %w", err)) - } - _, err = fh.WriteString(rendered) - if err != nil { - panic(fmt.Errorf("unable to write file: %w", err)) - } - 
if err := fh.Close(); err != nil { - panic(fmt.Errorf("unable to close file: %w", err)) - } -} diff --git a/schema/json/internal/generated.go b/schema/json/internal/generated.go deleted file mode 100644 index 3341818de..000000000 --- a/schema/json/internal/generated.go +++ /dev/null @@ -1,39 +0,0 @@ -// DO NOT EDIT: generated by schema/json/generate/main.go - -package internal - -import "github.com/anchore/syft/syft/pkg" - -// ArtifactMetadataContainer is a struct that contains all the metadata types for a package, as represented in the pkg.Package.Metadata field. -type ArtifactMetadataContainer struct { - AlpmMetadata pkg.AlpmMetadata - ApkMetadata pkg.ApkMetadata - BinaryMetadata pkg.BinaryMetadata - CargoPackageMetadata pkg.CargoPackageMetadata - CocoapodsMetadata pkg.CocoapodsMetadata - ConanLockMetadata pkg.ConanLockMetadata - ConanMetadata pkg.ConanMetadata - DartPubMetadata pkg.DartPubMetadata - DotnetDepsMetadata pkg.DotnetDepsMetadata - DpkgMetadata pkg.DpkgMetadata - GemMetadata pkg.GemMetadata - GolangBinMetadata pkg.GolangBinMetadata - GolangModMetadata pkg.GolangModMetadata - HackageMetadata pkg.HackageMetadata - JavaMetadata pkg.JavaMetadata - KbPackageMetadata pkg.KbPackageMetadata - LinuxKernelMetadata pkg.LinuxKernelMetadata - LinuxKernelModuleMetadata pkg.LinuxKernelModuleMetadata - MixLockMetadata pkg.MixLockMetadata - NixStoreMetadata pkg.NixStoreMetadata - NpmPackageJSONMetadata pkg.NpmPackageJSONMetadata - NpmPackageLockJSONMetadata pkg.NpmPackageLockJSONMetadata - PhpComposerJSONMetadata pkg.PhpComposerJSONMetadata - PortageMetadata pkg.PortageMetadata - PythonPackageMetadata pkg.PythonPackageMetadata - PythonPipfileLockMetadata pkg.PythonPipfileLockMetadata - PythonRequirementsMetadata pkg.PythonRequirementsMetadata - RDescriptionFileMetadata pkg.RDescriptionFileMetadata - RebarLockMetadata pkg.RebarLockMetadata - RpmMetadata pkg.RpmMetadata -} diff --git a/schema/json/main_test.go b/schema/json/main_test.go deleted file mode 100644 index 
0903b4dde..000000000 --- a/schema/json/main_test.go +++ /dev/null @@ -1,39 +0,0 @@ -package main - -import ( - "reflect" - "sort" - "testing" - - "github.com/google/go-cmp/cmp" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/anchore/syft/schema/json/internal" -) - -func TestAllMetadataRepresented(t *testing.T) { - // this test checks that all the metadata types are represented in the currently generated ArtifactMetadataContainer struct - // such that PRs will reflect when there is drift from the implemented set of metadata types and the generated struct - // which controls the JSON schema content. - expected, err := internal.AllSyftMetadataTypeNames() - require.NoError(t, err) - actual := allTypeNamesFromStruct(internal.ArtifactMetadataContainer{}) - if !assert.ElementsMatch(t, expected, actual) { - t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) - t.Log("did you add a new pkg.*Metadata type without updating the JSON schema?") - t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)") - } -} - -func allTypeNamesFromStruct(instance any) []string { - // get all the type names from the struct (not recursively) - var typeNames []string - tt := reflect.TypeOf(instance) - for i := 0; i < tt.NumField(); i++ { - field := tt.Field(i) - typeNames = append(typeNames, field.Type.Name()) - } - sort.Strings(typeNames) - return typeNames -} diff --git a/schema/json/schema-9.0.0.json b/schema/json/schema-9.0.0.json new file mode 100644 index 000000000..0f24da576 --- /dev/null +++ b/schema/json/schema-9.0.0.json @@ -0,0 +1,1881 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "anchore.io/schema/syft/json/9.0.0/document", + "$ref": "#/$defs/Document", + "$defs": { + "AlpmFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "type": { + "type": "string" + }, + "uid": { + "type": "string" + }, + "gid": { + 
"type": "string" + }, + "time": { + "type": "string", + "format": "date-time" + }, + "size": { + "type": "string" + }, + "link": { + "type": "string" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array" + } + }, + "type": "object" + }, + "AlpmMetadata": { + "properties": { + "basepackage": { + "type": "string" + }, + "package": { + "type": "string" + }, + "version": { + "type": "string" + }, + "description": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "packager": { + "type": "string" + }, + "url": { + "type": "string" + }, + "validation": { + "type": "string" + }, + "reason": { + "type": "integer" + }, + "files": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array" + }, + "backup": { + "items": { + "$ref": "#/$defs/AlpmFileRecord" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "basepackage", + "package", + "version", + "description", + "architecture", + "size", + "packager", + "url", + "validation", + "reason", + "files", + "backup" + ] + }, + "ApkFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "ownerUid": { + "type": "string" + }, + "ownerGid": { + "type": "string" + }, + "permissions": { + "type": "string" + }, + "digest": { + "$ref": "#/$defs/Digest" + } + }, + "type": "object", + "required": [ + "path" + ] + }, + "ApkMetadata": { + "properties": { + "package": { + "type": "string" + }, + "originPackage": { + "type": "string" + }, + "maintainer": { + "type": "string" + }, + "version": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "url": { + "type": "string" + }, + "description": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "installedSize": { + "type": "integer" + }, + "pullDependencies": { + "items": { + "type": "string" + }, + "type": "array" + }, + "provides": { + "items": { + "type": "string" + }, + "type": "array" + }, + "pullChecksum": { + "type": 
"string" + }, + "gitCommitOfApkPort": { + "type": "string" + }, + "files": { + "items": { + "$ref": "#/$defs/ApkFileRecord" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "package", + "originPackage", + "maintainer", + "version", + "architecture", + "url", + "description", + "size", + "installedSize", + "pullDependencies", + "provides", + "pullChecksum", + "gitCommitOfApkPort", + "files" + ] + }, + "BinaryMetadata": { + "properties": { + "matches": { + "items": { + "$ref": "#/$defs/ClassifierMatch" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "matches" + ] + }, + "CargoPackageMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "source": { + "type": "string" + }, + "checksum": { + "type": "string" + }, + "dependencies": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "checksum", + "dependencies" + ] + }, + "ClassifierMatch": { + "properties": { + "classifier": { + "type": "string" + }, + "location": { + "$ref": "#/$defs/Location" + } + }, + "type": "object", + "required": [ + "classifier", + "location" + ] + }, + "CocoapodsMetadata": { + "properties": { + "checksum": { + "type": "string" + } + }, + "type": "object", + "required": [ + "checksum" + ] + }, + "ConanLockMetadata": { + "properties": { + "ref": { + "type": "string" + }, + "package_id": { + "type": "string" + }, + "prev": { + "type": "string" + }, + "requires": { + "type": "string" + }, + "build_requires": { + "type": "string" + }, + "py_requires": { + "type": "string" + }, + "options": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "path": { + "type": "string" + }, + "context": { + "type": "string" + } + }, + "type": "object", + "required": [ + "ref" + ] + }, + "ConanMetadata": { + "properties": { + "ref": { + "type": "string" + } + }, + "type": "object", + "required": [ 
+ "ref" + ] + }, + "Coordinates": { + "properties": { + "path": { + "type": "string" + }, + "layerID": { + "type": "string" + } + }, + "type": "object", + "required": [ + "path" + ] + }, + "DartPubMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "hosted_url": { + "type": "string" + }, + "vcs_url": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version" + ] + }, + "Descriptor": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "configuration": true + }, + "type": "object", + "required": [ + "name", + "version" + ] + }, + "Digest": { + "properties": { + "algorithm": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ] + }, + "Document": { + "properties": { + "artifacts": { + "items": { + "$ref": "#/$defs/Package" + }, + "type": "array" + }, + "artifactRelationships": { + "items": { + "$ref": "#/$defs/Relationship" + }, + "type": "array" + }, + "files": { + "items": { + "$ref": "#/$defs/File" + }, + "type": "array" + }, + "secrets": { + "items": { + "$ref": "#/$defs/Secrets" + }, + "type": "array" + }, + "source": { + "$ref": "#/$defs/Source" + }, + "distro": { + "$ref": "#/$defs/LinuxRelease" + }, + "descriptor": { + "$ref": "#/$defs/Descriptor" + }, + "schema": { + "$ref": "#/$defs/Schema" + } + }, + "type": "object", + "required": [ + "artifacts", + "artifactRelationships", + "source", + "distro", + "descriptor", + "schema" + ] + }, + "DotnetDepsMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "path": { + "type": "string" + }, + "sha512": { + "type": "string" + }, + "hashPath": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version", + "path", + "sha512", + "hashPath" + ] + }, + "DpkgFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "digest": 
{ + "$ref": "#/$defs/Digest" + }, + "isConfigFile": { + "type": "boolean" + } + }, + "type": "object", + "required": [ + "path", + "isConfigFile" + ] + }, + "DpkgMetadata": { + "properties": { + "package": { + "type": "string" + }, + "source": { + "type": "string" + }, + "version": { + "type": "string" + }, + "sourceVersion": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "maintainer": { + "type": "string" + }, + "installedSize": { + "type": "integer" + }, + "files": { + "items": { + "$ref": "#/$defs/DpkgFileRecord" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "package", + "source", + "version", + "sourceVersion", + "architecture", + "maintainer", + "installedSize", + "files" + ] + }, + "File": { + "properties": { + "id": { + "type": "string" + }, + "location": { + "$ref": "#/$defs/Coordinates" + }, + "metadata": { + "$ref": "#/$defs/FileMetadataEntry" + }, + "contents": { + "type": "string" + }, + "digests": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "id", + "location" + ] + }, + "FileMetadataEntry": { + "properties": { + "mode": { + "type": "integer" + }, + "type": { + "type": "string" + }, + "linkDestination": { + "type": "string" + }, + "userID": { + "type": "integer" + }, + "groupID": { + "type": "integer" + }, + "mimeType": { + "type": "string" + }, + "size": { + "type": "integer" + } + }, + "type": "object", + "required": [ + "mode", + "type", + "userID", + "groupID", + "mimeType", + "size" + ] + }, + "GemMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array" + }, + "authors": { + "items": { + "type": "string" + }, + "type": "array" + }, + "homepage": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version" + ] + }, + "GolangBinMetadata": { + "properties": { + "goBuildSettings": { + 
"patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "goCompiledVersion": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "h1Digest": { + "type": "string" + }, + "mainModule": { + "type": "string" + } + }, + "type": "object", + "required": [ + "goCompiledVersion", + "architecture" + ] + }, + "GolangModMetadata": { + "properties": { + "h1Digest": { + "type": "string" + } + }, + "type": "object" + }, + "HackageMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "pkgHash": { + "type": "string" + }, + "snapshotURL": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version" + ] + }, + "IDLikes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "JavaManifest": { + "properties": { + "main": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "namedSections": { + "patternProperties": { + ".*": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "JavaMetadata": { + "properties": { + "virtualPath": { + "type": "string" + }, + "manifest": { + "$ref": "#/$defs/JavaManifest" + }, + "pomProperties": { + "$ref": "#/$defs/PomProperties" + }, + "pomProject": { + "$ref": "#/$defs/PomProject" + }, + "digest": { + "items": { + "$ref": "#/$defs/Digest" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "virtualPath" + ] + }, + "KbPackageMetadata": { + "properties": { + "product_id": { + "type": "string" + }, + "kb": { + "type": "string" + } + }, + "type": "object", + "required": [ + "product_id", + "kb" + ] + }, + "License": { + "properties": { + "value": { + "type": "string" + }, + "spdxExpression": { + "type": "string" + }, + "type": { + "type": "string" + }, + "urls": { + "items": { + "type": "string" + }, + "type": "array" + }, + "locations": { + "items": { + "$ref": 
"#/$defs/Location" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "value", + "spdxExpression", + "type", + "urls", + "locations" + ] + }, + "LinuxKernelMetadata": { + "properties": { + "name": { + "type": "string" + }, + "architecture": { + "type": "string" + }, + "version": { + "type": "string" + }, + "extendedVersion": { + "type": "string" + }, + "buildTime": { + "type": "string" + }, + "author": { + "type": "string" + }, + "format": { + "type": "string" + }, + "rwRootFS": { + "type": "boolean" + }, + "swapDevice": { + "type": "integer" + }, + "rootDevice": { + "type": "integer" + }, + "videoMode": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "architecture", + "version" + ] + }, + "LinuxKernelModuleMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "sourceVersion": { + "type": "string" + }, + "path": { + "type": "string" + }, + "description": { + "type": "string" + }, + "author": { + "type": "string" + }, + "license": { + "type": "string" + }, + "kernelVersion": { + "type": "string" + }, + "versionMagic": { + "type": "string" + }, + "parameters": { + "patternProperties": { + ".*": { + "$ref": "#/$defs/LinuxKernelModuleParameter" + } + }, + "type": "object" + } + }, + "type": "object" + }, + "LinuxKernelModuleParameter": { + "properties": { + "type": { + "type": "string" + }, + "description": { + "type": "string" + } + }, + "type": "object" + }, + "LinuxRelease": { + "properties": { + "prettyName": { + "type": "string" + }, + "name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "idLike": { + "$ref": "#/$defs/IDLikes" + }, + "version": { + "type": "string" + }, + "versionID": { + "type": "string" + }, + "versionCodename": { + "type": "string" + }, + "buildID": { + "type": "string" + }, + "imageID": { + "type": "string" + }, + "imageVersion": { + "type": "string" + }, + "variant": { + "type": "string" + }, + "variantID": { + "type": 
"string" + }, + "homeURL": { + "type": "string" + }, + "supportURL": { + "type": "string" + }, + "bugReportURL": { + "type": "string" + }, + "privacyPolicyURL": { + "type": "string" + }, + "cpeName": { + "type": "string" + }, + "supportEnd": { + "type": "string" + } + }, + "type": "object" + }, + "Location": { + "properties": { + "path": { + "type": "string" + }, + "layerID": { + "type": "string" + }, + "annotations": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object", + "required": [ + "path" + ] + }, + "MixLockMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "pkgHash": { + "type": "string" + }, + "pkgHashExt": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ] + }, + "NixStoreMetadata": { + "properties": { + "outputHash": { + "type": "string" + }, + "output": { + "type": "string" + }, + "files": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "outputHash", + "files" + ] + }, + "NpmPackageJSONMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "author": { + "type": "string" + }, + "homepage": { + "type": "string" + }, + "description": { + "type": "string" + }, + "url": { + "type": "string" + }, + "private": { + "type": "boolean" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "homepage", + "description", + "url", + "private" + ] + }, + "NpmPackageLockJSONMetadata": { + "properties": { + "resolved": { + "type": "string" + }, + "integrity": { + "type": "string" + } + }, + "type": "object", + "required": [ + "resolved", + "integrity" + ] + }, + "Package": { + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "type": { + "type": "string" + }, + "foundBy": { + 
"type": "string" + }, + "locations": { + "items": { + "$ref": "#/$defs/Location" + }, + "type": "array" + }, + "licenses": { + "$ref": "#/$defs/licenses" + }, + "language": { + "type": "string" + }, + "cpes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "purl": { + "type": "string" + }, + "metadataType": { + "type": "string" + }, + "metadata": { + "anyOf": [ + { + "type": "null" + }, + { + "$ref": "#/$defs/AlpmMetadata" + }, + { + "$ref": "#/$defs/ApkMetadata" + }, + { + "$ref": "#/$defs/BinaryMetadata" + }, + { + "$ref": "#/$defs/CargoPackageMetadata" + }, + { + "$ref": "#/$defs/CocoapodsMetadata" + }, + { + "$ref": "#/$defs/ConanLockMetadata" + }, + { + "$ref": "#/$defs/ConanMetadata" + }, + { + "$ref": "#/$defs/DartPubMetadata" + }, + { + "$ref": "#/$defs/DotnetDepsMetadata" + }, + { + "$ref": "#/$defs/DpkgMetadata" + }, + { + "$ref": "#/$defs/GemMetadata" + }, + { + "$ref": "#/$defs/GolangBinMetadata" + }, + { + "$ref": "#/$defs/GolangModMetadata" + }, + { + "$ref": "#/$defs/HackageMetadata" + }, + { + "$ref": "#/$defs/JavaMetadata" + }, + { + "$ref": "#/$defs/KbPackageMetadata" + }, + { + "$ref": "#/$defs/LinuxKernelMetadata" + }, + { + "$ref": "#/$defs/LinuxKernelModuleMetadata" + }, + { + "$ref": "#/$defs/MixLockMetadata" + }, + { + "$ref": "#/$defs/NixStoreMetadata" + }, + { + "$ref": "#/$defs/NpmPackageJSONMetadata" + }, + { + "$ref": "#/$defs/NpmPackageLockJSONMetadata" + }, + { + "$ref": "#/$defs/PhpComposerJSONMetadata" + }, + { + "$ref": "#/$defs/PortageMetadata" + }, + { + "$ref": "#/$defs/PythonPackageMetadata" + }, + { + "$ref": "#/$defs/PythonPipfileLockMetadata" + }, + { + "$ref": "#/$defs/PythonRequirementsMetadata" + }, + { + "$ref": "#/$defs/RDescriptionFileMetadata" + }, + { + "$ref": "#/$defs/RebarLockMetadata" + }, + { + "$ref": "#/$defs/RpmMetadata" + } + ] + } + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "foundBy", + "locations", + "licenses", + "language", + "cpes", + "purl" + ] + 
}, + "PhpComposerAuthors": { + "properties": { + "name": { + "type": "string" + }, + "email": { + "type": "string" + }, + "homepage": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name" + ] + }, + "PhpComposerExternalReference": { + "properties": { + "type": { + "type": "string" + }, + "url": { + "type": "string" + }, + "reference": { + "type": "string" + }, + "shasum": { + "type": "string" + } + }, + "type": "object", + "required": [ + "type", + "url", + "reference" + ] + }, + "PhpComposerJSONMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "source": { + "$ref": "#/$defs/PhpComposerExternalReference" + }, + "dist": { + "$ref": "#/$defs/PhpComposerExternalReference" + }, + "require": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "provide": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "require-dev": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "suggest": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + }, + "license": { + "items": { + "type": "string" + }, + "type": "array" + }, + "type": { + "type": "string" + }, + "notification-url": { + "type": "string" + }, + "bin": { + "items": { + "type": "string" + }, + "type": "array" + }, + "authors": { + "items": { + "$ref": "#/$defs/PhpComposerAuthors" + }, + "type": "array" + }, + "description": { + "type": "string" + }, + "homepage": { + "type": "string" + }, + "keywords": { + "items": { + "type": "string" + }, + "type": "array" + }, + "time": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version", + "source", + "dist" + ] + }, + "PomParent": { + "properties": { + "groupId": { + "type": "string" + }, + "artifactId": { + "type": "string" + }, + "version": { + "type": "string" + } + }, + "type": "object", + "required": [ + "groupId", + 
"artifactId", + "version" + ] + }, + "PomProject": { + "properties": { + "path": { + "type": "string" + }, + "parent": { + "$ref": "#/$defs/PomParent" + }, + "groupId": { + "type": "string" + }, + "artifactId": { + "type": "string" + }, + "version": { + "type": "string" + }, + "name": { + "type": "string" + }, + "description": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "type": "object", + "required": [ + "path", + "groupId", + "artifactId", + "version", + "name" + ] + }, + "PomProperties": { + "properties": { + "path": { + "type": "string" + }, + "name": { + "type": "string" + }, + "groupId": { + "type": "string" + }, + "artifactId": { + "type": "string" + }, + "version": { + "type": "string" + }, + "scope": { + "type": "string" + }, + "extraFields": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object", + "required": [ + "path", + "name", + "groupId", + "artifactId", + "version" + ] + }, + "PortageFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "digest": { + "$ref": "#/$defs/Digest" + } + }, + "type": "object", + "required": [ + "path" + ] + }, + "PortageMetadata": { + "properties": { + "installedSize": { + "type": "integer" + }, + "files": { + "items": { + "$ref": "#/$defs/PortageFileRecord" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "installedSize", + "files" + ] + }, + "PythonDirectURLOriginInfo": { + "properties": { + "url": { + "type": "string" + }, + "commitId": { + "type": "string" + }, + "vcs": { + "type": "string" + } + }, + "type": "object", + "required": [ + "url" + ] + }, + "PythonFileDigest": { + "properties": { + "algorithm": { + "type": "string" + }, + "value": { + "type": "string" + } + }, + "type": "object", + "required": [ + "algorithm", + "value" + ] + }, + "PythonFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "digest": { + "$ref": "#/$defs/PythonFileDigest" + }, + "size": { + "type": 
"string" + } + }, + "type": "object", + "required": [ + "path" + ] + }, + "PythonPackageMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "author": { + "type": "string" + }, + "authorEmail": { + "type": "string" + }, + "platform": { + "type": "string" + }, + "files": { + "items": { + "$ref": "#/$defs/PythonFileRecord" + }, + "type": "array" + }, + "sitePackagesRootPath": { + "type": "string" + }, + "topLevelPackages": { + "items": { + "type": "string" + }, + "type": "array" + }, + "directUrlOrigin": { + "$ref": "#/$defs/PythonDirectURLOriginInfo" + } + }, + "type": "object", + "required": [ + "name", + "version", + "author", + "authorEmail", + "platform", + "sitePackagesRootPath" + ] + }, + "PythonPipfileLockMetadata": { + "properties": { + "hashes": { + "items": { + "type": "string" + }, + "type": "array" + }, + "index": { + "type": "string" + } + }, + "type": "object", + "required": [ + "hashes", + "index" + ] + }, + "PythonRequirementsMetadata": { + "properties": { + "name": { + "type": "string" + }, + "extras": { + "items": { + "type": "string" + }, + "type": "array" + }, + "versionConstraint": { + "type": "string" + }, + "url": { + "type": "string" + }, + "markers": { + "patternProperties": { + ".*": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object", + "required": [ + "name", + "extras", + "versionConstraint", + "url", + "markers" + ] + }, + "RDescriptionFileMetadata": { + "properties": { + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "author": { + "type": "string" + }, + "maintainer": { + "type": "string" + }, + "url": { + "items": { + "type": "string" + }, + "type": "array" + }, + "repository": { + "type": "string" + }, + "built": { + "type": "string" + }, + "needsCompilation": { + "type": "boolean" + }, + "imports": { + "items": { + "type": "string" + }, + "type": "array" + }, + "depends": { + "items": { + "type": "string" + }, + "type": 
"array" + }, + "suggests": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "type": "object" + }, + "RebarLockMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "pkgHash": { + "type": "string" + }, + "pkgHashExt": { + "type": "string" + } + }, + "type": "object", + "required": [ + "name", + "version", + "pkgHash", + "pkgHashExt" + ] + }, + "Relationship": { + "properties": { + "parent": { + "type": "string" + }, + "child": { + "type": "string" + }, + "type": { + "type": "string" + }, + "metadata": true + }, + "type": "object", + "required": [ + "parent", + "child", + "type" + ] + }, + "RpmMetadata": { + "properties": { + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "epoch": { + "oneOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ] + }, + "architecture": { + "type": "string" + }, + "release": { + "type": "string" + }, + "sourceRpm": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "vendor": { + "type": "string" + }, + "modularityLabel": { + "type": "string" + }, + "files": { + "items": { + "$ref": "#/$defs/RpmdbFileRecord" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "name", + "version", + "epoch", + "architecture", + "release", + "sourceRpm", + "size", + "vendor", + "modularityLabel", + "files" + ] + }, + "RpmdbFileRecord": { + "properties": { + "path": { + "type": "string" + }, + "mode": { + "type": "integer" + }, + "size": { + "type": "integer" + }, + "digest": { + "$ref": "#/$defs/Digest" + }, + "userName": { + "type": "string" + }, + "groupName": { + "type": "string" + }, + "flags": { + "type": "string" + } + }, + "type": "object", + "required": [ + "path", + "mode", + "size", + "digest", + "userName", + "groupName", + "flags" + ] + }, + "Schema": { + "properties": { + "version": { + "type": "string" + }, + "url": { + "type": "string" + } + }, + "type": "object", + "required": [ + "version", + "url" + 
] + }, + "SearchResult": { + "properties": { + "classification": { + "type": "string" + }, + "lineNumber": { + "type": "integer" + }, + "lineOffset": { + "type": "integer" + }, + "seekPosition": { + "type": "integer" + }, + "length": { + "type": "integer" + }, + "value": { + "type": "string" + } + }, + "type": "object", + "required": [ + "classification", + "lineNumber", + "lineOffset", + "seekPosition", + "length" + ] + }, + "Secrets": { + "properties": { + "location": { + "$ref": "#/$defs/Coordinates" + }, + "secrets": { + "items": { + "$ref": "#/$defs/SearchResult" + }, + "type": "array" + } + }, + "type": "object", + "required": [ + "location", + "secrets" + ] + }, + "Source": { + "properties": { + "id": { + "type": "string" + }, + "name": { + "type": "string" + }, + "version": { + "type": "string" + }, + "type": { + "type": "string" + }, + "metadata": true + }, + "type": "object", + "required": [ + "id", + "name", + "version", + "type", + "metadata" + ] + }, + "licenses": { + "items": { + "$ref": "#/$defs/License" + }, + "type": "array" + } + } +} diff --git a/syft/file/cataloger/filedigest/cataloger_test.go b/syft/file/cataloger/filedigest/cataloger_test.go index ed8562cbd..1ed1af588 100644 --- a/syft/file/cataloger/filedigest/cataloger_test.go +++ b/syft/file/cataloger/filedigest/cataloger_test.go @@ -75,7 +75,7 @@ func TestDigestsCataloger(t *testing.T) { t.Run(test.name, func(t *testing.T) { c := NewCataloger(test.digests) - src, err := source.NewFromDirectory("test-fixtures/last/") + src, err := source.NewFromDirectoryPath("test-fixtures/last/") require.NoError(t, err) resolver, err := src.FileResolver(source.SquashedScope) @@ -94,7 +94,7 @@ func TestDigestsCataloger_MixFileTypes(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", testImage) - src, err := source.NewFromImage(img, "---") + src, err := source.NewFromStereoscopeImageObject(img, testImage, nil) if err != nil { t.Fatalf("could not create source: %+v", err) } diff --git 
a/syft/file/cataloger/filemetadata/cataloger_test.go b/syft/file/cataloger/filemetadata/cataloger_test.go index 99dfa908a..9c84c9f76 100644 --- a/syft/file/cataloger/filemetadata/cataloger_test.go +++ b/syft/file/cataloger/filemetadata/cataloger_test.go @@ -20,7 +20,7 @@ func TestFileMetadataCataloger(t *testing.T) { c := NewCataloger() - src, err := source.NewFromImage(img, "---") + src, err := source.NewFromStereoscopeImageObject(img, testImage, nil) if err != nil { t.Fatalf("could not create source: %+v", err) } diff --git a/syft/file/cataloger/internal/all_regular_files_test.go b/syft/file/cataloger/internal/all_regular_files_test.go index 714e733e6..ced333840 100644 --- a/syft/file/cataloger/internal/all_regular_files_test.go +++ b/syft/file/cataloger/internal/all_regular_files_test.go @@ -27,7 +27,7 @@ func Test_allRegularFiles(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", testImage) - s, err := source.NewFromImage(img, "---") + s, err := source.NewFromStereoscopeImageObject(img, testImage, nil) require.NoError(t, err) r, err := s.FileResolver(source.SquashedScope) @@ -41,7 +41,7 @@ func Test_allRegularFiles(t *testing.T) { { name: "directory", setup: func() file.Resolver { - s, err := source.NewFromDirectory("test-fixtures/symlinked-root/nested/link-root") + s, err := source.NewFromDirectoryPath("test-fixtures/symlinked-root/nested/link-root") require.NoError(t, err) r, err := s.FileResolver(source.SquashedScope) require.NoError(t, err) diff --git a/syft/file/test-fixtures/req-resp/.gitignore b/syft/file/test-fixtures/req-resp/.gitignore new file mode 100644 index 000000000..c94459921 --- /dev/null +++ b/syft/file/test-fixtures/req-resp/.gitignore @@ -0,0 +1,2 @@ +path/to/abs-inside.txt +path/to/the/abs-outside.txt \ No newline at end of file diff --git a/syft/file/test-fixtures/req-resp/path/to/rel-inside.txt b/syft/file/test-fixtures/req-resp/path/to/rel-inside.txt new file mode 120000 index 000000000..f2bc06e87 --- /dev/null +++ 
b/syft/file/test-fixtures/req-resp/path/to/rel-inside.txt @@ -0,0 +1 @@ +./the/file.txt \ No newline at end of file diff --git a/syft/file/test-fixtures/req-resp/path/to/the/file.txt b/syft/file/test-fixtures/req-resp/path/to/the/file.txt new file mode 100644 index 000000000..fbfd79f5e --- /dev/null +++ b/syft/file/test-fixtures/req-resp/path/to/the/file.txt @@ -0,0 +1 @@ +file-1 diff --git a/syft/file/test-fixtures/req-resp/path/to/the/rel-outside.txt b/syft/file/test-fixtures/req-resp/path/to/the/rel-outside.txt new file mode 120000 index 000000000..6ad08d357 --- /dev/null +++ b/syft/file/test-fixtures/req-resp/path/to/the/rel-outside.txt @@ -0,0 +1 @@ +../../../somewhere/outside.txt \ No newline at end of file diff --git a/syft/file/test-fixtures/req-resp/root-link b/syft/file/test-fixtures/req-resp/root-link new file mode 120000 index 000000000..6a043149e --- /dev/null +++ b/syft/file/test-fixtures/req-resp/root-link @@ -0,0 +1 @@ +./ \ No newline at end of file diff --git a/syft/file/test-fixtures/req-resp/somewhere/outside.txt b/syft/file/test-fixtures/req-resp/somewhere/outside.txt new file mode 100644 index 000000000..37ad56119 --- /dev/null +++ b/syft/file/test-fixtures/req-resp/somewhere/outside.txt @@ -0,0 +1 @@ +file-2 diff --git a/syft/formats/common/cyclonedxhelpers/decoder.go b/syft/formats/common/cyclonedxhelpers/decoder.go index ef81bf999..cb4c9974e 100644 --- a/syft/formats/common/cyclonedxhelpers/decoder.go +++ b/syft/formats/common/cyclonedxhelpers/decoder.go @@ -229,32 +229,34 @@ func collectRelationships(bom *cyclonedx.BOM, s *sbom.SBOM, idMap map[string]int } } -func extractComponents(meta *cyclonedx.Metadata) source.Metadata { +func extractComponents(meta *cyclonedx.Metadata) source.Description { if meta == nil || meta.Component == nil { - return source.Metadata{} + return source.Description{} } c := meta.Component - image := source.ImageMetadata{ - UserInput: c.Name, - ID: c.BOMRef, - ManifestDigest: c.Version, - } - switch c.Type { case 
cyclonedx.ComponentTypeContainer: - return source.Metadata{ - Scheme: source.ImageScheme, - ImageMetadata: image, + return source.Description{ + ID: "", + // TODO: can we decode alias name-version somehow? (it isn't be encoded in the first place yet) + + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: c.Name, + ID: c.BOMRef, + ManifestDigest: c.Version, + }, } case cyclonedx.ComponentTypeFile: - return source.Metadata{ - Scheme: source.FileScheme, // or source.DirectoryScheme - Path: c.Name, - ImageMetadata: image, + // TODO: can we decode alias name-version somehow? (it isn't be encoded in the first place yet) + + // TODO: this is lossy... we can't know if this is a file or a directory + return source.Description{ + ID: "", + Metadata: source.FileSourceMetadata{Path: c.Name}, } } - return source.Metadata{} + return source.Description{} } // if there is more than one tool in meta.Tools' list the last item will be used diff --git a/syft/formats/common/cyclonedxhelpers/format.go b/syft/formats/common/cyclonedxhelpers/format.go index 2facf558d..34ca35094 100644 --- a/syft/formats/common/cyclonedxhelpers/format.go +++ b/syft/formats/common/cyclonedxhelpers/format.go @@ -110,7 +110,7 @@ func formatCPE(cpeString string) string { } // NewBomDescriptor returns a new BomDescriptor tailored for the current time and "syft" tool details. 
-func toBomDescriptor(name, version string, srcMetadata source.Metadata) *cyclonedx.Metadata { +func toBomDescriptor(name, version string, srcMetadata source.Description) *cyclonedx.Metadata { return &cyclonedx.Metadata{ Timestamp: time.Now().Format(time.RFC3339), Tools: &[]cyclonedx.Tool{ @@ -170,35 +170,56 @@ func toDependencies(relationships []artifact.Relationship) []cyclonedx.Dependenc return result } -func toBomDescriptorComponent(srcMetadata source.Metadata) *cyclonedx.Component { +func toBomDescriptorComponent(srcMetadata source.Description) *cyclonedx.Component { name := srcMetadata.Name - switch srcMetadata.Scheme { - case source.ImageScheme: + version := srcMetadata.Version + switch metadata := srcMetadata.Metadata.(type) { + case source.StereoscopeImageSourceMetadata: if name == "" { - name = srcMetadata.ImageMetadata.UserInput + name = metadata.UserInput } - bomRef, err := artifact.IDByHash(srcMetadata.ImageMetadata.ID) + if version == "" { + version = metadata.ManifestDigest + } + bomRef, err := artifact.IDByHash(metadata.ID) if err != nil { - log.Warnf("unable to get fingerprint of image metadata=%s: %+v", srcMetadata.ImageMetadata.ID, err) + log.Warnf("unable to get fingerprint of source image metadata=%s: %+v", metadata.ID, err) } return &cyclonedx.Component{ BOMRef: string(bomRef), Type: cyclonedx.ComponentTypeContainer, Name: name, - Version: srcMetadata.ImageMetadata.ManifestDigest, + Version: version, } - case source.DirectoryScheme, source.FileScheme: + case source.DirectorySourceMetadata: if name == "" { - name = srcMetadata.Path + name = metadata.Path } - bomRef, err := artifact.IDByHash(srcMetadata.Path) + bomRef, err := artifact.IDByHash(metadata.Path) if err != nil { - log.Warnf("unable to get fingerprint of source metadata path=%s: %+v", srcMetadata.Path, err) + log.Warnf("unable to get fingerprint of source directory metadata path=%s: %+v", metadata.Path, err) } return &cyclonedx.Component{ BOMRef: string(bomRef), - Type: 
cyclonedx.ComponentTypeFile, - Name: name, + // TODO: this is lossy... we can't know if this is a file or a directory + Type: cyclonedx.ComponentTypeFile, + Name: name, + Version: version, + } + case source.FileSourceMetadata: + if name == "" { + name = metadata.Path + } + bomRef, err := artifact.IDByHash(metadata.Path) + if err != nil { + log.Warnf("unable to get fingerprint of source file metadata path=%s: %+v", metadata.Path, err) + } + return &cyclonedx.Component{ + BOMRef: string(bomRef), + // TODO: this is lossy... we can't know if this is a file or a directory + Type: cyclonedx.ComponentTypeFile, + Name: name, + Version: version, } } diff --git a/syft/formats/common/spdxhelpers/document_name.go b/syft/formats/common/spdxhelpers/document_name.go index 8967117e9..6932f2b4e 100644 --- a/syft/formats/common/spdxhelpers/document_name.go +++ b/syft/formats/common/spdxhelpers/document_name.go @@ -4,16 +4,18 @@ import ( "github.com/anchore/syft/syft/source" ) -func DocumentName(srcMetadata source.Metadata) string { +func DocumentName(srcMetadata source.Description) string { if srcMetadata.Name != "" { return srcMetadata.Name } - switch srcMetadata.Scheme { - case source.ImageScheme: - return srcMetadata.ImageMetadata.UserInput - case source.DirectoryScheme, source.FileScheme: - return srcMetadata.Path + switch metadata := srcMetadata.Metadata.(type) { + case source.StereoscopeImageSourceMetadata: + return metadata.UserInput + case source.DirectorySourceMetadata: + return metadata.Path + case source.FileSourceMetadata: + return metadata.Path default: return "unknown" } diff --git a/syft/formats/common/spdxhelpers/document_name_test.go b/syft/formats/common/spdxhelpers/document_name_test.go index cc1944247..f993f41a6 100644 --- a/syft/formats/common/spdxhelpers/document_name_test.go +++ b/syft/formats/common/spdxhelpers/document_name_test.go @@ -5,31 +5,27 @@ import ( "strings" "testing" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" + 
"github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/source" ) func Test_DocumentName(t *testing.T) { - allSchemes := strset.New() - for _, s := range source.AllSchemes { - allSchemes.Add(string(s)) - } - testedSchemes := strset.New() + + tracker := sourcemetadata.NewCompletionTester(t) tests := []struct { name string inputName string - srcMetadata source.Metadata + srcMetadata source.Description expected string }{ { name: "image", inputName: "my-name", - srcMetadata: source.Metadata{ - Scheme: source.ImageScheme, - ImageMetadata: source.ImageMetadata{ + srcMetadata: source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "image-repo/name:tag", ID: "id", ManifestDigest: "digest", @@ -40,18 +36,16 @@ func Test_DocumentName(t *testing.T) { { name: "directory", inputName: "my-name", - srcMetadata: source.Metadata{ - Scheme: source.DirectoryScheme, - Path: "some/path/to/place", + srcMetadata: source.Description{ + Metadata: source.DirectorySourceMetadata{Path: "some/path/to/place"}, }, expected: "some/path/to/place", }, { name: "file", inputName: "my-name", - srcMetadata: source.Metadata{ - Scheme: source.FileScheme, - Path: "some/path/to/place", + srcMetadata: source.Description{ + Metadata: source.FileSourceMetadata{Path: "some/path/to/place"}, }, expected: "some/path/to/place", }, @@ -62,10 +56,7 @@ func Test_DocumentName(t *testing.T) { assert.True(t, strings.HasPrefix(actual, test.expected), fmt.Sprintf("actual name %q", actual)) // track each scheme tested (passed or not) - testedSchemes.Add(string(test.srcMetadata.Scheme)) + tracker.Tested(t, test.srcMetadata.Metadata) }) } - - // assert all possible schemes were under test - assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test") } diff --git a/syft/formats/common/spdxhelpers/document_namespace.go b/syft/formats/common/spdxhelpers/document_namespace.go index c2a2bd129..3b6d30b69 100644 --- 
a/syft/formats/common/spdxhelpers/document_namespace.go +++ b/syft/formats/common/spdxhelpers/document_namespace.go @@ -18,20 +18,20 @@ const ( inputFile = "file" ) -func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string) { - name := DocumentName(srcMetadata) - return name, DocumentNamespace(name, srcMetadata) +func DocumentNameAndNamespace(src source.Description) (string, string) { + name := DocumentName(src) + return name, DocumentNamespace(name, src) } -func DocumentNamespace(name string, srcMetadata source.Metadata) string { +func DocumentNamespace(name string, src source.Description) string { name = cleanName(name) input := "unknown-source-type" - switch srcMetadata.Scheme { - case source.ImageScheme: + switch src.Metadata.(type) { + case source.StereoscopeImageSourceMetadata: input = inputImage - case source.DirectoryScheme: + case source.DirectorySourceMetadata: input = inputDirectory - case source.FileScheme: + case source.FileSourceMetadata: input = inputFile } diff --git a/syft/formats/common/spdxhelpers/document_namespace_test.go b/syft/formats/common/spdxhelpers/document_namespace_test.go index 545227664..00bed3536 100644 --- a/syft/formats/common/spdxhelpers/document_namespace_test.go +++ b/syft/formats/common/spdxhelpers/document_namespace_test.go @@ -5,31 +5,26 @@ import ( "strings" "testing" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/source" ) func Test_documentNamespace(t *testing.T) { - allSchemes := strset.New() - for _, s := range source.AllSchemes { - allSchemes.Add(string(s)) - } - testedSchemes := strset.New() + tracker := sourcemetadata.NewCompletionTester(t) tests := []struct { - name string - inputName string - srcMetadata source.Metadata - expected string + name string + inputName string + src source.Description + expected string }{ { name: "image", inputName: "my-name", - srcMetadata: source.Metadata{ 
- Scheme: source.ImageScheme, - ImageMetadata: source.ImageMetadata{ + src: source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "image-repo/name:tag", ID: "id", ManifestDigest: "digest", @@ -40,33 +35,32 @@ func Test_documentNamespace(t *testing.T) { { name: "directory", inputName: "my-name", - srcMetadata: source.Metadata{ - Scheme: source.DirectoryScheme, - Path: "some/path/to/place", + src: source.Description{ + Metadata: source.DirectorySourceMetadata{ + Path: "some/path/to/place", + }, }, expected: "https://anchore.com/syft/dir/my-name-", }, { name: "file", inputName: "my-name", - srcMetadata: source.Metadata{ - Scheme: source.FileScheme, - Path: "some/path/to/place", + src: source.Description{ + Metadata: source.FileSourceMetadata{ + Path: "some/path/to/place", + }, }, expected: "https://anchore.com/syft/file/my-name-", }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual := DocumentNamespace(test.inputName, test.srcMetadata) + actual := DocumentNamespace(test.inputName, test.src) // note: since the namespace ends with a UUID we check the prefix assert.True(t, strings.HasPrefix(actual, test.expected), fmt.Sprintf("actual namespace %q", actual)) // track each scheme tested (passed or not) - testedSchemes.Add(string(test.srcMetadata.Scheme)) + tracker.Tested(t, test.src.Metadata) }) } - - // assert all possible schemes were under test - assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test") } diff --git a/syft/formats/common/spdxhelpers/to_syft_model.go b/syft/formats/common/spdxhelpers/to_syft_model.go index fd34541df..54ecd1455 100644 --- a/syft/formats/common/spdxhelpers/to_syft_model.go +++ b/syft/formats/common/spdxhelpers/to_syft_model.go @@ -28,8 +28,7 @@ func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { spdxIDMap := make(map[string]interface{}) - src := source.Metadata{Scheme: source.UnknownScheme} - src.Scheme = 
extractSchemeFromNamespace(doc.DocumentNamespace) + src := extractSourceFromNamespace(doc.DocumentNamespace) s := &sbom.SBOM{ Source: src, @@ -54,24 +53,32 @@ func ToSyftModel(doc *spdx.Document) (*sbom.SBOM, error) { // image, directory, for example. This is our best effort to determine // the scheme. Syft-generated SBOMs have in the namespace // field a type encoded, which we try to identify here. -func extractSchemeFromNamespace(ns string) source.Scheme { +func extractSourceFromNamespace(ns string) source.Description { u, err := url.Parse(ns) if err != nil { - return source.UnknownScheme + return source.Description{ + Metadata: nil, + } } parts := strings.Split(u.Path, "/") for _, p := range parts { switch p { case inputFile: - return source.FileScheme + return source.Description{ + Metadata: source.FileSourceMetadata{}, + } case inputImage: - return source.ImageScheme + return source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{}, + } case inputDirectory: - return source.DirectoryScheme + return source.Description{ + Metadata: source.DirectorySourceMetadata{}, + } } } - return source.UnknownScheme + return source.Description{} } func findLinuxReleaseByPURL(doc *spdx.Document) *linux.Release { diff --git a/syft/formats/common/spdxhelpers/to_syft_model_test.go b/syft/formats/common/spdxhelpers/to_syft_model_test.go index e4a98f5ab..6c9cf7c8f 100644 --- a/syft/formats/common/spdxhelpers/to_syft_model_test.go +++ b/syft/formats/common/spdxhelpers/to_syft_model_test.go @@ -1,6 +1,7 @@ package spdxhelpers import ( + "reflect" "testing" "github.com/spdx/tools-golang/spdx" @@ -197,36 +198,46 @@ func Test_extractMetadata(t *testing.T) { func TestExtractSourceFromNamespaces(t *testing.T) { tests := []struct { namespace string - expected source.Scheme + expected any }{ { namespace: "https://anchore.com/syft/file/d42b01d0-7325-409b-b03f-74082935c4d3", - expected: source.FileScheme, + expected: source.FileSourceMetadata{}, }, { namespace: 
"https://anchore.com/syft/image/d42b01d0-7325-409b-b03f-74082935c4d3", - expected: source.ImageScheme, + expected: source.StereoscopeImageSourceMetadata{}, }, { namespace: "https://anchore.com/syft/dir/d42b01d0-7325-409b-b03f-74082935c4d3", - expected: source.DirectoryScheme, + expected: source.DirectorySourceMetadata{}, }, { namespace: "https://another-host/blob/123", - expected: source.UnknownScheme, + expected: nil, }, { namespace: "bla bla", - expected: source.UnknownScheme, + expected: nil, }, { namespace: "", - expected: source.UnknownScheme, + expected: nil, }, } for _, tt := range tests { - require.Equal(t, tt.expected, extractSchemeFromNamespace(tt.namespace)) + desc := extractSourceFromNamespace(tt.namespace) + if tt.expected == nil && desc.Metadata == nil { + return + } + if tt.expected != nil && desc.Metadata == nil { + t.Fatal("expected metadata but got nil") + } + if tt.expected == nil && desc.Metadata != nil { + t.Fatal("expected nil metadata but got something") + } + require.Equal(t, reflect.TypeOf(tt.expected), reflect.TypeOf(desc.Metadata)) } } diff --git a/syft/formats/cyclonedxjson/encoder_test.go b/syft/formats/cyclonedxjson/encoder_test.go index 2159fca5e..dec29164e 100644 --- a/syft/formats/cyclonedxjson/encoder_test.go +++ b/syft/formats/cyclonedxjson/encoder_test.go @@ -2,49 +2,62 @@ package cyclonedxjson import ( "flag" - "regexp" "testing" "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateCycloneDx = flag.Bool("update-cyclonedx", false, "update the *.golden files for cyclone-dx encoders") +var updateSnapshot = flag.Bool("update-cyclonedx-json", false, "update the *.golden files for cyclone-dx JSON encoders") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestCycloneDxDirectoryEncoder(t *testing.T) { + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateCycloneDx, - true, - 
cycloneDxRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(dir), + }, ) } func TestCycloneDxImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage), - testImage, - *updateCycloneDx, - true, - cycloneDxRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(), + }, ) } -func cycloneDxRedactor(s []byte) []byte { - replacements := map[string]string{ - // UUIDs - `urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`: `urn:uuid:redacted`, - // timestamps - `([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `timestamp:redacted`, - // image hashes - `sha256:[A-Fa-f0-9]{64}`: `sha256:redacted`, - // bom-refs - `"bom-ref":\s*"[^"]+"`: `"bom-ref": "redacted"`, - } - for pattern, replacement := range replacements { - s = regexp.MustCompile(pattern).ReplaceAll(s, []byte(replacement)) - } - return s +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...). 
+ WithPatternRedactors( + map[string]string{ + // UUIDs + `urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}`: `urn:uuid:redacted`, + + // timestamps + `([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `timestamp:redacted`, + + // image hashes + `sha256:[A-Fa-f0-9]{64}`: `sha256:redacted`, + + // BOM refs + `"bom-ref":\s*"[^"]+"`: `"bom-ref":"redacted"`, + }, + ) } diff --git a/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index 3cab5be0e..77ff36dba 100644 --- a/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -2,10 +2,10 @@ "$schema": "http://cyclonedx.org/schema/bom-1.4.schema.json", "bomFormat": "CycloneDX", "specVersion": "1.4", - "serialNumber": "urn:uuid:1b71a5b4-4bc5-4548-a51a-212e631976cd", + "serialNumber": "urn:uuid:redacted", "version": 1, "metadata": { - "timestamp": "2023-05-08T14:40:32-04:00", + "timestamp": "timestamp:redacted", "tools": [ { "vendor": "anchore", @@ -14,14 +14,14 @@ } ], "component": { - "bom-ref": "163686ac6e30c752", + "bom-ref":"redacted", "type": "file", - "name": "/some/path" + "name": "some/path" } }, "components": [ { - "bom-ref": "8c7e1242588c971a", + "bom-ref":"redacted", "type": "library", "name": "package-1", "version": "1.0.1", @@ -58,7 +58,7 @@ ] }, { - "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=db4abfe497c180d3", + "bom-ref":"redacted", "type": "library", "name": "package-2", "version": "2.0.1", diff --git a/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 4d4765f54..ac4799123 100644 --- 
a/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ b/syft/formats/cyclonedxjson/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -2,10 +2,10 @@ "$schema": "http://cyclonedx.org/schema/bom-1.4.schema.json", "bomFormat": "CycloneDX", "specVersion": "1.4", - "serialNumber": "urn:uuid:1695d6ae-0ddf-4e77-9c9d-74df1bdd8d5b", + "serialNumber": "urn:uuid:redacted", "version": 1, "metadata": { - "timestamp": "2023-05-08T14:40:32-04:00", + "timestamp": "timestamp:redacted", "tools": [ { "vendor": "anchore", @@ -14,15 +14,15 @@ } ], "component": { - "bom-ref": "38160ebc2a6876e8", + "bom-ref":"redacted", "type": "container", "name": "user-image-input", - "version": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368" + "version": "sha256:redacted" } }, "components": [ { - "bom-ref": "ec2e0c93617507ef", + "bom-ref":"redacted", "type": "library", "name": "package-1", "version": "1.0.1", @@ -54,7 +54,7 @@ }, { "name": "syft:location:0:layerID", - "value": "sha256:ab62016f9bec7286af65604081564cadeeb364a48faca2346c3f5a5a1f5ef777" + "value": "sha256:redacted" }, { "name": "syft:location:0:path", @@ -63,7 +63,7 @@ ] }, { - "bom-ref": "pkg:deb/debian/package-2@2.0.1?package-id=958443e2d9304af4", + "bom-ref":"redacted", "type": "library", "name": "package-2", "version": "2.0.1", @@ -84,7 +84,7 @@ }, { "name": "syft:location:0:layerID", - "value": "sha256:f1803845b6747d94d6e4ecce2331457e5f1c4fb97de5216f392a76f4582f63b2" + "value": "sha256:redacted" }, { "name": "syft:location:0:path", diff --git a/syft/formats/cyclonedxxml/encoder_test.go b/syft/formats/cyclonedxxml/encoder_test.go index 1070f44ca..5b8781aff 100644 --- a/syft/formats/cyclonedxxml/encoder_test.go +++ b/syft/formats/cyclonedxxml/encoder_test.go @@ -2,51 +2,62 @@ package cyclonedxxml import ( "flag" - "regexp" "testing" "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateCycloneDx = flag.Bool("update-cyclonedx", false, "update the 
*.golden files for cyclone-dx encoders") +var updateSnapshot = flag.Bool("update-cyclonedx-xml", false, "update the *.golden files for cyclone-dx XML encoders") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestCycloneDxDirectoryEncoder(t *testing.T) { + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateCycloneDx, - false, - cycloneDxRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(dir), + }, ) } func TestCycloneDxImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage), - testImage, - *updateCycloneDx, - false, - cycloneDxRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(), + }, ) } -func cycloneDxRedactor(s []byte) []byte { - serialPattern := regexp.MustCompile(`serialNumber="[a-zA-Z0-9\-:]+"`) - rfc3339Pattern := regexp.MustCompile(`([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([\+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`) - sha256Pattern := regexp.MustCompile(`sha256:[A-Fa-f0-9]{64}`) +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...). 
+ WithPatternRedactors( + map[string]string{ + // serial numbers + `serialNumber="[a-zA-Z0-9\-:]+`: `serialNumber="redacted`, - for _, pattern := range []*regexp.Regexp{serialPattern, rfc3339Pattern, sha256Pattern} { - s = pattern.ReplaceAll(s, []byte("redacted")) - } + // dates + `([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`: `redacted`, - // the bom-ref will be autogenerated every time, the value here should not be directly tested in snapshot tests - bomRefPattern := regexp.MustCompile(` bom-ref="[a-zA-Z0-9\-:]+"`) - bomRef3339Pattern := regexp.MustCompile(`([0-9]+)-(0[1-9]|1[012])-(0[1-9]|[12][0-9]|3[01])[Tt]([01][0-9]|2[0-3]):([0-5][0-9]):([0-5][0-9]|60)(\.[0-9]+)?(([Zz])|([\+|\-]([01][0-9]|2[0-3]):[0-5][0-9]))`) - for _, pattern := range []*regexp.Regexp{bomRefPattern, bomRef3339Pattern} { - s = pattern.ReplaceAll(s, []byte("")) - } + // image hashes + `sha256:[A-Za-z0-9]{64}`: `sha256:redacted`, - return s + // BOM refs + `bom-ref="[a-zA-Z0-9\-:]+"`: `bom-ref:redacted`, + }, + ) } diff --git a/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden b/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden index 32eaf2747..592072d20 100644 --- a/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden +++ b/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxDirectoryEncoder.golden @@ -1,7 +1,7 @@ - + - 2023-05-08T14:40:52-04:00 + redacted anchore @@ -9,12 +9,12 @@ v0.42.0-bogus - - /some/path + + some/path - + package-1 1.0.1 diff --git a/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden b/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden index 67ad5f052..95701d100 100644 --- a/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden +++ 
b/syft/formats/cyclonedxxml/test-fixtures/snapshot/TestCycloneDxImageEncoder.golden @@ -1,7 +1,7 @@ - + - 2023-05-08T14:40:52-04:00 + redacted anchore @@ -9,13 +9,13 @@ v0.42.0-bogus - + user-image-input - sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368 + sha256:redacted - + package-1 1.0.1 @@ -30,7 +30,7 @@ python PythonPackageMetadata python - sha256:ab62016f9bec7286af65604081564cadeeb364a48faca2346c3f5a5a1f5ef777 + sha256:redacted /somefile-1.txt @@ -43,7 +43,7 @@ the-cataloger-2 DpkgMetadata deb - sha256:f1803845b6747d94d6e4ecce2331457e5f1c4fb97de5216f392a76f4582f63b2 + sha256:redacted /somefile-2.txt 0 diff --git a/syft/formats/github/encoder.go b/syft/formats/github/encoder.go index e03c7f504..261ff6b18 100644 --- a/syft/formats/github/encoder.go +++ b/syft/formats/github/encoder.go @@ -64,45 +64,6 @@ func filesystem(p pkg.Package) string { return "" } -// isArchive returns true if the path appears to be an archive -func isArchive(path string) bool { - _, err := archiver.ByExtension(path) - return err == nil -} - -// toPath Generates a string representation of the package location, optionally including the layer hash -func toPath(s source.Metadata, p pkg.Package) string { - inputPath := strings.TrimPrefix(s.Path, "./") - if inputPath == "." 
{ - inputPath = "" - } - locations := p.Locations.ToSlice() - if len(locations) > 0 { - location := locations[0] - packagePath := location.RealPath - if location.VirtualPath != "" { - packagePath = location.VirtualPath - } - packagePath = strings.TrimPrefix(packagePath, "/") - switch s.Scheme { - case source.ImageScheme: - image := strings.ReplaceAll(s.ImageMetadata.UserInput, ":/", "//") - return fmt.Sprintf("%s:/%s", image, packagePath) - case source.FileScheme: - if isArchive(inputPath) { - return fmt.Sprintf("%s:/%s", inputPath, packagePath) - } - return inputPath - case source.DirectoryScheme: - if inputPath != "" { - return fmt.Sprintf("%s/%s", inputPath, packagePath) - } - return packagePath - } - } - return fmt.Sprintf("%s%s", inputPath, s.ImageMetadata.UserInput) -} - // toGithubManifests manifests, each of which represents a specific location that has dependencies func toGithubManifests(s *sbom.SBOM) Manifests { manifests := map[string]*Manifest{} @@ -144,6 +105,63 @@ func toGithubManifests(s *sbom.SBOM) Manifests { return out } +// toPath Generates a string representation of the package location, optionally including the layer hash +func toPath(s source.Description, p pkg.Package) string { + inputPath := trimRelative(s.Name) + locations := p.Locations.ToSlice() + if len(locations) > 0 { + location := locations[0] + packagePath := location.RealPath + if location.VirtualPath != "" { + packagePath = location.VirtualPath + } + packagePath = strings.TrimPrefix(packagePath, "/") + switch metadata := s.Metadata.(type) { + case source.StereoscopeImageSourceMetadata: + image := strings.ReplaceAll(metadata.UserInput, ":/", "//") + return fmt.Sprintf("%s:/%s", image, packagePath) + case source.FileSourceMetadata: + path := trimRelative(metadata.Path) + if isArchive(metadata.Path) { + return fmt.Sprintf("%s:/%s", path, packagePath) + } + return path + case source.DirectorySourceMetadata: + path := trimRelative(metadata.Path) + if path != "" { + return 
fmt.Sprintf("%s/%s", path, packagePath) + } + return packagePath + } + } + return inputPath +} + +func trimRelative(s string) string { + s = strings.TrimPrefix(s, "./") + if s == "." { + s = "" + } + return s +} + +// isArchive returns true if the path appears to be an archive +func isArchive(path string) bool { + _, err := archiver.ByExtension(path) + return err == nil +} + +func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { + for _, r := range s.Relationships { + if r.From.ID() == p.ID() { + if p, ok := r.To.(pkg.Package); ok { + out = append(out, dependencyName(p)) + } + } + } + return +} + // dependencyName to make things a little nicer to read; this might end up being lossy func dependencyName(p pkg.Package) string { purl, err := packageurl.FromString(p.PURL) @@ -171,14 +189,3 @@ func toDependencyMetadata(_ pkg.Package) Metadata { // so we don't need anything here yet return Metadata{} } - -func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) { - for _, r := range s.Relationships { - if r.From.ID() == p.ID() { - if p, ok := r.To.(pkg.Package); ok { - out = append(out, dependencyName(p)) - } - } - } - return -} diff --git a/syft/formats/github/encoder_test.go b/syft/formats/github/encoder_test.go index a0770f252..3325509ce 100644 --- a/syft/formats/github/encoder_test.go +++ b/syft/formats/github/encoder_test.go @@ -1,24 +1,25 @@ package github import ( - "encoding/json" "testing" + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/stretchr/testify/assert" "github.com/anchore/packageurl-go" "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) -func Test_toGithubModel(t *testing.T) { +func sbomFixture() sbom.SBOM { s := sbom.SBOM{ - Source: source.Metadata{ - Scheme: source.ImageScheme, - ImageMetadata: 
source.ImageMetadata{ + Source: source.Description{ + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "ubuntu:18.04", Architecture: "amd64", }, @@ -75,88 +76,121 @@ func Test_toGithubModel(t *testing.T) { s.Artifacts.Packages.Add(p) } - actual := toGithubModel(&s) + return s +} - expected := DependencySnapshot{ - Version: 0, - Detector: DetectorMetadata{ - Name: "syft", - Version: "0.0.0-dev", - URL: "https://github.com/anchore/syft", - }, - Metadata: Metadata{ - "syft:distro": "pkg:generic/ubuntu@18.04?like=debian", - }, - Scanned: actual.Scanned, - Manifests: Manifests{ - "ubuntu:18.04:/usr/lib": Manifest{ - Name: "ubuntu:18.04:/usr/lib", - File: FileInfo{ - SourceLocation: "ubuntu:18.04:/usr/lib", +func Test_toGithubModel(t *testing.T) { + tracker := sourcemetadata.NewCompletionTester(t) + + tests := []struct { + name string + metadata any + testPath string + expected *DependencySnapshot + }{ + { + name: "image", + expected: &DependencySnapshot{ + Version: 0, + Detector: DetectorMetadata{ + Name: "syft", + Version: "0.0.0-dev", + URL: "https://github.com/anchore/syft", }, Metadata: Metadata{ - "syft:filesystem": "fsid-1", + "syft:distro": "pkg:generic/ubuntu@18.04?like=debian", }, - Resolved: DependencyGraph{ - "pkg:generic/pkg-1@1.0.1": DependencyNode{ - PackageURL: "pkg:generic/pkg-1@1.0.1", - Scope: DependencyScopeRuntime, - Relationship: DependencyRelationshipDirect, + //Scanned: actual.Scanned, + Manifests: Manifests{ + "ubuntu:18.04:/usr/lib": Manifest{ + Name: "ubuntu:18.04:/usr/lib", + File: FileInfo{ + SourceLocation: "ubuntu:18.04:/usr/lib", + }, + Metadata: Metadata{ + "syft:filesystem": "fsid-1", + }, + Resolved: DependencyGraph{ + "pkg:generic/pkg-1@1.0.1": DependencyNode{ + PackageURL: "pkg:generic/pkg-1@1.0.1", + Scope: DependencyScopeRuntime, + Relationship: DependencyRelationshipDirect, + Metadata: Metadata{}, + }, + "pkg:generic/pkg-2@2.0.2": DependencyNode{ + PackageURL: "pkg:generic/pkg-2@2.0.2", + Scope: DependencyScopeRuntime, + 
Relationship: DependencyRelationshipDirect, + Metadata: Metadata{}, + }, + }, }, - "pkg:generic/pkg-2@2.0.2": DependencyNode{ - PackageURL: "pkg:generic/pkg-2@2.0.2", - Scope: DependencyScopeRuntime, - Relationship: DependencyRelationshipDirect, - }, - }, - }, - "ubuntu:18.04:/etc": Manifest{ - Name: "ubuntu:18.04:/etc", - File: FileInfo{ - SourceLocation: "ubuntu:18.04:/etc", - }, - Metadata: Metadata{ - "syft:filesystem": "fsid-1", - }, - Resolved: DependencyGraph{ - "pkg:generic/pkg-3@3.0.3": DependencyNode{ - PackageURL: "pkg:generic/pkg-3@3.0.3", - Scope: DependencyScopeRuntime, - Relationship: DependencyRelationshipDirect, + "ubuntu:18.04:/etc": Manifest{ + Name: "ubuntu:18.04:/etc", + File: FileInfo{ + SourceLocation: "ubuntu:18.04:/etc", + }, + Metadata: Metadata{ + "syft:filesystem": "fsid-1", + }, + Resolved: DependencyGraph{ + "pkg:generic/pkg-3@3.0.3": DependencyNode{ + PackageURL: "pkg:generic/pkg-3@3.0.3", + Scope: DependencyScopeRuntime, + Relationship: DependencyRelationshipDirect, + Metadata: Metadata{}, + }, + }, }, }, }, }, + { + name: "current directory", + metadata: source.DirectorySourceMetadata{Path: "."}, + testPath: "etc", + }, + { + name: "relative directory", + metadata: source.DirectorySourceMetadata{Path: "./artifacts"}, + testPath: "artifacts/etc", + }, + { + name: "absolute directory", + metadata: source.DirectorySourceMetadata{Path: "/artifacts"}, + testPath: "/artifacts/etc", + }, + { + name: "file", + metadata: source.FileSourceMetadata{Path: "./executable"}, + testPath: "executable", + }, + { + name: "archive", + metadata: source.FileSourceMetadata{Path: "./archive.tar.gz"}, + testPath: "archive.tar.gz:/etc", + }, } - // just using JSONEq because it gives a comprehensible diff - s1, _ := json.Marshal(expected) - s2, _ := json.Marshal(actual) - assert.JSONEq(t, string(s1), string(s2)) + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + s := sbomFixture() - // Just test the other schemes: - s.Source.Path = "." 
- s.Source.Scheme = source.DirectoryScheme - actual = toGithubModel(&s) - assert.Equal(t, "etc", actual.Manifests["etc"].Name) + if test.metadata != nil { + s.Source.Metadata = test.metadata + } + actual := toGithubModel(&s) - s.Source.Path = "./artifacts" - s.Source.Scheme = source.DirectoryScheme - actual = toGithubModel(&s) - assert.Equal(t, "artifacts/etc", actual.Manifests["artifacts/etc"].Name) + if test.expected != nil { + if d := cmp.Diff(*test.expected, actual, cmpopts.IgnoreFields(DependencySnapshot{}, "Scanned")); d != "" { + t.Errorf("unexpected result (-want +got):\n%s", d) + } + } - s.Source.Path = "/artifacts" - s.Source.Scheme = source.DirectoryScheme - actual = toGithubModel(&s) - assert.Equal(t, "/artifacts/etc", actual.Manifests["/artifacts/etc"].Name) + assert.Equal(t, test.testPath, actual.Manifests[test.testPath].Name) - s.Source.Path = "./executable" - s.Source.Scheme = source.FileScheme - actual = toGithubModel(&s) - assert.Equal(t, "executable", actual.Manifests["executable"].Name) - - s.Source.Path = "./archive.tar.gz" - s.Source.Scheme = source.FileScheme - actual = toGithubModel(&s) - assert.Equal(t, "archive.tar.gz:/etc", actual.Manifests["archive.tar.gz:/etc"].Name) + // track each scheme tested (passed or not) + tracker.Tested(t, s.Source.Metadata) + }) + } } diff --git a/syft/formats/internal/testutils/directory_input.go b/syft/formats/internal/testutils/directory_input.go new file mode 100644 index 000000000..232ac9c12 --- /dev/null +++ b/syft/formats/internal/testutils/directory_input.go @@ -0,0 +1,204 @@ +package testutils + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +func DirectoryInput(t testing.TB, dir string) sbom.SBOM { + catalog := 
newDirectoryCatalog() + + path := filepath.Join(dir, "some", "path") + + require.NoError(t, os.MkdirAll(path, 0755)) + + src, err := source.NewFromDirectory( + source.DirectoryConfig{ + Path: path, + Base: dir, + }, + ) + require.NoError(t, err) + + return sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: catalog, + LinuxDistribution: &linux.Release{ + PrettyName: "debian", + Name: "debian", + ID: "debian", + IDLike: []string{"like!"}, + Version: "1.2.3", + VersionID: "1.2.3", + }, + }, + Source: src.Describe(), + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + // the application configuration should be persisted here, however, we do not want to import + // the application configuration in this package (it's reserved only for ingestion by the cmd package) + Configuration: map[string]string{ + "config-key": "config-value", + }, + }, + } +} + +func DirectoryInputWithAuthorField(t testing.TB) sbom.SBOM { + catalog := newDirectoryCatalogWithAuthorField() + + dir := t.TempDir() + path := filepath.Join(dir, "some", "path") + + require.NoError(t, os.MkdirAll(path, 0755)) + + src, err := source.NewFromDirectory( + source.DirectoryConfig{ + Path: path, + Base: dir, + }, + ) + require.NoError(t, err) + + return sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: catalog, + LinuxDistribution: &linux.Release{ + PrettyName: "debian", + Name: "debian", + ID: "debian", + IDLike: []string{"like!"}, + Version: "1.2.3", + VersionID: "1.2.3", + }, + }, + Source: src.Describe(), + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + // the application configuration should be persisted here, however, we do not want to import + // the application configuration in this package (it's reserved only for ingestion by the cmd package) + Configuration: map[string]string{ + "config-key": "config-value", + }, + }, + } +} + +func newDirectoryCatalog() *pkg.Collection { + catalog := pkg.NewCollection() + + // populate catalog with test data + 
catalog.Add(pkg.Package{ + Name: "package-1", + Version: "1.0.1", + Type: pkg.PythonPkg, + FoundBy: "the-cataloger-1", + Locations: file.NewLocationSet( + file.NewLocation("/some/path/pkg1"), + ), + Language: pkg.Python, + MetadataType: pkg.PythonPackageMetadataType, + Licenses: pkg.NewLicenseSet( + pkg.NewLicense("MIT"), + ), + Metadata: pkg.PythonPackageMetadata{ + Name: "package-1", + Version: "1.0.1", + Files: []pkg.PythonFileRecord{ + { + Path: "/some/path/pkg1/dependencies/foo", + }, + }, + }, + PURL: "a-purl-2", // intentionally a bad pURL for test fixtures + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), + }, + }) + catalog.Add(pkg.Package{ + Name: "package-2", + Version: "2.0.1", + Type: pkg.DebPkg, + FoundBy: "the-cataloger-2", + Locations: file.NewLocationSet( + file.NewLocation("/some/path/pkg1"), + ), + MetadataType: pkg.DpkgMetadataType, + Metadata: pkg.DpkgMetadata{ + Package: "package-2", + Version: "2.0.1", + }, + PURL: "pkg:deb/debian/package-2@2.0.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), + }, + }) + + return catalog +} + +func newDirectoryCatalogWithAuthorField() *pkg.Collection { + catalog := pkg.NewCollection() + + // populate catalog with test data + catalog.Add(pkg.Package{ + Name: "package-1", + Version: "1.0.1", + Type: pkg.PythonPkg, + FoundBy: "the-cataloger-1", + Locations: file.NewLocationSet( + file.NewLocation("/some/path/pkg1"), + ), + Language: pkg.Python, + MetadataType: pkg.PythonPackageMetadataType, + Licenses: pkg.NewLicenseSet( + pkg.NewLicense("MIT"), + ), + Metadata: pkg.PythonPackageMetadata{ + Name: "package-1", + Version: "1.0.1", + Author: "test-author", + Files: []pkg.PythonFileRecord{ + { + Path: "/some/path/pkg1/dependencies/foo", + }, + }, + }, + PURL: "a-purl-2", // intentionally a bad pURL for test fixtures + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), + }, + }) + catalog.Add(pkg.Package{ + Name: "package-2", + Version: "2.0.1", + 
Type: pkg.DebPkg, + FoundBy: "the-cataloger-2", + Locations: file.NewLocationSet( + file.NewLocation("/some/path/pkg1"), + ), + MetadataType: pkg.DpkgMetadataType, + Metadata: pkg.DpkgMetadata{ + Package: "package-2", + Version: "2.0.1", + }, + PURL: "pkg:deb/debian/package-2@2.0.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), + }, + }) + + return catalog +} diff --git a/syft/formats/internal/testutils/file_relationships.go b/syft/formats/internal/testutils/file_relationships.go new file mode 100644 index 000000000..39ab8211f --- /dev/null +++ b/syft/formats/internal/testutils/file_relationships.go @@ -0,0 +1,32 @@ +package testutils + +import ( + "math/rand" + "time" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/sbom" +) + +//nolint:gosec +func AddSampleFileRelationships(s *sbom.SBOM) { + catalog := s.Artifacts.Packages.Sorted() + s.Artifacts.FileMetadata = map[file.Coordinates]file.Metadata{} + + files := []string{"/f1", "/f2", "/d1/f3", "/d2/f4", "/z1/f5", "/a1/f6"} + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + rnd.Shuffle(len(files), func(i, j int) { files[i], files[j] = files[j], files[i] }) + + for _, f := range files { + meta := file.Metadata{} + coords := file.Coordinates{RealPath: f} + s.Artifacts.FileMetadata[coords] = meta + + s.Relationships = append(s.Relationships, artifact.Relationship{ + From: catalog[0], + To: coords, + Type: artifact.ContainsRelationship, + }) + } +} diff --git a/syft/formats/internal/testutils/image_input.go b/syft/formats/internal/testutils/image_input.go new file mode 100644 index 000000000..e53bc5f7d --- /dev/null +++ b/syft/formats/internal/testutils/image_input.go @@ -0,0 +1,113 @@ +package testutils + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/anchore/stereoscope/pkg/filetree" + "github.com/anchore/stereoscope/pkg/image" + "github.com/anchore/stereoscope/pkg/imagetest" + 
"github.com/anchore/syft/syft/cpe" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +func ImageInput(t testing.TB, testImage string, options ...ImageOption) sbom.SBOM { + t.Helper() + catalog := pkg.NewCollection() + var cfg imageCfg + var img *image.Image + for _, opt := range options { + opt(&cfg) + } + + switch cfg.fromSnapshot { + case true: + img = imagetest.GetGoldenFixtureImage(t, testImage) + default: + img = imagetest.GetFixtureImage(t, "docker-archive", testImage) + } + + populateImageCatalog(catalog, img) + + // this is a hard coded value that is not given by the fixture helper and must be provided manually + img.Metadata.ManifestDigest = "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368" + + src, err := source.NewFromStereoscopeImageObject(img, "user-image-input", nil) + assert.NoError(t, err) + + return sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: catalog, + LinuxDistribution: &linux.Release{ + PrettyName: "debian", + Name: "debian", + ID: "debian", + IDLike: []string{"like!"}, + Version: "1.2.3", + VersionID: "1.2.3", + }, + }, + Source: src.Describe(), + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + // the application configuration should be persisted here, however, we do not want to import + // the application configuration in this package (it's reserved only for ingestion by the cmd package) + Configuration: map[string]string{ + "config-key": "config-value", + }, + }, + } +} + +func populateImageCatalog(catalog *pkg.Collection, img *image.Image) { + _, ref1, _ := img.SquashedTree().File("/somefile-1.txt", filetree.FollowBasenameLinks) + _, ref2, _ := img.SquashedTree().File("/somefile-2.txt", filetree.FollowBasenameLinks) + + // populate catalog with test data + catalog.Add(pkg.Package{ + Name: "package-1", + Version: "1.0.1", + Locations: 
file.NewLocationSet( + file.NewLocationFromImage(string(ref1.RealPath), *ref1.Reference, img), + ), + Type: pkg.PythonPkg, + FoundBy: "the-cataloger-1", + Language: pkg.Python, + MetadataType: pkg.PythonPackageMetadataType, + Licenses: pkg.NewLicenseSet( + pkg.NewLicense("MIT"), + ), + Metadata: pkg.PythonPackageMetadata{ + Name: "package-1", + Version: "1.0.1", + }, + PURL: "a-purl-1", // intentionally a bad pURL for test fixtures + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:1:*:*:*:*:*:*:*"), + }, + }) + catalog.Add(pkg.Package{ + Name: "package-2", + Version: "2.0.1", + Locations: file.NewLocationSet( + file.NewLocationFromImage(string(ref2.RealPath), *ref2.Reference, img), + ), + Type: pkg.DebPkg, + FoundBy: "the-cataloger-2", + MetadataType: pkg.DpkgMetadataType, + Metadata: pkg.DpkgMetadata{ + Package: "package-2", + Version: "2.0.1", + }, + PURL: "pkg:deb/debian/package-2@2.0.1", + CPEs: []cpe.CPE{ + cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), + }, + }) +} diff --git a/syft/formats/internal/testutils/redactor.go b/syft/formats/internal/testutils/redactor.go new file mode 100644 index 000000000..0c5505d1b --- /dev/null +++ b/syft/formats/internal/testutils/redactor.go @@ -0,0 +1,142 @@ +package testutils + +import ( + "bytes" + "regexp" +) + +var ( + _ Redactor = (*RedactorFn)(nil) + _ Redactor = (*PatternReplacement)(nil) + _ Redactor = (*ValueReplacement)(nil) + _ Redactor = (*Redactions)(nil) +) + +type Redactor interface { + Redact([]byte) []byte +} + +// Replace by function ////////////////////////////// + +type RedactorFn func([]byte) []byte + +func (r RedactorFn) Redact(b []byte) []byte { + return r(b) +} + +// Replace by regex ////////////////////////////// + +type PatternReplacement struct { + Search *regexp.Regexp + Replace string +} + +func NewPatternReplacement(r *regexp.Regexp) PatternReplacement { + return PatternReplacement{ + Search: r, + Replace: "redacted", + } +} + +func (p PatternReplacement) Redact(b []byte) []byte { + 
return p.Search.ReplaceAll(b, []byte(p.Replace)) +} + +// Replace by value ////////////////////////////// + +type ValueReplacement struct { + Search string + Replace string +} + +func NewValueReplacement(v string) ValueReplacement { + return ValueReplacement{ + Search: v, + Replace: "redacted", + } +} + +func (v ValueReplacement) Redact(b []byte) []byte { + return bytes.ReplaceAll(b, []byte(v.Search), []byte(v.Replace)) +} + +// Handle a collection of redactors ////////////////////////////// + +type Redactions struct { + redactors []Redactor +} + +func NewRedactions(redactors ...Redactor) *Redactions { + r := &Redactions{ + redactors: redactors, + } + + return r.WithFunctions(carriageRedactor) +} + +func (r *Redactions) WithPatternRedactors(values map[string]string) *Redactions { + for k, v := range values { + r.redactors = append(r.redactors, + PatternReplacement{ + Search: regexp.MustCompile(k), + Replace: v, + }, + ) + } + return r +} + +func (r *Redactions) WithValueRedactors(values map[string]string) *Redactions { + for k, v := range values { + r.redactors = append(r.redactors, + ValueReplacement{ + Search: k, + Replace: v, + }, + ) + } + return r +} + +func (r *Redactions) WithPatternsRedacted(values ...string) *Redactions { + for _, pattern := range values { + r.redactors = append(r.redactors, + NewPatternReplacement(regexp.MustCompile(pattern)), + ) + } + return r +} + +func (r *Redactions) WithValuesRedacted(values ...string) *Redactions { + for _, v := range values { + r.redactors = append(r.redactors, + NewValueReplacement(v), + ) + } + return r +} + +func (r *Redactions) WithFunctions(values ...func([]byte) []byte) *Redactions { + for _, fn := range values { + r.redactors = append(r.redactors, + RedactorFn(fn), + ) + } + return r +} + +func (r *Redactions) WithRedactors(rs ...Redactor) *Redactions { + r.redactors = append(r.redactors, rs...) 
+ return r +} + +func (r Redactions) Redact(b []byte) []byte { + for _, redactor := range r.redactors { + b = redactor.Redact(b) + } + return b +} + +func carriageRedactor(s []byte) []byte { + return bytes.ReplaceAll(s, []byte("\r\n"), []byte("\n")) +} diff --git a/syft/formats/internal/testutils/snapshot.go b/syft/formats/internal/testutils/snapshot.go new file mode 100644 index 000000000..7eae36594 --- /dev/null +++ b/syft/formats/internal/testutils/snapshot.go @@ -0,0 +1,88 @@ +package testutils + +import ( + "bytes" + "testing" + + "github.com/sergi/go-diff/diffmatchpatch" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/go-testutils" + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/sbom" +) + +type imageCfg struct { + fromSnapshot bool +} + +type ImageOption func(cfg *imageCfg) + +func FromSnapshot() ImageOption { + return func(cfg *imageCfg) { + cfg.fromSnapshot = true + } +} + +type EncoderSnapshotTestConfig struct { + Subject sbom.SBOM + Format sbom.Format + UpdateSnapshot bool + PersistRedactionsInSnapshot bool + IsJSON bool + Redactor Redactor +} + +func AssertEncoderAgainstGoldenSnapshot(t *testing.T, cfg EncoderSnapshotTestConfig) { + t.Helper() + var buffer bytes.Buffer + + err := cfg.Format.Encode(&buffer, cfg.Subject) + assert.NoError(t, err) + actual := buffer.Bytes() + + if cfg.UpdateSnapshot && !cfg.PersistRedactionsInSnapshot { + // replace the expected snapshot contents with the current (unredacted) encoder contents + testutils.UpdateGoldenFileContents(t, actual) + return + } + + var expected []byte + if cfg.Redactor != nil { + actual = cfg.Redactor.Redact(actual) + expected = cfg.Redactor.Redact(testutils.GetGoldenFileContents(t)) + } else { + expected = testutils.GetGoldenFileContents(t) + } + + if cfg.UpdateSnapshot && cfg.PersistRedactionsInSnapshot { + // replace the expected snapshot contents with the current (redacted) encoder contents + 
testutils.UpdateGoldenFileContents(t, actual) + return + } + + if cfg.IsJSON { + require.JSONEq(t, string(expected), string(actual)) + } else if !bytes.Equal(expected, actual) { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(string(expected), string(actual), true) + t.Logf("len: %d\nexpected: %s", len(expected), expected) + t.Logf("len: %d\nactual: %s", len(actual), actual) + t.Errorf("mismatched output:\n%s", dmp.DiffPrettyText(diffs)) + } +} + +type ImageSnapshotTestConfig struct { + Image string + UpdateImageSnapshot bool +} + +func AssertEncoderAgainstGoldenImageSnapshot(t *testing.T, imgCfg ImageSnapshotTestConfig, cfg EncoderSnapshotTestConfig) { + if imgCfg.UpdateImageSnapshot { + // grab the latest image contents and persist + imagetest.UpdateGoldenFixtureImage(t, imgCfg.Image) + } + + AssertEncoderAgainstGoldenSnapshot(t, cfg) +} diff --git a/syft/formats/internal/testutils/utils.go b/syft/formats/internal/testutils/utils.go deleted file mode 100644 index f9f4941d4..000000000 --- a/syft/formats/internal/testutils/utils.go +++ /dev/null @@ -1,396 +0,0 @@ -package testutils - -import ( - "bytes" - "math/rand" - "strings" - "testing" - "time" - - "github.com/sergi/go-diff/diffmatchpatch" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/anchore/go-testutils" - "github.com/anchore/stereoscope/pkg/filetree" - "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/stereoscope/pkg/imagetest" - "github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/cpe" - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/linux" - "github.com/anchore/syft/syft/pkg" - "github.com/anchore/syft/syft/sbom" - "github.com/anchore/syft/syft/source" -) - -type redactor func(s []byte) []byte - -type imageCfg struct { - fromSnapshot bool -} - -type ImageOption func(cfg *imageCfg) - -func FromSnapshot() ImageOption { - return func(cfg *imageCfg) { - cfg.fromSnapshot = true - } -} - -func 
AssertEncoderAgainstGoldenImageSnapshot(t *testing.T, format sbom.Format, sbom sbom.SBOM, testImage string, updateSnapshot bool, json bool, redactors ...redactor) { - var buffer bytes.Buffer - - // grab the latest image contents and persist - if updateSnapshot { - imagetest.UpdateGoldenFixtureImage(t, testImage) - } - - err := format.Encode(&buffer, sbom) - assert.NoError(t, err) - actual := buffer.Bytes() - - // replace the expected snapshot contents with the current encoder contents - if updateSnapshot { - testutils.UpdateGoldenFileContents(t, actual) - } - - actual = redact(actual, redactors...) - expected := redact(testutils.GetGoldenFileContents(t), redactors...) - - if json { - require.JSONEq(t, string(expected), string(actual)) - } else if !bytes.Equal(expected, actual) { - // assert that the golden file snapshot matches the actual contents - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(string(expected), string(actual), true) - t.Errorf("mismatched output:\n%s", dmp.DiffPrettyText(diffs)) - } -} - -func AssertEncoderAgainstGoldenSnapshot(t *testing.T, format sbom.Format, sbom sbom.SBOM, updateSnapshot bool, json bool, redactors ...redactor) { - var buffer bytes.Buffer - - err := format.Encode(&buffer, sbom) - assert.NoError(t, err) - actual := buffer.Bytes() - - // replace the expected snapshot contents with the current encoder contents - if updateSnapshot { - testutils.UpdateGoldenFileContents(t, actual) - } - - actual = redact(actual, redactors...) - expected := redact(testutils.GetGoldenFileContents(t), redactors...) 
- - if json { - require.JSONEq(t, string(expected), string(actual)) - } else if !bytes.Equal(expected, actual) { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(string(expected), string(actual), true) - t.Logf("len: %d\nexpected: %s", len(expected), expected) - t.Logf("len: %d\nactual: %s", len(actual), actual) - t.Errorf("mismatched output:\n%s", dmp.DiffPrettyText(diffs)) - } -} - -func ImageInput(t testing.TB, testImage string, options ...ImageOption) sbom.SBOM { - t.Helper() - catalog := pkg.NewCollection() - var cfg imageCfg - var img *image.Image - for _, opt := range options { - opt(&cfg) - } - - switch cfg.fromSnapshot { - case true: - img = imagetest.GetGoldenFixtureImage(t, testImage) - default: - img = imagetest.GetFixtureImage(t, "docker-archive", testImage) - } - - populateImageCatalog(catalog, img) - - // this is a hard coded value that is not given by the fixture helper and must be provided manually - img.Metadata.ManifestDigest = "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368" - - src, err := source.NewFromImage(img, "user-image-input") - assert.NoError(t, err) - - return sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: catalog, - LinuxDistribution: &linux.Release{ - PrettyName: "debian", - Name: "debian", - ID: "debian", - IDLike: []string{"like!"}, - Version: "1.2.3", - VersionID: "1.2.3", - }, - }, - Source: src.Metadata, - Descriptor: sbom.Descriptor{ - Name: "syft", - Version: "v0.42.0-bogus", - // the application configuration should be persisted here, however, we do not want to import - // the application configuration in this package (it's reserved only for ingestion by the cmd package) - Configuration: map[string]string{ - "config-key": "config-value", - }, - }, - } -} - -func carriageRedactor(s []byte) []byte { - msg := strings.ReplaceAll(string(s), "\r\n", "\n") - return []byte(msg) -} - -func populateImageCatalog(catalog *pkg.Collection, img *image.Image) { - _, ref1, _ := 
img.SquashedTree().File("/somefile-1.txt", filetree.FollowBasenameLinks) - _, ref2, _ := img.SquashedTree().File("/somefile-2.txt", filetree.FollowBasenameLinks) - - // populate catalog with test data - catalog.Add(pkg.Package{ - Name: "package-1", - Version: "1.0.1", - Locations: file.NewLocationSet( - file.NewLocationFromImage(string(ref1.RealPath), *ref1.Reference, img), - ), - Type: pkg.PythonPkg, - FoundBy: "the-cataloger-1", - Language: pkg.Python, - MetadataType: pkg.PythonPackageMetadataType, - Licenses: pkg.NewLicenseSet( - pkg.NewLicense("MIT"), - ), - Metadata: pkg.PythonPackageMetadata{ - Name: "package-1", - Version: "1.0.1", - }, - PURL: "a-purl-1", // intentionally a bad pURL for test fixtures - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:1:*:*:*:*:*:*:*"), - }, - }) - catalog.Add(pkg.Package{ - Name: "package-2", - Version: "2.0.1", - Locations: file.NewLocationSet( - file.NewLocationFromImage(string(ref2.RealPath), *ref2.Reference, img), - ), - Type: pkg.DebPkg, - FoundBy: "the-cataloger-2", - MetadataType: pkg.DpkgMetadataType, - Metadata: pkg.DpkgMetadata{ - Package: "package-2", - Version: "2.0.1", - }, - PURL: "pkg:deb/debian/package-2@2.0.1", - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), - }, - }) -} - -func DirectoryInput(t testing.TB) sbom.SBOM { - catalog := newDirectoryCatalog() - - src, err := source.NewFromDirectory("/some/path") - assert.NoError(t, err) - - return sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: catalog, - LinuxDistribution: &linux.Release{ - PrettyName: "debian", - Name: "debian", - ID: "debian", - IDLike: []string{"like!"}, - Version: "1.2.3", - VersionID: "1.2.3", - }, - }, - Source: src.Metadata, - Descriptor: sbom.Descriptor{ - Name: "syft", - Version: "v0.42.0-bogus", - // the application configuration should be persisted here, however, we do not want to import - // the application configuration in this package (it's reserved only for ingestion by the cmd package) - 
Configuration: map[string]string{ - "config-key": "config-value", - }, - }, - } -} - -func DirectoryInputWithAuthorField(t testing.TB) sbom.SBOM { - catalog := newDirectoryCatalogWithAuthorField() - - src, err := source.NewFromDirectory("/some/path") - assert.NoError(t, err) - - return sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: catalog, - LinuxDistribution: &linux.Release{ - PrettyName: "debian", - Name: "debian", - ID: "debian", - IDLike: []string{"like!"}, - Version: "1.2.3", - VersionID: "1.2.3", - }, - }, - Source: src.Metadata, - Descriptor: sbom.Descriptor{ - Name: "syft", - Version: "v0.42.0-bogus", - // the application configuration should be persisted here, however, we do not want to import - // the application configuration in this package (it's reserved only for ingestion by the cmd package) - Configuration: map[string]string{ - "config-key": "config-value", - }, - }, - } -} - -func newDirectoryCatalog() *pkg.Collection { - catalog := pkg.NewCollection() - - // populate catalog with test data - catalog.Add(pkg.Package{ - Name: "package-1", - Version: "1.0.1", - Type: pkg.PythonPkg, - FoundBy: "the-cataloger-1", - Locations: file.NewLocationSet( - file.NewLocation("/some/path/pkg1"), - ), - Language: pkg.Python, - MetadataType: pkg.PythonPackageMetadataType, - Licenses: pkg.NewLicenseSet( - pkg.NewLicense("MIT"), - ), - Metadata: pkg.PythonPackageMetadata{ - Name: "package-1", - Version: "1.0.1", - Files: []pkg.PythonFileRecord{ - { - Path: "/some/path/pkg1/dependencies/foo", - }, - }, - }, - PURL: "a-purl-2", // intentionally a bad pURL for test fixtures - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), - }, - }) - catalog.Add(pkg.Package{ - Name: "package-2", - Version: "2.0.1", - Type: pkg.DebPkg, - FoundBy: "the-cataloger-2", - Locations: file.NewLocationSet( - file.NewLocation("/some/path/pkg1"), - ), - MetadataType: pkg.DpkgMetadataType, - Metadata: pkg.DpkgMetadata{ - Package: "package-2", - Version: "2.0.1", - }, - 
PURL: "pkg:deb/debian/package-2@2.0.1", - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), - }, - }) - - return catalog -} - -func newDirectoryCatalogWithAuthorField() *pkg.Collection { - catalog := pkg.NewCollection() - - // populate catalog with test data - catalog.Add(pkg.Package{ - Name: "package-1", - Version: "1.0.1", - Type: pkg.PythonPkg, - FoundBy: "the-cataloger-1", - Locations: file.NewLocationSet( - file.NewLocation("/some/path/pkg1"), - ), - Language: pkg.Python, - MetadataType: pkg.PythonPackageMetadataType, - Licenses: pkg.NewLicenseSet( - pkg.NewLicense("MIT"), - ), - Metadata: pkg.PythonPackageMetadata{ - Name: "package-1", - Version: "1.0.1", - Author: "test-author", - Files: []pkg.PythonFileRecord{ - { - Path: "/some/path/pkg1/dependencies/foo", - }, - }, - }, - PURL: "a-purl-2", // intentionally a bad pURL for test fixtures - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), - }, - }) - catalog.Add(pkg.Package{ - Name: "package-2", - Version: "2.0.1", - Type: pkg.DebPkg, - FoundBy: "the-cataloger-2", - Locations: file.NewLocationSet( - file.NewLocation("/some/path/pkg1"), - ), - MetadataType: pkg.DpkgMetadataType, - Metadata: pkg.DpkgMetadata{ - Package: "package-2", - Version: "2.0.1", - }, - PURL: "pkg:deb/debian/package-2@2.0.1", - CPEs: []cpe.CPE{ - cpe.Must("cpe:2.3:*:some:package:2:*:*:*:*:*:*:*"), - }, - }) - - return catalog -} - -//nolint:gosec -func AddSampleFileRelationships(s *sbom.SBOM) { - catalog := s.Artifacts.Packages.Sorted() - s.Artifacts.FileMetadata = map[file.Coordinates]file.Metadata{} - - files := []string{"/f1", "/f2", "/d1/f3", "/d2/f4", "/z1/f5", "/a1/f6"} - rnd := rand.New(rand.NewSource(time.Now().UnixNano())) - rnd.Shuffle(len(files), func(i, j int) { files[i], files[j] = files[j], files[i] }) - - for _, f := range files { - meta := file.Metadata{} - coords := file.Coordinates{RealPath: f} - s.Artifacts.FileMetadata[coords] = meta - - s.Relationships = 
append(s.Relationships, artifact.Relationship{ - From: catalog[0], - To: coords, - Type: artifact.ContainsRelationship, - }) - } -} - -// remove dynamic values, which should be tested independently -func redact(b []byte, redactors ...redactor) []byte { - redactors = append(redactors, carriageRedactor) - for _, r := range redactors { - b = r(b) - } - return b -} diff --git a/syft/formats/spdxjson/encoder_test.go b/syft/formats/spdxjson/encoder_test.go index f33a87708..b684777aa 100644 --- a/syft/formats/spdxjson/encoder_test.go +++ b/syft/formats/spdxjson/encoder_test.go @@ -2,57 +2,81 @@ package spdxjson import ( "flag" - "regexp" "testing" "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateSpdxJson = flag.Bool("update-spdx-json", false, "update the *.golden files for spdx-json encoders") +var updateSnapshot = flag.Bool("update-spdx-json", false, "update the *.golden files for spdx-json encoders") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestSPDXJSONDirectoryEncoder(t *testing.T) { + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateSpdxJson, - true, - spdxJsonRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(dir), + }, ) } func TestSPDXJSONImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage, testutils.FromSnapshot()), - testImage, - *updateSpdxJson, - true, - spdxJsonRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage, testutils.FromSnapshot()), + Format: Format(), + UpdateSnapshot: 
*updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(), + }, ) } func TestSPDXRelationshipOrder(t *testing.T) { testImage := "image-simple" + s := testutils.ImageInput(t, testImage, testutils.FromSnapshot()) testutils.AddSampleFileRelationships(&s) + testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - s, - testImage, - *updateSpdxJson, - true, - spdxJsonRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: s, + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(), + }, ) } -func spdxJsonRedactor(s []byte) []byte { - // each SBOM reports the time it was generated, which is not useful during snapshot testing - s = regexp.MustCompile(`"created":\s+"[^"]*"`).ReplaceAll(s, []byte(`"created":""`)) +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...). 
+ WithPatternRedactors( + map[string]string{ + // each SBOM reports the time it was generated, which is not useful during snapshot testing + `"created":\s+"[^"]*"`: `"created":"redacted"`, - // each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing - s = regexp.MustCompile(`"documentNamespace":\s+"[^"]*"`).ReplaceAll(s, []byte(`"documentNamespace":""`)) + // each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing + `"documentNamespace":\s+"[^"]*"`: `"documentNamespace":"redacted"`, - // the license list will be updated periodically, the value here should not be directly tested in snapshot tests - return regexp.MustCompile(`"licenseListVersion":\s+"[^"]*"`).ReplaceAll(s, []byte(`"licenseListVersion":""`)) + // the license list will be updated periodically, the value here should not be directly tested in snapshot tests + `"licenseListVersion":\s+"[^"]*"`: `"licenseListVersion":"redacted"`, + }, + ) } diff --git a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden index e4ba2ccd9..bfcc72b96 100644 --- a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden +++ b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONDirectoryEncoder.golden @@ -2,15 +2,15 @@ "spdxVersion": "SPDX-2.3", "dataLicense": "CC0-1.0", "SPDXID": "SPDXRef-DOCUMENT", - "name": "/some/path", - "documentNamespace": "https://anchore.com/syft/dir/some/path-303fccb4-22d1-4039-9061-553bc875f086", + "name": "some/path", + "documentNamespace":"redacted", "creationInfo": { - "licenseListVersion": "3.20", + "licenseListVersion":"redacted", "creators": [ "Organization: Anchore, Inc", "Tool: syft-v0.42.0-bogus" ], - "created": "2023-06-05T18:49:13Z" + "created":"redacted" }, "packages": [ { diff --git a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden 
b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden index 3c49c3362..3caf98e85 100644 --- a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden +++ b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXJSONImageEncoder.golden @@ -3,14 +3,14 @@ "dataLicense": "CC0-1.0", "SPDXID": "SPDXRef-DOCUMENT", "name": "user-image-input", - "documentNamespace": "https://anchore.com/syft/image/user-image-input-5b9aac79-334c-4d6a-b2e6-95a819c1d45a", + "documentNamespace":"redacted", "creationInfo": { - "licenseListVersion": "3.20", + "licenseListVersion":"redacted", "creators": [ "Organization: Anchore, Inc", "Tool: syft-v0.42.0-bogus" ], - "created": "2023-06-05T18:49:14Z" + "created":"redacted" }, "packages": [ { diff --git a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden index 0dba256d9..acdb202ee 100644 --- a/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden +++ b/syft/formats/spdxjson/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden @@ -3,14 +3,14 @@ "dataLicense": "CC0-1.0", "SPDXID": "SPDXRef-DOCUMENT", "name": "user-image-input", - "documentNamespace": "https://anchore.com/syft/image/user-image-input-2a1392ab-7eb5-4f2a-86f6-777aef3232e1", + "documentNamespace":"redacted", "creationInfo": { - "licenseListVersion": "3.20", + "licenseListVersion":"redacted", "creators": [ "Organization: Anchore, Inc", "Tool: syft-v0.42.0-bogus" ], - "created": "2023-06-05T18:49:14Z" + "created":"redacted" }, "packages": [ { diff --git a/syft/formats/spdxtagvalue/encoder_test.go b/syft/formats/spdxtagvalue/encoder_test.go index 5d95f6397..f414d88fa 100644 --- a/syft/formats/spdxtagvalue/encoder_test.go +++ b/syft/formats/spdxtagvalue/encoder_test.go @@ -2,7 +2,6 @@ package spdxtagvalue import ( "flag" - "regexp" "testing" "github.com/anchore/syft/syft/formats/internal/testutils" @@ -11,28 +10,38 
@@ import ( "github.com/anchore/syft/syft/source" ) -var updateSpdxTagValue = flag.Bool("update-spdx-tv", false, "update the *.golden files for spdx-tv encoders") +var updateSnapshot = flag.Bool("update-spdx-tv", false, "update the *.golden files for spdx-tv encoders") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestSPDXTagValueDirectoryEncoder(t *testing.T) { - + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateSpdxTagValue, - false, - spdxTagValueRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(dir), + }, ) } func TestSPDXTagValueImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage, testutils.FromSnapshot()), - testImage, - *updateSpdxTagValue, - false, - spdxTagValueRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage, testutils.FromSnapshot()), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(), + }, ) } @@ -45,28 +54,33 @@ func TestSPDXJSONSPDXIDs(t *testing.T) { p.SetID() pkgs = append(pkgs, p) } - testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - sbom.SBOM{ - Artifacts: sbom.Artifacts{ - Packages: pkg.NewCollection(pkgs...), - }, - Relationships: nil, - Source: source.Metadata{ - Scheme: source.DirectoryScheme, - Path: "foobar/baz", // in this case, foobar is used as the spdx docment name - }, - Descriptor: sbom.Descriptor{ - Name: "syft", - Version: "v0.42.0-bogus", - Configuration: map[string]string{ - 
"config-key": "config-value", - }, + + s := sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkgs...), + }, + Relationships: nil, + Source: source.Description{ + Metadata: source.DirectorySourceMetadata{Path: "foobar/baz"}, // in this case, foobar is used as the spdx document name + }, + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "v0.42.0-bogus", + Configuration: map[string]string{ + "config-key": "config-value", }, }, - *updateSpdxTagValue, - false, - spdxTagValueRedactor, + } + + testutils.AssertEncoderAgainstGoldenSnapshot(t, + testutils.EncoderSnapshotTestConfig{ + Subject: s, + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(), + }, ) } @@ -74,23 +88,36 @@ func TestSPDXRelationshipOrder(t *testing.T) { testImage := "image-simple" s := testutils.ImageInput(t, testImage, testutils.FromSnapshot()) testutils.AddSampleFileRelationships(&s) + testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - s, - testImage, - *updateSpdxTagValue, - false, - spdxTagValueRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: s, + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(), + }, ) } -func spdxTagValueRedactor(s []byte) []byte { - // each SBOM reports the time it was generated, which is not useful during snapshot testing - s = regexp.MustCompile(`Created: .*`).ReplaceAll(s, []byte("redacted")) +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...). 
+ WithPatternRedactors( + map[string]string{ + // each SBOM reports the time it was generated, which is not useful during snapshot testing + `Created: .*`: "Created: redacted", - // each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing - s = regexp.MustCompile(`DocumentNamespace: https://anchore.com/syft/.*`).ReplaceAll(s, []byte("redacted")) + // each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing + `DocumentNamespace: https://anchore.com/syft/.*`: "DocumentNamespace: redacted", - // the license list will be updated periodically, the value here should not be directly tested in snapshot tests - return regexp.MustCompile(`LicenseListVersion: .*`).ReplaceAll(s, []byte("redacted")) + // the license list will be updated periodically, the value here should not be directly tested in snapshot tests + `LicenseListVersion: .*`: "LicenseListVersion: redacted", + }, + ) } diff --git a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden index ca1775a5d..7f6320eba 100644 --- a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden +++ b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXJSONSPDXIDs.golden @@ -2,11 +2,11 @@ SPDXVersion: SPDX-2.3 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT DocumentName: foobar/baz -DocumentNamespace: https://anchore.com/syft/dir/foobar/baz-1813dede-1ac5-4c44-a640-4c56e213d575 -LicenseListVersion: 3.20 +DocumentNamespace: redacted +LicenseListVersion: redacted Creator: Organization: Anchore, Inc Creator: Tool: syft-v0.42.0-bogus -Created: 2023-05-09T17:11:49Z +Created: redacted ##### Package: @at-sign diff --git a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden index 339b17c2c..26e6e9a67 100644 --- 
a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden +++ b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXRelationshipOrder.golden @@ -2,11 +2,11 @@ SPDXVersion: SPDX-2.3 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT DocumentName: user-image-input -DocumentNamespace: https://anchore.com/syft/image/user-image-input-96ea886a-3297-4847-b211-6da405ff1f8f -LicenseListVersion: 3.20 +DocumentNamespace: redacted +LicenseListVersion: redacted Creator: Organization: Anchore, Inc Creator: Tool: syft-v0.42.0-bogus -Created: 2023-05-09T17:11:49Z +Created: redacted ##### Unpackaged files diff --git a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden index 818d62e7d..52b63c984 100644 --- a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden +++ b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueDirectoryEncoder.golden @@ -1,12 +1,12 @@ SPDXVersion: SPDX-2.3 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT -DocumentName: /some/path -DocumentNamespace: https://anchore.com/syft/dir/some/path-f7bdb1ee-7fef-48e7-a386-6ee3836d4a28 -LicenseListVersion: 3.20 +DocumentName: some/path +DocumentNamespace: redacted +LicenseListVersion: redacted Creator: Organization: Anchore, Inc Creator: Tool: syft-v0.42.0-bogus -Created: 2023-05-09T17:11:49Z +Created: redacted ##### Package: package-2 diff --git a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden index 867e8e039..65b111107 100644 --- a/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden +++ b/syft/formats/spdxtagvalue/test-fixtures/snapshot/TestSPDXTagValueImageEncoder.golden @@ -2,11 +2,11 @@ SPDXVersion: SPDX-2.3 DataLicense: CC0-1.0 SPDXID: SPDXRef-DOCUMENT 
DocumentName: user-image-input -DocumentNamespace: https://anchore.com/syft/image/user-image-input-44d44a85-2207-4b51-bd73-d0c7b080f6d3 -LicenseListVersion: 3.20 +DocumentNamespace: redacted +LicenseListVersion: redacted Creator: Organization: Anchore, Inc Creator: Tool: syft-v0.42.0-bogus -Created: 2023-05-09T17:11:49Z +Created: redacted ##### Package: package-2 diff --git a/syft/formats/syftjson/encoder_test.go b/syft/formats/syftjson/encoder_test.go index 231333bb8..8ad7ababd 100644 --- a/syft/formats/syftjson/encoder_test.go +++ b/syft/formats/syftjson/encoder_test.go @@ -2,7 +2,6 @@ package syftjson import ( "flag" - "regexp" "testing" stereoFile "github.com/anchore/stereoscope/pkg/file" @@ -16,36 +15,41 @@ import ( "github.com/anchore/syft/syft/source" ) -var updateJson = flag.Bool("update-json", false, "update the *.golden files for json encoders") +var updateSnapshot = flag.Bool("update-json", false, "update the *.golden files for json encoders") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestDirectoryEncoder(t *testing.T) { + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateJson, - true, - schemaVersionRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(dir), + }, ) } func TestImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage, testutils.FromSnapshot()), - testImage, - *updateJson, - true, - schemaVersionRedactor, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage, testutils.FromSnapshot()), + Format: Format(), + 
UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(), + }, ) } -func schemaVersionRedactor(s []byte) []byte { - pattern := regexp.MustCompile(`,?\s*"schema":\s*\{[^}]*}`) - out := pattern.ReplaceAll(s, []byte("")) - return out -} - func TestEncodeFullJSONDocument(t *testing.T) { catalog := pkg.NewCollection() @@ -176,10 +180,9 @@ func TestEncodeFullJSONDocument(t *testing.T) { }, }, }, - Source: source.Metadata{ - ID: "c2b46b4eb06296933b7cf0722683964e9ecbd93265b9ef6ae9642e3952afbba0", - Scheme: source.ImageScheme, - ImageMetadata: source.ImageMetadata{ + Source: source.Description{ + ID: "c2b46b4eb06296933b7cf0722683964e9ecbd93265b9ef6ae9642e3952afbba0", + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "user-image-input", ID: "sha256:c2b46b4eb06296933b7cf0722683964e9ecbd93265b9ef6ae9642e3952afbba0", ManifestDigest: "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368", @@ -188,7 +191,7 @@ func TestEncodeFullJSONDocument(t *testing.T) { "stereoscope-fixture-image-simple:85066c51088bdd274f7a89e99e00490f666c49e72ffc955707cd6e18f0e22c5b", }, Size: 38, - Layers: []source.LayerMetadata{ + Layers: []source.StereoscopeLayerMetadata{ { MediaType: "application/vnd.docker.image.rootfs.diff.tar.gzip", Digest: "sha256:3de16c5b8659a2e8d888b8ded8427be7a5686a3c8c4e4dd30de20f362827285b", @@ -217,10 +220,24 @@ func TestEncodeFullJSONDocument(t *testing.T) { } testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - s, - *updateJson, - true, - schemaVersionRedactor, + testutils.EncoderSnapshotTestConfig{ + Subject: s, + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: true, + Redactor: redactor(), + }, ) } + +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...). 
+ WithPatternRedactors( + map[string]string{ + // remove schema version (don't even show the key or value) + `,?\s*"schema":\s*\{[^}]*}`: "", + }, + ) +} diff --git a/syft/formats/syftjson/model/package.go b/syft/formats/syftjson/model/package.go index fccf04c0b..d4a819f2a 100644 --- a/syft/formats/syftjson/model/package.go +++ b/syft/formats/syftjson/model/package.go @@ -102,7 +102,7 @@ func (p *Package) UnmarshalJSON(b []byte) error { return err } - err := unpackMetadata(p, unpacker) + err := unpackPkgMetadata(p, unpacker) if errors.Is(err, errUnknownMetadataType) { log.Warnf("unknown package metadata type=%q for packageID=%q", p.MetadataType, p.ID) return nil @@ -111,7 +111,7 @@ func (p *Package) UnmarshalJSON(b []byte) error { return err } -func unpackMetadata(p *Package, unpacker packageMetadataUnpacker) error { +func unpackPkgMetadata(p *Package, unpacker packageMetadataUnpacker) error { p.MetadataType = pkg.CleanMetadataType(unpacker.MetadataType) typ, ok := pkg.MetadataTypeByName[p.MetadataType] diff --git a/syft/formats/syftjson/model/package_test.go b/syft/formats/syftjson/model/package_test.go index 8027764d8..aec5d1ec8 100644 --- a/syft/formats/syftjson/model/package_test.go +++ b/syft/formats/syftjson/model/package_test.go @@ -362,7 +362,7 @@ func Test_unpackMetadata(t *testing.T) { var unpacker packageMetadataUnpacker require.NoError(t, json.Unmarshal(test.packageData, &unpacker)) - err := unpackMetadata(p, unpacker) + err := unpackPkgMetadata(p, unpacker) assert.Equal(t, test.metadataType, p.MetadataType) test.wantErr(t, err) diff --git a/syft/formats/syftjson/model/source.go b/syft/formats/syftjson/model/source.go index d546cf11c..b0e843ef2 100644 --- a/syft/formats/syftjson/model/source.go +++ b/syft/formats/syftjson/model/source.go @@ -3,53 +3,114 @@ package model import ( "encoding/json" "fmt" + "reflect" "strconv" + "strings" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/source" ) // Source object 
represents the thing that was cataloged type Source struct { - ID string `json:"id"` - Type string `json:"type"` - Target interface{} `json:"target"` + ID string `json:"id"` + Name string `json:"name"` + Version string `json:"version"` + Type string `json:"type"` + Metadata interface{} `json:"metadata"` } // sourceUnpacker is used to unmarshal Source objects type sourceUnpacker struct { - ID string `json:"id,omitempty"` - Type string `json:"type"` - Target json.RawMessage `json:"target"` + ID string `json:"id,omitempty"` + Name string `json:"name"` + Version string `json:"version"` + Type string `json:"type"` + Metadata json.RawMessage `json:"metadata"` + Target json.RawMessage `json:"target"` // pre-v9 schema support } // UnmarshalJSON populates a source object from JSON bytes. func (s *Source) UnmarshalJSON(b []byte) error { var unpacker sourceUnpacker - if err := json.Unmarshal(b, &unpacker); err != nil { + err := json.Unmarshal(b, &unpacker) + if err != nil { return err } + s.Name = unpacker.Name + s.Version = unpacker.Version s.Type = unpacker.Type s.ID = unpacker.ID - switch s.Type { - case "directory", "file": - if target, err := strconv.Unquote(string(unpacker.Target)); err == nil { - s.Target = target - } else { - s.Target = string(unpacker.Target[:]) + if len(unpacker.Target) > 0 { + s.Type = cleanPreSchemaV9MetadataType(s.Type) + s.Metadata, err = extractPreSchemaV9Metadata(s.Type, unpacker.Target) + if err != nil { + return fmt.Errorf("unable to extract pre-schema-v9 source metadata: %w", err) } + return nil + } - case "image": - var payload source.ImageMetadata - if err := json.Unmarshal(unpacker.Target, &payload); err != nil { + return unpackSrcMetadata(s, unpacker) +} + +func unpackSrcMetadata(s *Source, unpacker sourceUnpacker) error { + rt := sourcemetadata.ReflectTypeFromJSONName(s.Type) + if rt == nil { + return fmt.Errorf("unable to find source metadata type=%q", s.Type) + } + + val := reflect.New(rt).Interface() + if len(unpacker.Metadata) > 0 
{ + if err := json.Unmarshal(unpacker.Metadata, val); err != nil { return err } - s.Target = payload - - default: - return fmt.Errorf("unsupported package metadata type: %+v", s.Type) } + s.Metadata = reflect.ValueOf(val).Elem().Interface() + return nil } + +func cleanPreSchemaV9MetadataType(t string) string { + t = strings.ToLower(t) + if t == "dir" { + return "directory" + } + return t +} + +func extractPreSchemaV9Metadata(t string, target []byte) (interface{}, error) { + switch t { + case "directory", "dir": + cleanTarget, err := strconv.Unquote(string(target)) + if err != nil { + cleanTarget = string(target) + } + + return source.DirectorySourceMetadata{ + Path: cleanTarget, + }, nil + + case "file": + cleanTarget, err := strconv.Unquote(string(target)) + if err != nil { + cleanTarget = string(target) + } + + return source.FileSourceMetadata{ + Path: cleanTarget, + }, nil + + case "image": + var payload source.StereoscopeImageSourceMetadata + if err := json.Unmarshal(target, &payload); err != nil { + return nil, err + } + return payload, nil + + default: + return nil, fmt.Errorf("unsupported package metadata type: %+v", t) + } +} diff --git a/syft/formats/syftjson/model/source_test.go b/syft/formats/syftjson/model/source_test.go index ee7969d74..e9118f090 100644 --- a/syft/formats/syftjson/model/source_test.go +++ b/syft/formats/syftjson/model/source_test.go @@ -6,17 +6,194 @@ import ( "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/source" ) func TestSource_UnmarshalJSON(t *testing.T) { + tracker := sourcemetadata.NewCompletionTester(t) + + cases := []struct { + name string + input []byte + expected *Source + wantErr require.ErrorAssertionFunc + }{ + { + name: "directory", + input: []byte(`{ + "id": "foobar", + "type": "directory", + "metadata": {"path": "/var/lib/foo", 
"base":"/nope"} + }`), + expected: &Source{ + ID: "foobar", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "/var/lib/foo", + //Base: "/nope", // note: should be ignored entirely + }, + }, + }, + { + name: "image", + input: []byte(`{ + "id": "foobar", + "type": "image", + "metadata": { + "userInput": "alpine:3.10", + "imageID": "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a", + "manifestDigest": "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c", + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "tags": [], + "imageSize": 5576169, + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "digest": "sha256:9fb3aa2f8b8023a4bebbf92aa567caf88e38e969ada9f0ac12643b2847391635", + "size": 5576169 + } + ], + "manifest": "ewogICAic2NoZW1hVmVyc2lvbiI6IDIsCiAgICJtZWRpYVR5cGUiOiAiYXBwbGljYXRpb24vdm5kLmRvY2tlci5kaXN0cmlidXRpb24ubWFuaWZlc3QudjIranNvbiIsCiAgICJjb25maWciOiB7CiAgICAgICJtZWRpYVR5cGUiOiAiYXBwbGljYXRpb24vdm5kLmRvY2tlci5jb250YWluZXIuaW1hZ2UudjEranNvbiIsCiAgICAgICJzaXplIjogMTQ3MiwKICAgICAgImRpZ2VzdCI6ICJzaGEyNTY6ZTdiMzAwYWVlOWY5YmYzNDMzZDMyYmM5MzA1YmZkZDIyMTgzYmViNTlkOTMzYjQ4ZDc3YWI1NmJhNTNhMTk3YSIKICAgfSwKICAgImxheWVycyI6IFsKICAgICAgewogICAgICAgICAibWVkaWFUeXBlIjogImFwcGxpY2F0aW9uL3ZuZC5kb2NrZXIuaW1hZ2Uucm9vdGZzLmRpZmYudGFyLmd6aXAiLAogICAgICAgICAic2l6ZSI6IDI3OTgzMzgsCiAgICAgICAgICJkaWdlc3QiOiAic2hhMjU2OjM5NmMzMTgzNzExNmFjMjkwNDU4YWZjYjkyOGY2OGI2Y2MxYzdiZGQ2OTYzZmM3MmY1MmYzNjVhMmE4OWMxYjUiCiAgICAgIH0KICAgXQp9", + "config": 
"eyJhcmNoaXRlY3R1cmUiOiJhbWQ2NCIsImNvbmZpZyI6eyJIb3N0bmFtZSI6IiIsIkRvbWFpbm5hbWUiOiIiLCJVc2VyIjoiIiwiQXR0YWNoU3RkaW4iOmZhbHNlLCJBdHRhY2hTdGRvdXQiOmZhbHNlLCJBdHRhY2hTdGRlcnIiOmZhbHNlLCJUdHkiOmZhbHNlLCJPcGVuU3RkaW4iOmZhbHNlLCJTdGRpbk9uY2UiOmZhbHNlLCJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiQ21kIjpbIi9iaW4vc2giXSwiSW1hZ2UiOiJzaGEyNTY6ZWIyMDgwYzQ1NWU5NGMyMmFlMzViM2FlZjllMDc4YzQ5MmEwMDc5NTQxMmUwMjZlNGQ2YjQxZWY2NGJjN2RkOCIsIlZvbHVtZXMiOm51bGwsIldvcmtpbmdEaXIiOiIiLCJFbnRyeXBvaW50IjpudWxsLCJPbkJ1aWxkIjpudWxsLCJMYWJlbHMiOm51bGx9LCJjb250YWluZXIiOiJmZGI3ZTgwZTMzMzllOGQwNTk5MjgyZTYwNmM5MDdhYTU4ODFlZTRjNjY4YTY4MTM2MTE5ZTZkZmFjNmNlM2E0IiwiY29udGFpbmVyX2NvbmZpZyI6eyJIb3N0bmFtZSI6ImZkYjdlODBlMzMzOSIsIkRvbWFpbm5hbWUiOiIiLCJVc2VyIjoiIiwiQXR0YWNoU3RkaW4iOmZhbHNlLCJBdHRhY2hTdGRvdXQiOmZhbHNlLCJBdHRhY2hTdGRlcnIiOmZhbHNlLCJUdHkiOmZhbHNlLCJPcGVuU3RkaW4iOmZhbHNlLCJTdGRpbk9uY2UiOmZhbHNlLCJFbnYiOlsiUEFUSD0vdXNyL2xvY2FsL3NiaW46L3Vzci9sb2NhbC9iaW46L3Vzci9zYmluOi91c3IvYmluOi9zYmluOi9iaW4iXSwiQ21kIjpbIi9iaW4vc2giLCItYyIsIiMobm9wKSAiLCJDTUQgW1wiL2Jpbi9zaFwiXSJdLCJJbWFnZSI6InNoYTI1NjplYjIwODBjNDU1ZTk0YzIyYWUzNWIzYWVmOWUwNzhjNDkyYTAwNzk1NDEyZTAyNmU0ZDZiNDFlZjY0YmM3ZGQ4IiwiVm9sdW1lcyI6bnVsbCwiV29ya2luZ0RpciI6IiIsIkVudHJ5cG9pbnQiOm51bGwsIk9uQnVpbGQiOm51bGwsIkxhYmVscyI6e319LCJjcmVhdGVkIjoiMjAyMS0wNC0xNFQxOToyMDowNS4zMzgzOTc3NjFaIiwiZG9ja2VyX3ZlcnNpb24iOiIxOS4wMy4xMiIsImhpc3RvcnkiOlt7ImNyZWF0ZWQiOiIyMDIxLTA0LTE0VDE5OjIwOjA0Ljk4NzIxOTEyNFoiLCJjcmVhdGVkX2J5IjoiL2Jpbi9zaCAtYyAjKG5vcCkgQUREIGZpbGU6YzUzNzdlYWE5MjZiZjQxMmRkOGQ0YTA4YjBhMWYyMzk5Y2ZkNzA4NzQzNTMzYjBhYTAzYjUzZDE0Y2I0YmI0ZSBpbiAvICJ9LHsiY3JlYXRlZCI6IjIwMjEtMDQtMTRUMTk6MjA6MDUuMzM4Mzk3NzYxWiIsImNyZWF0ZWRfYnkiOiIvYmluL3NoIC1jICMobm9wKSAgQ01EIFtcIi9iaW4vc2hcIl0iLCJlbXB0eV9sYXllciI6dHJ1ZX1dLCJvcyI6ImxpbnV4Iiwicm9vdGZzIjp7InR5cGUiOiJsYXllcnMiLCJkaWZmX2lkcyI6WyJzaGEyNTY6OWZiM2FhMmY4YjgwMjNhNGJlYmJmOTJhYTU2N2NhZjg4ZTM4ZTk2OWFkYTlmMGFjMTI2NDNiMjg0NzM5MTYzNSJdfX0=", + "repoDigests": [ + 
"index.docker.io/library/alpine@sha256:451eee8bedcb2f029756dc3e9d73bab0e7943c1ac55cff3a4861c52a0fdd3e98" + ] + } + }`), + expected: &Source{ + ID: "foobar", + Type: "image", + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: "alpine:3.10", + ID: "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a", + ManifestDigest: "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c", + MediaType: "application/vnd.docker.distribution.manifest.v2+json", + Tags: []string{}, + Size: 5576169, + Layers: []source.StereoscopeLayerMetadata{ + { + MediaType: "application/vnd.docker.image.rootfs.diff.tar.gzip", + Digest: "sha256:9fb3aa2f8b8023a4bebbf92aa567caf88e38e969ada9f0ac12643b2847391635", + Size: 5576169, + }, + }, + RawManifest: []byte(`{ + "schemaVersion": 2, + "mediaType": "application/vnd.docker.distribution.manifest.v2+json", + "config": { + "mediaType": "application/vnd.docker.container.image.v1+json", + "size": 1472, + "digest": "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a" + }, + "layers": [ + { + "mediaType": "application/vnd.docker.image.rootfs.diff.tar.gzip", + "size": 2798338, + "digest": "sha256:396c31837116ac290458afcb928f68b6cc1c7bdd6963fc72f52f365a2a89c1b5" + } + ] +}`), + RawConfig: 
[]byte(`{"architecture":"amd64","config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/bin/sh"],"Image":"sha256:eb2080c455e94c22ae35b3aef9e078c492a00795412e026e4d6b41ef64bc7dd8","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null},"container":"fdb7e80e3339e8d0599282e606c907aa5881ee4c668a68136119e6dfac6ce3a4","container_config":{"Hostname":"fdb7e80e3339","Domainname":"","User":"","AttachStdin":false,"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false,"StdinOnce":false,"Env":["PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"],"Cmd":["/bin/sh","-c","#(nop) ","CMD [\"/bin/sh\"]"],"Image":"sha256:eb2080c455e94c22ae35b3aef9e078c492a00795412e026e4d6b41ef64bc7dd8","Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":{}},"created":"2021-04-14T19:20:05.338397761Z","docker_version":"19.03.12","history":[{"created":"2021-04-14T19:20:04.987219124Z","created_by":"/bin/sh -c #(nop) ADD file:c5377eaa926bf412dd8d4a08b0a1f2399cfd708743533b0aa03b53d14cb4bb4e in / "},{"created":"2021-04-14T19:20:05.338397761Z","created_by":"/bin/sh -c #(nop) CMD [\"/bin/sh\"]","empty_layer":true}],"os":"linux","rootfs":{"type":"layers","diff_ids":["sha256:9fb3aa2f8b8023a4bebbf92aa567caf88e38e969ada9f0ac12643b2847391635"]}}`), + RepoDigests: []string{ + "index.docker." 
+ + "io/library/alpine@sha256:451eee8bedcb2f029756dc3e9d73bab0e7943c1ac55cff3a4861c52a0fdd3e98", + }, + }, + }, + }, + { + name: "file", + input: []byte(`{ + "id": "foobar", + "type": "file", + "metadata": { + "path": "/var/lib/foo/go.mod", + "mimeType": "text/plain", + "digests": [ + { + "algorithm": "sha256", + "value": "e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a" + } + ] + } + }`), + expected: &Source{ + ID: "foobar", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "/var/lib/foo/go.mod", + Digests: []file.Digest{ + { + Algorithm: "sha256", + Value: "e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a", + }, + }, + MIMEType: "text/plain", + }, + }, + }, + { + name: "unknown source type", + input: []byte(`{ + "id": "foobar", + "type": "unknown-thing", + "target":"/var/lib/foo" + }`), + expected: &Source{ + ID: "foobar", + Type: "unknown-thing", + }, + wantErr: require.Error, + }, + } + + for _, tt := range cases { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + + var src Source + err := json.Unmarshal(tt.input, &src) + + tt.wantErr(t, err) + + if diff := cmp.Diff(tt.expected, &src); diff != "" { + t.Errorf("unexpected result from Source unmarshaling (-want +got)\n%s", diff) + } + + tracker.Tested(t, tt.expected.Metadata) + }) + } +} + +func TestSource_UnmarshalJSON_PreSchemaV9(t *testing.T) { cases := []struct { name string input []byte expectedSource *Source errAssertion assert.ErrorAssertionFunc }{ + { + name: "abbreviated directory", + input: []byte(`{ + "id": "foobar", + "type": "dir", + "target":"/var/lib/foo" + }`), + expectedSource: &Source{ + ID: "foobar", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "/var/lib/foo", + }, + }, + errAssertion: assert.NoError, + }, { name: "directory", input: []byte(`{ @@ -25,9 +202,11 @@ func TestSource_UnmarshalJSON(t *testing.T) { "target":"/var/lib/foo" }`), expectedSource: &Source{ - ID: 
"foobar", - Type: "directory", - Target: "/var/lib/foo", + ID: "foobar", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "/var/lib/foo", + }, }, errAssertion: assert.NoError, }, @@ -60,14 +239,14 @@ func TestSource_UnmarshalJSON(t *testing.T) { expectedSource: &Source{ ID: "foobar", Type: "image", - Target: source.ImageMetadata{ + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "alpine:3.10", ID: "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a", ManifestDigest: "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c", MediaType: "application/vnd.docker.distribution.manifest.v2+json", Tags: []string{}, Size: 5576169, - Layers: []source.LayerMetadata{ + Layers: []source.StereoscopeLayerMetadata{ { MediaType: "application/vnd.docker.image.rootfs.diff.tar.gzip", Digest: "sha256:9fb3aa2f8b8023a4bebbf92aa567caf88e38e969ada9f0ac12643b2847391635", @@ -107,9 +286,11 @@ func TestSource_UnmarshalJSON(t *testing.T) { "target":"/var/lib/foo/go.mod" }`), expectedSource: &Source{ - ID: "foobar", - Type: "file", - Target: "/var/lib/foo/go.mod", + ID: "foobar", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "/var/lib/foo/go.mod", + }, }, errAssertion: assert.NoError, }, @@ -130,12 +311,12 @@ func TestSource_UnmarshalJSON(t *testing.T) { for _, testCase := range cases { t.Run(testCase.name, func(t *testing.T) { - source := new(Source) + src := new(Source) - err := json.Unmarshal(testCase.input, source) + err := json.Unmarshal(testCase.input, src) testCase.errAssertion(t, err) - if diff := cmp.Diff(testCase.expectedSource, source); diff != "" { + if diff := cmp.Diff(testCase.expectedSource, src); diff != "" { t.Errorf("unexpected result from Source unmarshaling (-want +got)\n%s", diff) } }) diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden index a162e9835..61bb2efe4 100644 --- 
a/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden +++ b/syft/formats/syftjson/test-fixtures/snapshot/TestDirectoryEncoder.golden @@ -72,9 +72,13 @@ ], "artifactRelationships": [], "source": { - "id": "eda6cf0b63f1a1d2eaf7792a2a98c832c21a18e6992bcebffe6381781cc85cbc", + "id": "d1563248892cd59af469f406eee907c76fa4f9041f5410d45b93aef903bc4216", + "name": "some/path", + "version": "", "type": "directory", - "target": "/some/path" + "metadata": { + "path": "redacted/some/path" + } }, "distro": { "prettyName": "debian", @@ -92,9 +96,5 @@ "configuration": { "config-key": "config-value" } - }, - "schema": { - "version": "8.0.0", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-8.0.0.json" } } diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden index daca7eb5e..f83789be5 100644 --- a/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden +++ b/syft/formats/syftjson/test-fixtures/snapshot/TestEncodeFullJSONDocument.golden @@ -149,8 +149,10 @@ ], "source": { "id": "c2b46b4eb06296933b7cf0722683964e9ecbd93265b9ef6ae9642e3952afbba0", + "name": "", + "version": "", "type": "image", - "target": { + "metadata": { "userInput": "user-image-input", "imageID": "sha256:c2b46b4eb06296933b7cf0722683964e9ecbd93265b9ef6ae9642e3952afbba0", "manifestDigest": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368", @@ -192,9 +194,5 @@ "configuration": { "config-key": "config-value" } - }, - "schema": { - "version": "8.0.0", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-8.0.0.json" } } diff --git a/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden b/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden index c0a2f758d..518a90ab5 100644 --- a/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden 
+++ b/syft/formats/syftjson/test-fixtures/snapshot/TestImageEncoder.golden @@ -70,8 +70,10 @@ "artifactRelationships": [], "source": { "id": "c8ac88bbaf3d1c036f6a1d601c3d52bafbf05571c97d68322e7cb3a7ecaa304f", + "name": "user-image-input", + "version": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368", "type": "image", - "target": { + "metadata": { "userInput": "user-image-input", "imageID": "sha256:a3c61dc134d2f31b415c50324e75842d7f91622f39a89468e51938330b3fd3af", "manifestDigest": "sha256:2731251dc34951c0e50fcc643b4c5f74922dad1a5d98f302b504cf46cd5d9368", @@ -115,9 +117,5 @@ "configuration": { "config-key": "config-value" } - }, - "schema": { - "version": "8.0.0", - "url": "https://raw.githubusercontent.com/anchore/syft/main/schema/json/schema-8.0.0.json" } } diff --git a/syft/formats/syftjson/to_format_model.go b/syft/formats/syftjson/to_format_model.go index 7b3688ced..2cafddf14 100644 --- a/syft/formats/syftjson/to_format_model.go +++ b/syft/formats/syftjson/to_format_model.go @@ -12,6 +12,7 @@ import ( "github.com/anchore/syft/syft/cpe" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/formats/syftjson/model" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/linux" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" @@ -20,17 +21,12 @@ import ( // ToFormatModel transforms the sbom import a format-specific model. 
func ToFormatModel(s sbom.SBOM) model.Document { - src, err := toSourceModel(s.Source) - if err != nil { - log.Warnf("unable to create syft-json source object: %+v", err) - } - return model.Document{ Artifacts: toPackageModels(s.Artifacts.Packages), ArtifactRelationships: toRelationshipModel(s.Relationships), Files: toFile(s), Secrets: toSecrets(s.Artifacts.Secrets), - Source: src, + Source: toSourceModel(s.Source), Distro: toLinuxReleaser(s.Artifacts.LinuxDistribution), Descriptor: toDescriptor(s.Descriptor), Schema: model.Schema{ @@ -267,10 +263,16 @@ func toRelationshipModel(relationships []artifact.Relationship) []model.Relation } // toSourceModel creates a new source object to be represented into JSON. -func toSourceModel(src source.Metadata) (model.Source, error) { - switch src.Scheme { - case source.ImageScheme: - metadata := src.ImageMetadata +func toSourceModel(src source.Description) model.Source { + m := model.Source{ + ID: src.ID, + Name: src.Name, + Version: src.Version, + Type: sourcemetadata.JSONName(src.Metadata), + Metadata: src.Metadata, + } + + if metadata, ok := src.Metadata.(source.StereoscopeImageSourceMetadata); ok { // ensure that empty collections are not shown as null if metadata.RepoDigests == nil { metadata.RepoDigests = []string{} @@ -278,24 +280,8 @@ func toSourceModel(src source.Metadata) (model.Source, error) { if metadata.Tags == nil { metadata.Tags = []string{} } - return model.Source{ - ID: src.ID, - Type: "image", - Target: metadata, - }, nil - case source.DirectoryScheme: - return model.Source{ - ID: src.ID, - Type: "directory", - Target: src.Path, - }, nil - case source.FileScheme: - return model.Source{ - ID: src.ID, - Type: "file", - Target: src.Path, - }, nil - default: - return model.Source{}, fmt.Errorf("unsupported source: %q", src.Scheme) + m.Metadata = metadata } + + return m } diff --git a/syft/formats/syftjson/to_format_model_test.go b/syft/formats/syftjson/to_format_model_test.go index 98f03c7b0..8dd3475d8 100644 --- 
a/syft/formats/syftjson/to_format_model_test.go +++ b/syft/formats/syftjson/to_format_model_test.go @@ -1,62 +1,174 @@ package syftjson import ( + "encoding/json" "testing" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" stereoscopeFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/formats/syftjson/model" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/source" ) -func Test_toSourceModel(t *testing.T) { - allSchemes := strset.New() - for _, s := range source.AllSchemes { - allSchemes.Add(string(s)) +func Test_toSourceModel_IgnoreBase(t *testing.T) { + tests := []struct { + name string + src source.Description + }{ + { + name: "directory", + src: source.Description{ + ID: "test-id", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + }, } - testedSchemes := strset.New() + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // assert the model transformation is correct + actual := toSourceModel(test.src) + + by, err := json.Marshal(actual) + require.NoError(t, err) + assert.NotContains(t, string(by), "some/base") + }) + } +} + +func Test_toSourceModel(t *testing.T) { + tracker := sourcemetadata.NewCompletionTester(t) tests := []struct { name string - src source.Metadata + src source.Description expected model.Source }{ { name: "directory", - src: source.Metadata{ - ID: "test-id", - Scheme: source.DirectoryScheme, - Path: "some/path", + src: source.Description{ + ID: "test-id", + Name: "some-name", + Version: "some-version", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, }, expected: model.Source{ - ID: "test-id", - Type: "directory", - Target: "some/path", + ID: "test-id", + Name: "some-name", + Version: "some-version", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: 
"some/path", + Base: "some/base", + }, }, }, { name: "file", - src: source.Metadata{ - ID: "test-id", - Scheme: source.FileScheme, - Path: "some/path", + src: source.Description{ + ID: "test-id", + Name: "some-name", + Version: "some-version", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, }, expected: model.Source{ - ID: "test-id", - Type: "file", - Target: "some/path", + ID: "test-id", + Name: "some-name", + Version: "some-version", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, }, }, { name: "image", - src: source.Metadata{ - ID: "test-id", - Scheme: source.ImageScheme, - ImageMetadata: source.ImageMetadata{ + src: source.Description{ + ID: "test-id", + Name: "some-name", + Version: "some-version", + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: "user-input", + ID: "id...", + ManifestDigest: "digest...", + MediaType: "type...", + }, + }, + expected: model.Source{ + ID: "test-id", + Name: "some-name", + Version: "some-version", + Type: "image", + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: "user-input", + ID: "id...", + ManifestDigest: "digest...", + MediaType: "type...", + RepoDigests: []string{}, + Tags: []string{}, + }, + }, + }, + // below are regression tests for when the name/version are not provided + // historically we've hoisted up the name/version from the metadata, now it is a simple pass-through + { + name: "directory - no name/version", + src: source.Description{ + ID: "test-id", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + expected: model.Source{ + ID: "test-id", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + }, + { + name: "file - no name/version", + 
src: source.Description{ + ID: "test-id", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, + }, + expected: model.Source{ + ID: "test-id", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, + }, + }, + { + name: "image - no name/version", + src: source.Description{ + ID: "test-id", + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "user-input", ID: "id...", ManifestDigest: "digest...", @@ -66,7 +178,7 @@ func Test_toSourceModel(t *testing.T) { expected: model.Source{ ID: "test-id", Type: "image", - Target: source.ImageMetadata{ + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "user-input", ID: "id...", ManifestDigest: "digest...", @@ -79,18 +191,14 @@ func Test_toSourceModel(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - // track each scheme tested (passed or not) - testedSchemes.Add(string(test.src.Scheme)) - // assert the model transformation is correct - actual, err := toSourceModel(test.src) - require.NoError(t, err) + actual := toSourceModel(test.src) assert.Equal(t, test.expected, actual) + + // track each scheme tested (passed or not) + tracker.Tested(t, test.expected.Metadata) }) } - - // assert all possible schemes were under test - assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test") } func Test_toFileType(t *testing.T) { diff --git a/syft/formats/syftjson/to_syft_model.go b/syft/formats/syftjson/to_syft_model.go index aeb0c24f1..419cf3ed4 100644 --- a/syft/formats/syftjson/to_syft_model.go +++ b/syft/formats/syftjson/to_syft_model.go @@ -202,12 +202,12 @@ func toSyftRelationships(doc *model.Document, catalog *pkg.Collection, relations return out, conversionErrors } -func toSyftSource(s model.Source) 
*source.Source { - newSrc := &source.Source{ - Metadata: *toSyftSourceData(s), +func toSyftSource(s model.Source) source.Source { + description := toSyftSourceData(s) + if description == nil { + return nil } - newSrc.SetID() - return newSrc + return source.FromDescription(*description) } func toSyftRelationship(idMap map[string]interface{}, relationship model.Relationship, idAliases map[string]string) (*artifact.Relationship, error) { @@ -257,43 +257,13 @@ func toSyftDescriptor(d model.Descriptor) sbom.Descriptor { } } -func toSyftSourceData(s model.Source) *source.Metadata { - switch s.Type { - case "directory": - path, ok := s.Target.(string) - if !ok { - log.Warnf("unable to parse source target as string: %+v", s.Target) - return nil - } - return &source.Metadata{ - ID: s.ID, - Scheme: source.DirectoryScheme, - Path: path, - } - case "file": - path, ok := s.Target.(string) - if !ok { - log.Warnf("unable to parse source target as string: %+v", s.Target) - return nil - } - return &source.Metadata{ - ID: s.ID, - Scheme: source.FileScheme, - Path: path, - } - case "image": - metadata, ok := s.Target.(source.ImageMetadata) - if !ok { - log.Warnf("unable to parse source target as image metadata: %+v", s.Target) - return nil - } - return &source.Metadata{ - ID: s.ID, - Scheme: source.ImageScheme, - ImageMetadata: metadata, - } +func toSyftSourceData(s model.Source) *source.Description { + return &source.Description{ + ID: s.ID, + Name: s.Name, + Version: s.Version, + Metadata: s.Metadata, } - return nil } func toSyftCatalog(pkgs []model.Package, idAliases map[string]string) *pkg.Collection { diff --git a/syft/formats/syftjson/to_syft_model_test.go b/syft/formats/syftjson/to_syft_model_test.go index dabc33f38..5600ec155 100644 --- a/syft/formats/syftjson/to_syft_model_test.go +++ b/syft/formats/syftjson/to_syft_model_test.go @@ -4,66 +4,154 @@ import ( "errors" "testing" - "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" + 
"github.com/stretchr/testify/require" stereoFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/formats/syftjson/model" + "github.com/anchore/syft/syft/internal/sourcemetadata" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" ) func Test_toSyftSourceData(t *testing.T) { - allSchemes := strset.New() - for _, s := range source.AllSchemes { - allSchemes.Add(string(s)) - } - testedSchemes := strset.New() + tracker := sourcemetadata.NewCompletionTester(t) tests := []struct { name string src model.Source - expected source.Metadata + expected *source.Description }{ { name: "directory", - expected: source.Metadata{ - Scheme: source.DirectoryScheme, - Path: "some/path", - }, src: model.Source{ - Type: "directory", - Target: "some/path", + ID: "the-id", + Name: "some-name", + Version: "some-version", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + expected: &source.Description{ + ID: "the-id", + Name: "some-name", + Version: "some-version", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, }, }, { name: "file", - expected: source.Metadata{ - Scheme: source.FileScheme, - Path: "some/path", - }, src: model.Source{ - Type: "file", - Target: "some/path", + ID: "the-id", + Name: "some-name", + Version: "some-version", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, + }, + expected: &source.Description{ + ID: "the-id", + Name: "some-name", + Version: "some-version", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, }, }, { name: "image", - expected: source.Metadata{ - Scheme: 
source.ImageScheme, - ImageMetadata: source.ImageMetadata{ + src: model.Source{ + ID: "the-id", + Name: "some-name", + Version: "some-version", + Type: "image", + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "user-input", ID: "id...", ManifestDigest: "digest...", MediaType: "type...", }, }, + expected: &source.Description{ + ID: "the-id", + Name: "some-name", + Version: "some-version", + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: "user-input", + ID: "id...", + ManifestDigest: "digest...", + MediaType: "type...", + }, + }, + }, + // below are regression tests for when the name/version are not provided + // historically we've hoisted up the name/version from the metadata, now it is a simple pass-through + { + name: "directory - no name/version", src: model.Source{ + ID: "the-id", + Type: "directory", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + expected: &source.Description{ + ID: "the-id", + Metadata: source.DirectorySourceMetadata{ + Path: "some/path", + Base: "some/base", + }, + }, + }, + { + name: "file - no name/version", + src: model.Source{ + ID: "the-id", + Type: "file", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, + }, + expected: &source.Description{ + ID: "the-id", + Metadata: source.FileSourceMetadata{ + Path: "some/path", + Digests: []file.Digest{{Algorithm: "sha256", Value: "some-digest"}}, + MIMEType: "text/plain", + }, + }, + }, + { + name: "image - no name/version", + src: model.Source{ + ID: "the-id", Type: "image", - Target: source.ImageMetadata{ + Metadata: source.StereoscopeImageSourceMetadata{ + UserInput: "user-input", + ID: "id...", + ManifestDigest: "digest...", + MediaType: "type...", + }, + }, + expected: &source.Description{ + ID: "the-id", + Metadata: source.StereoscopeImageSourceMetadata{ UserInput: "user-input", ID: "id...", 
ManifestDigest: "digest...", @@ -76,22 +164,18 @@ func Test_toSyftSourceData(t *testing.T) { t.Run(test.name, func(t *testing.T) { // assert the model transformation is correct actual := toSyftSourceData(test.src) - assert.Equal(t, test.expected, *actual) + assert.Equal(t, test.expected, actual) - // track each scheme tested (passed or not) - testedSchemes.Add(string(test.expected.Scheme)) + tracker.Tested(t, test.expected.Metadata) }) } - - // assert all possible schemes were under test - assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test") } func Test_idsHaveChanged(t *testing.T) { s, err := toSyftModel(model.Document{ Source: model.Source{ - Type: "file", - Target: "some/path", + Type: "file", + Metadata: source.FileSourceMetadata{Path: "some/path"}, }, Artifacts: []model.Package{ { @@ -116,17 +200,17 @@ func Test_idsHaveChanged(t *testing.T) { }, }) - assert.NoError(t, err) - assert.Len(t, s.Relationships, 1) + require.NoError(t, err) + require.Len(t, s.Relationships, 1) r := s.Relationships[0] from := s.Artifacts.Packages.Package(r.From.ID()) - assert.NotNil(t, from) + require.NotNil(t, from) assert.Equal(t, "pkg-1", from.Name) to := s.Artifacts.Packages.Package(r.To.ID()) - assert.NotNil(t, to) + require.NotNil(t, to) assert.Equal(t, "pkg-2", to.Name) } diff --git a/syft/formats/table/encoder_test.go b/syft/formats/table/encoder_test.go index 44e9f4730..d0c672237 100644 --- a/syft/formats/table/encoder_test.go +++ b/syft/formats/table/encoder_test.go @@ -9,14 +9,17 @@ import ( "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateTableGoldenFiles = flag.Bool("update-table", false, "update the *.golden files for table format") +var updateSnapshot = flag.Bool("update-table", false, "update the *.golden files for table format") func TestTableEncoder(t *testing.T) { testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateTableGoldenFiles, - false, + 
testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, t.TempDir()), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + }, ) } diff --git a/syft/formats/template/encoder_test.go b/syft/formats/template/encoder_test.go index aed6dca52..82a54498a 100644 --- a/syft/formats/template/encoder_test.go +++ b/syft/formats/template/encoder_test.go @@ -9,19 +9,21 @@ import ( "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateTmpl = flag.Bool("update-tmpl", false, "update the *.golden files for json encoders") +var updateSnapshot = flag.Bool("update-template", false, "update the *.golden files for json encoders") func TestFormatWithOption(t *testing.T) { f := OutputFormat{} f.SetTemplatePath("test-fixtures/csv.template") testutils.AssertEncoderAgainstGoldenSnapshot(t, - f, - testutils.DirectoryInput(t), - *updateTmpl, - false, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, t.TempDir()), + Format: f, + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + }, ) - } func TestFormatWithOptionAndHasField(t *testing.T) { @@ -29,16 +31,19 @@ func TestFormatWithOptionAndHasField(t *testing.T) { f.SetTemplatePath("test-fixtures/csv-hasField.template") testutils.AssertEncoderAgainstGoldenSnapshot(t, - f, - testutils.DirectoryInputWithAuthorField(t), - *updateTmpl, - false, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInputWithAuthorField(t), + Format: f, + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + }, ) } func TestFormatWithoutOptions(t *testing.T) { f := Format() - err := f.Encode(nil, testutils.DirectoryInput(t)) + err := f.Encode(nil, testutils.DirectoryInput(t, t.TempDir())) assert.ErrorContains(t, err, "no template file: please provide a template path") } diff --git a/syft/formats/text/encoder.go b/syft/formats/text/encoder.go index 
d16ef1798..1c19084d5 100644 --- a/syft/formats/text/encoder.go +++ b/syft/formats/text/encoder.go @@ -14,13 +14,15 @@ func encoder(output io.Writer, s sbom.SBOM) error { w := new(tabwriter.Writer) w.Init(output, 0, 8, 0, '\t', tabwriter.AlignRight) - switch s.Source.Scheme { - case source.DirectoryScheme, source.FileScheme: - fmt.Fprintf(w, "[Path: %s]\n", s.Source.Path) - case source.ImageScheme: + switch metadata := s.Source.Metadata.(type) { + case source.DirectorySourceMetadata: + fmt.Fprintf(w, "[Path: %s]\n", metadata.Path) + case source.FileSourceMetadata: + fmt.Fprintf(w, "[Path: %s]\n", metadata.Path) + case source.StereoscopeImageSourceMetadata: fmt.Fprintln(w, "[Image]") - for idx, l := range s.Source.ImageMetadata.Layers { + for idx, l := range metadata.Layers { fmt.Fprintln(w, " Layer:\t", idx) fmt.Fprintln(w, " Digest:\t", l.Digest) fmt.Fprintln(w, " Size:\t", l.Size) @@ -29,7 +31,7 @@ func encoder(output io.Writer, s sbom.SBOM) error { w.Flush() } default: - return fmt.Errorf("unsupported source: %T", s.Source.Scheme) + return fmt.Errorf("unsupported source: %T", s.Source.Metadata) } // populate artifacts... 
diff --git a/syft/formats/text/encoder_test.go b/syft/formats/text/encoder_test.go index 7b5e9f472..4a52d2834 100644 --- a/syft/formats/text/encoder_test.go +++ b/syft/formats/text/encoder_test.go @@ -7,24 +7,42 @@ import ( "github.com/anchore/syft/syft/formats/internal/testutils" ) -var updateTextEncoderGoldenFiles = flag.Bool("update-text", false, "update the *.golden files for text encoder") +var updateSnapshot = flag.Bool("update-text", false, "update the *.golden files for text encoder") +var updateImage = flag.Bool("update-image", false, "update the golden image used for image encoder testing") func TestTextDirectoryEncoder(t *testing.T) { + dir := t.TempDir() testutils.AssertEncoderAgainstGoldenSnapshot(t, - Format(), - testutils.DirectoryInput(t), - *updateTextEncoderGoldenFiles, - false, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.DirectoryInput(t, dir), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(dir), + }, ) } func TestTextImageEncoder(t *testing.T) { testImage := "image-simple" testutils.AssertEncoderAgainstGoldenImageSnapshot(t, - Format(), - testutils.ImageInput(t, testImage, testutils.FromSnapshot()), - testImage, - *updateTextEncoderGoldenFiles, - false, + testutils.ImageSnapshotTestConfig{ + Image: testImage, + UpdateImageSnapshot: *updateImage, + }, + testutils.EncoderSnapshotTestConfig{ + Subject: testutils.ImageInput(t, testImage, testutils.FromSnapshot()), + Format: Format(), + UpdateSnapshot: *updateSnapshot, + PersistRedactionsInSnapshot: true, + IsJSON: false, + Redactor: redactor(), + }, ) } + +func redactor(values ...string) testutils.Redactor { + return testutils.NewRedactions(). + WithValuesRedacted(values...) 
+} diff --git a/syft/formats/text/test-fixtures/snapshot/TestTextDirectoryEncoder.golden b/syft/formats/text/test-fixtures/snapshot/TestTextDirectoryEncoder.golden index 25881f2d9..9efab3804 100644 --- a/syft/formats/text/test-fixtures/snapshot/TestTextDirectoryEncoder.golden +++ b/syft/formats/text/test-fixtures/snapshot/TestTextDirectoryEncoder.golden @@ -1,4 +1,4 @@ -[Path: /some/path] +[Path: redacted/some/path] [package-1] Version: 1.0.1 Type: python diff --git a/syft/internal/fileresolver/chroot_context.go b/syft/internal/fileresolver/chroot_context.go new file mode 100644 index 000000000..a5245952b --- /dev/null +++ b/syft/internal/fileresolver/chroot_context.go @@ -0,0 +1,165 @@ +package fileresolver + +import ( + "fmt" + "os" + "path" + "path/filepath" + "strings" + + "github.com/anchore/syft/syft/internal/windows" +) + +// ChrootContext helps to modify path from a real filesystem to a chroot-like filesystem, taking into account +// the user given root, the base path (if any) to consider as the root, and the current working directory. +// Note: this only works on a real filesystem, not on a virtual filesystem (such as a stereoscope filetree). 
+type ChrootContext struct { + root string + base string + cwd string + cwdRelativeToRoot string +} + +func NewChrootContextFromCWD(root, base string) (*ChrootContext, error) { + currentWD, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("could not get current working directory: %w", err) + } + + return NewChrootContext(root, base, currentWD) +} + +func NewChrootContext(root, base, cwd string) (*ChrootContext, error) { + cleanRoot, err := NormalizeRootDirectory(root) + if err != nil { + return nil, err + } + + cleanBase, err := NormalizeBaseDirectory(base) + if err != nil { + return nil, err + } + + chroot := &ChrootContext{ + root: cleanRoot, + base: cleanBase, + cwd: cwd, + } + + return chroot, chroot.ChangeDirectory(cwd) +} + +func NormalizeRootDirectory(root string) (string, error) { + cleanRoot, err := filepath.EvalSymlinks(root) + if err != nil { + return "", fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err) + } + return cleanRoot, nil +} + +func NormalizeBaseDirectory(base string) (string, error) { + if base == "" { + return "", nil + } + + cleanBase, err := filepath.EvalSymlinks(base) + if err != nil { + return "", fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err) + } + + return filepath.Abs(cleanBase) +} + +// Root returns the root path with all symlinks evaluated. +func (r ChrootContext) Root() string { + return r.root +} + +// Base returns the absolute base path with all symlinks evaluated. +func (r ChrootContext) Base() string { + return r.base +} + +// ChangeRoot swaps the path for the chroot. +func (r *ChrootContext) ChangeRoot(dir string) error { + newR, err := NewChrootContext(dir, r.base, r.cwd) + if err != nil { + return fmt.Errorf("could not change root: %w", err) + } + + *r = *newR + + return nil +} + +// ChangeDirectory changes the current working directory so that any relative paths passed +// into ToNativePath() and ToChrootPath() honor the new CWD. 
If the process changes the CWD in-flight, this should be +// called again to ensure correct functionality of ToNativePath() and ToChrootPath(). +func (r *ChrootContext) ChangeDirectory(dir string) error { + var ( + cwdRelativeToRoot string + err error + ) + + dir, err = filepath.Abs(dir) + if err != nil { + return fmt.Errorf("could not determine absolute path to CWD: %w", err) + } + + if path.IsAbs(r.root) { + cwdRelativeToRoot, err = filepath.Rel(dir, r.root) + if err != nil { + return fmt.Errorf("could not determine given root path to CWD: %w", err) + } + } else { + cwdRelativeToRoot = filepath.Clean(r.root) + } + + r.cwd = dir + r.cwdRelativeToRoot = cwdRelativeToRoot + return nil +} + +// ToNativePath takes a path in the context of the chroot-like filesystem and converts it to a path in the underlying fs domain. +func (r ChrootContext) ToNativePath(chrootPath string) (string, error) { + responsePath := chrootPath + + if filepath.IsAbs(responsePath) { + // don't allow input to potentially hop above root path + responsePath = path.Join(r.root, responsePath) + } else { + // ensure we take into account any relative difference between the root path and the CWD for relative requests + responsePath = path.Join(r.cwdRelativeToRoot, responsePath) + } + + var err error + responsePath, err = filepath.Abs(responsePath) + if err != nil { + return "", err + } + return responsePath, nil +} + +// ToChrootPath takes a path from the underlying fs domain and converts it to a path that is relative to the current root context. 
+func (r ChrootContext) ToChrootPath(nativePath string) string { + responsePath := nativePath + // check to see if we need to encode back to Windows from posix + if windows.HostRunningOnWindows() { + responsePath = windows.FromPosix(responsePath) + } + + // clean references to the request path (either the root, or the base if set) + if filepath.IsAbs(responsePath) { + var prefix string + if r.base != "" { + prefix = r.base + } else { + // we need to account for the cwd relative to the running process and the given root for the directory resolver + prefix = filepath.Clean(filepath.Join(r.cwd, r.cwdRelativeToRoot)) + prefix += string(filepath.Separator) + } + responsePath = strings.TrimPrefix(responsePath, prefix) + } + + return responsePath +} diff --git a/syft/internal/fileresolver/chroot_context_test.go b/syft/internal/fileresolver/chroot_context_test.go new file mode 100644 index 000000000..2cd8befe1 --- /dev/null +++ b/syft/internal/fileresolver/chroot_context_test.go @@ -0,0 +1,481 @@ +package fileresolver + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_ChrootContext_RequestResponse(t *testing.T) { + // / + // somewhere/ + // outside.txt + // root-link -> ./ + // path/ + // to/ + // abs-inside.txt -> /path/to/the/file.txt # absolute link to somewhere inside of the root + // rel-inside.txt -> ./the/file.txt # relative link to somewhere inside of the root + // the/ + // file.txt + // abs-outside.txt -> /somewhere/outside.txt # absolute link to outside of the root + // rel-outside -> ../../../somewhere/outside.txt # relative link to outside of the root + // + + testDir, err := os.Getwd() + require.NoError(t, err) + relative := filepath.Join("test-fixtures", "req-resp") + absolute := filepath.Join(testDir, relative) + + absPathToTheFile := filepath.Join(absolute, "path", "to", "the", "file.txt") + + absAbsInsidePath := filepath.Join(absolute, "path", "to", 
"abs-inside.txt") + absAbsOutsidePath := filepath.Join(absolute, "path", "to", "the", "abs-outside.txt") + + absRelOutsidePath := filepath.Join(absolute, "path", "to", "the", "rel-outside.txt") + + relViaLink := filepath.Join(relative, "root-link") + absViaLink := filepath.Join(absolute, "root-link") + + absViaLinkPathToTheFile := filepath.Join(absViaLink, "path", "to", "the", "file.txt") + absViaLinkAbsOutsidePath := filepath.Join(absViaLink, "path", "to", "the", "abs-outside.txt") + absViaLinkRelOutsidePath := filepath.Join(absViaLink, "path", "to", "the", "rel-outside.txt") + + relViaDoubleLink := filepath.Join(relative, "root-link", "root-link") + absViaDoubleLink := filepath.Join(absolute, "root-link", "root-link") + + absViaDoubleLinkPathToTheFile := filepath.Join(absViaDoubleLink, "path", "to", "the", "file.txt") + absViaDoubleLinkRelOutsidePath := filepath.Join(absViaDoubleLink, "path", "to", "the", "rel-outside.txt") + + cleanup := func() { + _ = os.Remove(absAbsInsidePath) + _ = os.Remove(absAbsOutsidePath) + } + + // ensure the absolute symlinks are cleaned up from any previous runs + cleanup() + + require.NoError(t, os.Symlink(filepath.Join(absolute, "path", "to", "the", "file.txt"), absAbsInsidePath)) + require.NoError(t, os.Symlink(filepath.Join(absolute, "somewhere", "outside.txt"), absAbsOutsidePath)) + + t.Cleanup(cleanup) + + cases := []struct { + name string + cwd string + root string + base string + input string + expectedNativePath string + expectedChrootPath string + }{ + { + name: "relative root, relative request, direct", + root: relative, + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct", + root: absolute, + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct", + root: relative, + input: 
"/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct", + root: absolute, + input: "/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + // cwd within root... + { + name: "relative root, relative request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: "../../", + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: absolute, + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + root: "../../", + input: "/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within root", + cwd: filepath.Join(relative, "path/to"), + + root: absolute, + input: "/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + // cwd within symlink root... 
+ { + name: "relative root, relative request, direct, cwd within symlink root", + cwd: relViaLink, + root: "./", + input: "path/to/the/file.txt", + expectedNativePath: absViaLinkPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within symlink root", + cwd: relViaLink, + root: absViaLink, + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within symlink root", + cwd: relViaLink, + root: "./", + input: "/path/to/the/file.txt", + expectedNativePath: absViaLinkPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within symlink root", + cwd: relViaLink, + root: absViaLink, + input: "/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + // cwd within symlink root, request nested within... 
+ { + name: "relative root, relative nested request, direct, cwd within symlink root", + cwd: relViaLink, + root: "./path", + input: "to/the/file.txt", + expectedNativePath: absViaLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absViaLink, "path"), + input: "to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd within symlink root", + cwd: relViaLink, + root: "./path", + input: "/to/the/file.txt", + expectedNativePath: absViaLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absViaLink, "path"), + input: "/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + // cwd within DOUBLE symlink root... 
+ { + name: "relative root, relative request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: "./", + input: "path/to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, relative request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: absViaDoubleLink, + input: "path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "relative root, abs request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: "./", + input: "/path/to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + { + name: "abs root, abs request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: absViaDoubleLink, + input: "/path/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "path/to/the/file.txt", + }, + // cwd within DOUBLE symlink root, request nested within... 
+ { + name: "relative root, relative nested request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: "./path", + input: "to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: filepath.Join(absViaDoubleLink, "path"), + input: "to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: "./path", + input: "/to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd within (double) symlink root", + cwd: relViaDoubleLink, + root: filepath.Join(absViaDoubleLink, "path"), + input: "/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + // cwd within DOUBLE symlink root, request nested DEEP within... 
+ { + name: "relative root, relative nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relViaDoubleLink, "path", "to"), + root: "../", + input: "to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, relative nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relViaDoubleLink, "path", "to"), + root: filepath.Join(absViaDoubleLink, "path"), + input: "to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "relative root, abs nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relViaDoubleLink, "path", "to"), + root: "../", + input: "/to/the/file.txt", + expectedNativePath: absViaDoubleLinkPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + { + name: "abs root, abs nested request, direct, cwd deep within (double) symlink root", + cwd: filepath.Join(relViaDoubleLink, "path", "to"), + root: filepath.Join(absViaDoubleLink, "path"), + input: "/to/the/file.txt", + expectedNativePath: absPathToTheFile, + expectedChrootPath: "to/the/file.txt", + }, + // link to outside of root cases... 
+ { + name: "relative root, relative request, abs indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, relative request, abs indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, abs request, abs indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "/to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, abs request, abs indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "/to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, relative indirect (outside of root)", + root: filepath.Join(relative, "path"), + input: "/to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, abs request, relative indirect (outside of root)", + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + // link to outside of 
root cases... cwd within symlink root + { + name: "relative root, relative request, abs indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: "path", + input: "to/the/abs-outside.txt", + expectedNativePath: absViaLinkAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, relative request, abs indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absolute, "path"), + input: "to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, abs request, abs indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: "path", + input: "/to/the/abs-outside.txt", + expectedNativePath: absViaLinkAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "abs root, abs request, abs indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/abs-outside.txt", + expectedNativePath: absAbsOutsidePath, + expectedChrootPath: "to/the/abs-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: "path", + input: "to/the/rel-outside.txt", + expectedNativePath: absViaLinkRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absolute, "path"), + input: "to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, relative indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: "path", + input: "/to/the/rel-outside.txt", + expectedNativePath: absViaLinkRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", 
+ }, + { + name: "abs root, abs request, relative indirect (outside of root), cwd within symlink root", + cwd: relViaLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relViaDoubleLink, + root: "path", + input: "to/the/rel-outside.txt", + expectedNativePath: absViaDoubleLinkRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, relative request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relViaDoubleLink, + root: filepath.Join(absolute, "path"), + input: "to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "relative root, abs request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relViaDoubleLink, + root: "path", + input: "/to/the/rel-outside.txt", + expectedNativePath: absViaDoubleLinkRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + { + name: "abs root, abs request, relative indirect (outside of root), cwd within DOUBLE symlink root", + cwd: relViaDoubleLink, + root: filepath.Join(absolute, "path"), + input: "/to/the/rel-outside.txt", + expectedNativePath: absRelOutsidePath, + expectedChrootPath: "to/the/rel-outside.txt", + }, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + + // we need to mimic a shell, otherwise we won't get a path within a symlink + targetPath := filepath.Join(testDir, c.cwd) + t.Setenv("PWD", filepath.Clean(targetPath)) + + require.NoError(t, err) + require.NoError(t, os.Chdir(targetPath)) + t.Cleanup(func() { + require.NoError(t, os.Chdir(testDir)) + }) + + chroot, err := NewChrootContextFromCWD(c.root, c.base) + require.NoError(t, err) + require.NotNil(t, chroot) + + req, err 
:= chroot.ToNativePath(c.input) + require.NoError(t, err) + assert.Equal(t, c.expectedNativePath, req, "native path different") + + resp := chroot.ToChrootPath(req) + assert.Equal(t, c.expectedChrootPath, resp, "chroot path different") + }) + } +} diff --git a/syft/internal/fileresolver/container_image_squash_test.go b/syft/internal/fileresolver/container_image_squash_test.go index d65d0bccc..642f6b520 100644 --- a/syft/internal/fileresolver/container_image_squash_test.go +++ b/syft/internal/fileresolver/container_image_squash_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -513,6 +514,26 @@ func Test_imageSquashResolver_resolvesLinks(t *testing.T) { } +func compareLocations(t *testing.T, expected, actual []file.Location) { + t.Helper() + ignoreUnexported := cmpopts.IgnoreUnexported(file.LocationData{}) + ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") + ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") + + sort.Sort(file.Locations(expected)) + sort.Sort(file.Locations(actual)) + + if d := cmp.Diff(expected, actual, + ignoreUnexported, + ignoreFS, + ignoreMetadata, + ); d != "" { + + t.Errorf("unexpected locations (-want +got):\n%s", d) + } + +} + func TestSquashResolver_AllLocations(t *testing.T) { img := imagetest.GetFixtureImage(t, "docker-archive", "image-files-deleted") diff --git a/syft/internal/fileresolver/directory.go b/syft/internal/fileresolver/directory.go index 2d634cf1e..766d53c8f 100644 --- a/syft/internal/fileresolver/directory.go +++ b/syft/internal/fileresolver/directory.go @@ -5,19 +5,14 @@ import ( "fmt" "io" "os" - "path" - "path/filepath" - "runtime" - "strings" stereoscopeFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/stereoscope/pkg/filetree" "github.com/anchore/syft/internal/log" 
"github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/windows" ) -const WindowsOS = "windows" - var unixSystemRuntimePrefixes = []string{ "/proc", "/dev", @@ -30,14 +25,12 @@ var _ file.Resolver = (*Directory)(nil) // Directory implements path and content access for the directory data source. type Directory struct { - path string - base string - currentWdRelativeToRoot string - currentWd string - tree filetree.Reader - index filetree.IndexReader - searchContext filetree.Searcher - indexer *directoryIndexer + path string + chroot ChrootContext + tree filetree.Reader + index filetree.IndexReader + searchContext filetree.Searcher + indexer *directoryIndexer } func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { @@ -50,46 +43,20 @@ func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) } func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { - currentWD, err := os.Getwd() + chroot, err := NewChrootContextFromCWD(root, base) if err != nil { - return nil, fmt.Errorf("could not get CWD: %w", err) + return nil, fmt.Errorf("unable to interpret chroot context: %w", err) } - cleanRoot, err := filepath.EvalSymlinks(root) - if err != nil { - return nil, fmt.Errorf("could not evaluate root=%q symlinks: %w", root, err) - } - - cleanBase := "" - if base != "" { - cleanBase, err = filepath.EvalSymlinks(base) - if err != nil { - return nil, fmt.Errorf("could not evaluate base=%q symlinks: %w", base, err) - } - cleanBase, err = filepath.Abs(cleanBase) - if err != nil { - return nil, err - } - } - - var currentWdRelRoot string - if path.IsAbs(cleanRoot) { - currentWdRelRoot, err = filepath.Rel(currentWD, cleanRoot) - if err != nil { - return nil, fmt.Errorf("could not determine given root path to CWD: %w", err) - } - } else { - currentWdRelRoot = filepath.Clean(cleanRoot) - } + cleanRoot := chroot.Root() + cleanBase := 
chroot.Base() return &Directory{ - path: cleanRoot, - base: cleanBase, - currentWd: currentWD, - currentWdRelativeToRoot: currentWdRelRoot, - tree: filetree.New(), - index: filetree.NewIndex(), - indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...), + path: cleanRoot, + chroot: *chroot, + tree: filetree.New(), + index: filetree.NewIndex(), + indexer: newDirectoryIndexer(cleanRoot, cleanBase, pathFilters...), }, nil } @@ -110,43 +77,12 @@ func (r *Directory) buildIndex() error { } func (r Directory) requestPath(userPath string) (string, error) { - if filepath.IsAbs(userPath) { - // don't allow input to potentially hop above root path - userPath = path.Join(r.path, userPath) - } else { - // ensure we take into account any relative difference between the root path and the CWD for relative requests - userPath = path.Join(r.currentWdRelativeToRoot, userPath) - } - - var err error - userPath, err = filepath.Abs(userPath) - if err != nil { - return "", err - } - return userPath, nil + return r.chroot.ToNativePath(userPath) } // responsePath takes a path from the underlying fs domain and converts it to a path that is relative to the root of the directory resolver. func (r Directory) responsePath(path string) string { - // check to see if we need to encode back to Windows from posix - if runtime.GOOS == WindowsOS { - path = posixToWindows(path) - } - - // clean references to the request path (either the root, or the base if set) - if filepath.IsAbs(path) { - var prefix string - if r.base != "" { - prefix = r.base - } else { - // we need to account for the cwd relative to the running process and the given root for the directory resolver - prefix = filepath.Clean(filepath.Join(r.currentWd, r.currentWdRelativeToRoot)) - prefix += string(filepath.Separator) - } - path = strings.TrimPrefix(path, prefix) - } - - return path + return r.chroot.ToChrootPath(path) } // HasPath indicates if the given path exists in the underlying source. 
@@ -196,8 +132,8 @@ func (r Directory) FilesByPath(userPaths ...string) ([]file.Location, error) { continue } - if runtime.GOOS == WindowsOS { - userStrPath = windowsToPosix(userStrPath) + if windows.HostRunningOnWindows() { + userStrPath = windows.ToPosix(userStrPath) } if ref.HasReference() { @@ -286,8 +222,8 @@ func (r Directory) FileContentsByLocation(location file.Location) (io.ReadCloser // RealPath is posix so for windows directory resolver we need to translate // to its true on disk path. filePath := string(location.Reference().RealPath) - if runtime.GOOS == WindowsOS { - filePath = posixToWindows(filePath) + if windows.HostRunningOnWindows() { + filePath = windows.FromPosix(filePath) } return stereoscopeFile.NewLazyReadCloser(filePath), nil @@ -338,30 +274,3 @@ func (r *Directory) FilesByMIMEType(types ...string) ([]file.Location, error) { return uniqueLocations, nil } - -func windowsToPosix(windowsPath string) (posixPath string) { - // volume should be encoded at the start (e.g /c/) where c is the volume - volumeName := filepath.VolumeName(windowsPath) - pathWithoutVolume := strings.TrimPrefix(windowsPath, volumeName) - volumeLetter := strings.ToLower(strings.TrimSuffix(volumeName, ":")) - - // translate non-escaped backslash to forwardslash - translatedPath := strings.ReplaceAll(pathWithoutVolume, "\\", "/") - - // always have `/` as the root... join all components, e.g.: - // convert: C:\\some\windows\Place - // into: /c/some/windows/Place - return path.Clean("/" + strings.Join([]string{volumeLetter, translatedPath}, "/")) -} - -func posixToWindows(posixPath string) (windowsPath string) { - // decode the volume (e.g. /c/ --> C:\\) - There should always be a volume name. 
- pathFields := strings.Split(posixPath, "/") - volumeName := strings.ToUpper(pathFields[1]) + `:\\` - - // translate non-escaped forward slashes into backslashes - remainingTranslatedPath := strings.Join(pathFields[2:], "\\") - - // combine volume name and backslash components - return filepath.Clean(volumeName + remainingTranslatedPath) -} diff --git a/syft/internal/fileresolver/directory_indexer.go b/syft/internal/fileresolver/directory_indexer.go index 6bdbae0c7..47349a445 100644 --- a/syft/internal/fileresolver/directory_indexer.go +++ b/syft/internal/fileresolver/directory_indexer.go @@ -7,7 +7,6 @@ import ( "os" "path" "path/filepath" - "runtime" "strings" "github.com/wagoodman/go-partybus" @@ -19,6 +18,7 @@ import ( "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/event" + "github.com/anchore/syft/syft/internal/windows" ) type PathIndexVisitor func(string, os.FileInfo, error) error @@ -263,8 +263,8 @@ func (r *directoryIndexer) indexPath(path string, info os.FileInfo, err error) ( } // here we check to see if we need to normalize paths to posix on the way in coming from windows - if runtime.GOOS == WindowsOS { - path = windowsToPosix(path) + if windows.HostRunningOnWindows() { + path = windows.ToPosix(path) } newRoot, err := r.addPathToIndex(path, info) diff --git a/syft/internal/fileresolver/excluding_file.go b/syft/internal/fileresolver/excluding_file.go index 81caa49c7..34c4948a2 100644 --- a/syft/internal/fileresolver/excluding_file.go +++ b/syft/internal/fileresolver/excluding_file.go @@ -16,9 +16,9 @@ type excluding struct { excludeFn excludeFn } -// NewExcluding create a new resolver which wraps the provided delegate and excludes +// NewExcludingDecorator create a new resolver which wraps the provided delegate and excludes // entries based on a provided path exclusion function -func NewExcluding(delegate file.Resolver, excludeFn excludeFn) file.Resolver { +func NewExcludingDecorator(delegate 
file.Resolver, excludeFn excludeFn) file.Resolver { return &excluding{ delegate, excludeFn, diff --git a/syft/internal/fileresolver/excluding_file_test.go b/syft/internal/fileresolver/excluding_file_test.go index 2ba514736..bb4e3ce1e 100644 --- a/syft/internal/fileresolver/excluding_file_test.go +++ b/syft/internal/fileresolver/excluding_file_test.go @@ -56,7 +56,7 @@ func TestExcludingResolver(t *testing.T) { resolver := &mockResolver{ locations: test.locations, } - er := NewExcluding(resolver, test.excludeFn) + er := NewExcludingDecorator(resolver, test.excludeFn) locations, _ := er.FilesByPath() assert.ElementsMatch(t, locationPaths(locations), test.expected) diff --git a/syft/internal/fileresolver/test-fixtures/req-resp/.gitignore b/syft/internal/fileresolver/test-fixtures/req-resp/.gitignore new file mode 100644 index 000000000..c94459921 --- /dev/null +++ b/syft/internal/fileresolver/test-fixtures/req-resp/.gitignore @@ -0,0 +1,2 @@ +path/to/abs-inside.txt +path/to/the/abs-outside.txt \ No newline at end of file diff --git a/syft/internal/fileresolver/unindexed_directory_test.go b/syft/internal/fileresolver/unindexed_directory_test.go index 3714d8d55..44ec69bf7 100644 --- a/syft/internal/fileresolver/unindexed_directory_test.go +++ b/syft/internal/fileresolver/unindexed_directory_test.go @@ -14,7 +14,6 @@ import ( "time" "github.com/google/go-cmp/cmp" - "github.com/google/go-cmp/cmp/cmpopts" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1263,23 +1262,3 @@ func testWithTimeout(t *testing.T, timeout time.Duration, test func(*testing.T)) case <-done: } } - -func compareLocations(t *testing.T, expected, actual []file.Location) { - t.Helper() - ignoreUnexported := cmpopts.IgnoreFields(file.LocationData{}, "ref") - ignoreMetadata := cmpopts.IgnoreFields(file.LocationMetadata{}, "Annotations") - ignoreFS := cmpopts.IgnoreFields(file.Coordinates{}, "FileSystemID") - - 
sort.Sort(file.Locations(expected)) - sort.Sort(file.Locations(actual)) - - if d := cmp.Diff(expected, actual, - ignoreUnexported, - ignoreFS, - ignoreMetadata, - ); d != "" { - - t.Errorf("unexpected locations (-want +got):\n%s", d) - } - -} diff --git a/syft/internal/generate.go b/syft/internal/generate.go new file mode 100644 index 000000000..6780080d3 --- /dev/null +++ b/syft/internal/generate.go @@ -0,0 +1,4 @@ +package internal + +//go:generate go run ./sourcemetadata/generate/main.go +//go:generate go run ./packagemetadata/generate/main.go diff --git a/syft/internal/jsonschema/README.md b/syft/internal/jsonschema/README.md new file mode 100644 index 000000000..dc7b76c91 --- /dev/null +++ b/syft/internal/jsonschema/README.md @@ -0,0 +1 @@ +Please see [schema/json/README.md](../../../schema/json/README.md) for more information on the JSON schema files in this directory. \ No newline at end of file diff --git a/schema/json/main.go b/syft/internal/jsonschema/main.go similarity index 56% rename from schema/json/main.go rename to syft/internal/jsonschema/main.go index 246abc532..26148eb23 100644 --- a/schema/json/main.go +++ b/syft/internal/jsonschema/main.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "os" + "path/filepath" "reflect" "sort" "strings" @@ -13,8 +14,8 @@ import ( "github.com/invopop/jsonschema" "github.com/anchore/syft/internal" - genInt "github.com/anchore/syft/schema/json/internal" - syftjsonModel "github.com/anchore/syft/syft/formats/syftjson/model" + syftJsonModel "github.com/anchore/syft/syft/formats/syftjson/model" + "github.com/anchore/syft/syft/internal/packagemetadata" ) /* @@ -24,30 +25,59 @@ are not captured (empty interfaces). This means that pkg.Package.Metadata is not can be extended to include specific package metadata struct shapes in the future. 
*/ -//go:generate go run ./generate/main.go - -const schemaVersion = internal.JSONSchemaVersion - func main() { write(encode(build())) } +func schemaID() jsonschema.ID { + // Today we do not host the schemas at this address, but per the JSON schema spec we should be referencing + // the schema by a URL in a domain we control. This is a placeholder for now. + return jsonschema.ID(fmt.Sprintf("anchore.io/schema/syft/json/%s", internal.JSONSchemaVersion)) +} + +func assembleTypeContainer(items []any) any { + structFields := make([]reflect.StructField, len(items)) + + for i, item := range items { + itemType := reflect.TypeOf(item) + fieldName := itemType.Name() + + structFields[i] = reflect.StructField{ + Name: fieldName, + Type: itemType, + } + } + + structType := reflect.StructOf(structFields) + return reflect.New(structType).Elem().Interface() +} + func build() *jsonschema.Schema { reflector := &jsonschema.Reflector{ + BaseSchemaID: schemaID(), AllowAdditionalProperties: true, Namer: func(r reflect.Type) string { return strings.TrimPrefix(r.Name(), "JSON") }, } - documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftjsonModel.Document{})) - metadataSchema := reflector.ReflectFromType(reflect.TypeOf(&genInt.ArtifactMetadataContainer{})) - // TODO: inject source definitions - // inject the definitions of all metadatas into the schema definitions + pkgMetadataContainer := assembleTypeContainer(packagemetadata.AllTypes()) + pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer) + + // srcMetadataContainer := assembleTypeContainer(sourcemetadata.AllTypes()) + // srcMetadataContainerType := reflect.TypeOf(srcMetadataContainer) + + documentSchema := reflector.ReflectFromType(reflect.TypeOf(&syftJsonModel.Document{})) + pkgMetadataSchema := reflector.ReflectFromType(reflect.TypeOf(pkgMetadataContainer)) + // srcMetadataSchema := reflector.ReflectFromType(reflect.TypeOf(srcMetadataContainer)) + + // TODO: add source metadata types + + // inject the 
definitions of all packages metadatas into the schema definitions var metadataNames []string - for name, definition := range metadataSchema.Definitions { - if name == reflect.TypeOf(genInt.ArtifactMetadataContainer{}).Name() { + for name, definition := range pkgMetadataSchema.Definitions { + if name == pkgMetadataContainerType.Name() { // ignore the definition for the fake container continue } @@ -93,11 +123,16 @@ func encode(schema *jsonschema.Schema) []byte { } func write(schema []byte) { - filename := fmt.Sprintf("schema-%s.json", schemaVersion) + repoRoot, err := packagemetadata.RepoRoot() + if err != nil { + fmt.Println("unable to determine repo root") + os.Exit(1) + } + schemaPath := filepath.Join(repoRoot, "schema", "json", fmt.Sprintf("schema-%s.json", internal.JSONSchemaVersion)) - if _, err := os.Stat(filename); !os.IsNotExist(err) { + if _, err := os.Stat(schemaPath); !os.IsNotExist(err) { // check if the schema is the same... - existingFh, err := os.Open(filename) + existingFh, err := os.Open(schemaPath) if err != nil { panic(err) } @@ -114,11 +149,11 @@ func write(schema []byte) { } // the generated schema is different, bail with error :( - fmt.Printf("Cowardly refusing to overwrite existing schema (%s)!\nSee the schema/json/README.md for how to increment\n", filename) + fmt.Printf("Cowardly refusing to overwrite existing schema (%s)!\nSee the schema/json/README.md for how to increment\n", schemaPath) os.Exit(1) } - fh, err := os.Create(filename) + fh, err := os.Create(schemaPath) if err != nil { panic(err) } @@ -130,5 +165,5 @@ func write(schema []byte) { defer fh.Close() - fmt.Printf("Wrote new schema to %q\n", filename) + fmt.Printf("Wrote new schema to %q\n", schemaPath) } diff --git a/schema/json/internal/metadata_types.go b/syft/internal/packagemetadata/discover_type_names.go similarity index 96% rename from schema/json/internal/metadata_types.go rename to syft/internal/packagemetadata/discover_type_names.go index 4d515a188..467b0ffc5 100644 --- 
a/schema/json/internal/metadata_types.go +++ b/syft/internal/packagemetadata/discover_type_names.go @@ -1,4 +1,4 @@ -package internal +package packagemetadata import ( "fmt" @@ -18,8 +18,8 @@ var metadataExceptions = strset.New( "FileMetadata", ) -func AllSyftMetadataTypeNames() ([]string, error) { - root, err := repoRoot() +func DiscoverTypeNames() ([]string, error) { + root, err := RepoRoot() if err != nil { return nil, err } @@ -30,7 +30,7 @@ func AllSyftMetadataTypeNames() ([]string, error) { return findMetadataDefinitionNames(files...) } -func repoRoot() (string, error) { +func RepoRoot() (string, error) { root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output() if err != nil { return "", fmt.Errorf("unable to find repo root dir: %+v", err) diff --git a/syft/internal/packagemetadata/generate/main.go b/syft/internal/packagemetadata/generate/main.go new file mode 100644 index 000000000..55c7de41b --- /dev/null +++ b/syft/internal/packagemetadata/generate/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "fmt" + "os" + + "github.com/dave/jennifer/jen" + + "github.com/anchore/syft/syft/internal/packagemetadata" +) + +// This program is invoked from syft/internal and generates packagemetadata/generated.go + +const ( + pkgImport = "github.com/anchore/syft/syft/pkg" + path = "packagemetadata/generated.go" +) + +func main() { + typeNames, err := packagemetadata.DiscoverTypeNames() + if err != nil { + panic(fmt.Errorf("unable to get all metadata type names: %w", err)) + } + + fmt.Printf("updating package metadata type list with %+v types\n", len(typeNames)) + + f := jen.NewFile("packagemetadata") + f.HeaderComment("DO NOT EDIT: generated by syft/internal/packagemetadata/generate/main.go") + f.ImportName(pkgImport, "pkg") + f.Comment("AllTypes returns a list of all pkg metadata types that syft supports (that are represented in the pkg.Package.Metadata field).") + + f.Func().Id("AllTypes").Params().Index().Any().BlockFunc(func(g *jen.Group) { + 
g.ReturnFunc(func(g *jen.Group) { + g.Index().Any().ValuesFunc(func(g *jen.Group) { + for _, typeName := range typeNames { + g.Qual(pkgImport, typeName).Values() + } + }) + }) + }) + + rendered := fmt.Sprintf("%#v", f) + + fh, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + panic(fmt.Errorf("unable to open file: %w", err)) + } + _, err = fh.WriteString(rendered) + if err != nil { + panic(fmt.Errorf("unable to write file: %w", err)) + } + if err := fh.Close(); err != nil { + panic(fmt.Errorf("unable to close file: %w", err)) + } +} diff --git a/syft/internal/packagemetadata/generated.go b/syft/internal/packagemetadata/generated.go new file mode 100644 index 000000000..42bca884f --- /dev/null +++ b/syft/internal/packagemetadata/generated.go @@ -0,0 +1,10 @@ +// DO NOT EDIT: generated by syft/internal/packagemetadata/generate/main.go + +package packagemetadata + +import "github.com/anchore/syft/syft/pkg" + +// AllTypes returns a list of all pkg metadata types that syft supports (that are represented in the pkg.Package.Metadata field). 
+func AllTypes() []any { + return []any{pkg.AlpmMetadata{}, pkg.ApkMetadata{}, pkg.BinaryMetadata{}, pkg.CargoPackageMetadata{}, pkg.CocoapodsMetadata{}, pkg.ConanLockMetadata{}, pkg.ConanMetadata{}, pkg.DartPubMetadata{}, pkg.DotnetDepsMetadata{}, pkg.DpkgMetadata{}, pkg.GemMetadata{}, pkg.GolangBinMetadata{}, pkg.GolangModMetadata{}, pkg.HackageMetadata{}, pkg.JavaMetadata{}, pkg.KbPackageMetadata{}, pkg.LinuxKernelMetadata{}, pkg.LinuxKernelModuleMetadata{}, pkg.MixLockMetadata{}, pkg.NixStoreMetadata{}, pkg.NpmPackageJSONMetadata{}, pkg.NpmPackageLockJSONMetadata{}, pkg.PhpComposerJSONMetadata{}, pkg.PortageMetadata{}, pkg.PythonPackageMetadata{}, pkg.PythonPipfileLockMetadata{}, pkg.PythonRequirementsMetadata{}, pkg.RDescriptionFileMetadata{}, pkg.RebarLockMetadata{}, pkg.RpmMetadata{}} +} diff --git a/syft/internal/packagemetadata/names.go b/syft/internal/packagemetadata/names.go new file mode 100644 index 000000000..f3dec9347 --- /dev/null +++ b/syft/internal/packagemetadata/names.go @@ -0,0 +1,13 @@ +package packagemetadata + +import ( + "reflect" +) + +func AllNames() []string { + names := make([]string, 0) + for _, t := range AllTypes() { + names = append(names, reflect.TypeOf(t).Name()) + } + return names +} diff --git a/syft/internal/packagemetadata/names_test.go b/syft/internal/packagemetadata/names_test.go new file mode 100644 index 000000000..60c0abaf3 --- /dev/null +++ b/syft/internal/packagemetadata/names_test.go @@ -0,0 +1,25 @@ +package packagemetadata + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAllNames(t *testing.T) { + // note: this is a form of completion testing relative to the current code base. 
+ + expected, err := DiscoverTypeNames() + require.NoError(t, err) + + actual := AllNames() + + // ensure that the codebase (from ast analysis) reflects the latest code generated state + if !assert.ElementsMatch(t, expected, actual) { + t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) + t.Log("did you add a new pkg.*Metadata type without updating the JSON schema?") + t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)") + } +} diff --git a/syft/internal/sourcemetadata/completion_tester.go b/syft/internal/sourcemetadata/completion_tester.go new file mode 100644 index 000000000..8dc9ce0c4 --- /dev/null +++ b/syft/internal/sourcemetadata/completion_tester.go @@ -0,0 +1,69 @@ +package sourcemetadata + +import ( + "reflect" + "testing" +) + +type CompletionTester struct { + saw []any + valid []any + ignore []any +} + +func NewCompletionTester(t testing.TB, ignore ...any) *CompletionTester { + tester := &CompletionTester{ + valid: AllTypes(), + ignore: ignore, + } + t.Cleanup(func() { + t.Helper() + tester.validate(t) + }) + return tester +} + +func (tr *CompletionTester) Tested(t testing.TB, m any) { + t.Helper() + + if m == nil { + return + } + if len(tr.valid) == 0 { + t.Fatal("no valid metadata types to test against") + } + ty := reflect.TypeOf(m) + for _, v := range tr.valid { + if reflect.TypeOf(v) == ty { + tr.saw = append(tr.saw, m) + return + } + } + + t.Fatalf("tested metadata type is not valid: %s", ty.Name()) +} + +func (tr *CompletionTester) validate(t testing.TB) { + t.Helper() + + count := make(map[reflect.Type]int) + for _, m := range tr.saw { + count[reflect.TypeOf(m)]++ + } + +validations: + for _, v := range tr.valid { + ty := reflect.TypeOf(v) + + for _, ignore := range tr.ignore { + if ty == reflect.TypeOf(ignore) { + // skip ignored types + continue validations + } + } + + if c, exists := count[ty]; c == 0 || !exists { + t.Errorf("metadata type %s is not 
covered by a test", ty.Name()) + } + } +} diff --git a/syft/internal/sourcemetadata/discover_type_names.go b/syft/internal/sourcemetadata/discover_type_names.go new file mode 100644 index 000000000..9b1ac2f58 --- /dev/null +++ b/syft/internal/sourcemetadata/discover_type_names.go @@ -0,0 +1,148 @@ +package sourcemetadata + +import ( + "fmt" + "go/ast" + "go/parser" + "go/token" + "os/exec" + "path/filepath" + "sort" + "strings" + "unicode" + + "github.com/scylladb/go-set/strset" +) + +var metadataExceptions = strset.New() + +func DiscoverTypeNames() ([]string, error) { + root, err := repoRoot() + if err != nil { + return nil, err + } + files, err := filepath.Glob(filepath.Join(root, "syft/source/*.go")) + if err != nil { + return nil, err + } + return findMetadataDefinitionNames(files...) +} + +func repoRoot() (string, error) { + root, err := exec.Command("git", "rev-parse", "--show-toplevel").Output() + if err != nil { + return "", fmt.Errorf("unable to find repo root dir: %+v", err) + } + absRepoRoot, err := filepath.Abs(strings.TrimSpace(string(root))) + if err != nil { + return "", fmt.Errorf("unable to get abs path to repo root: %w", err) + } + return absRepoRoot, nil +} + +func findMetadataDefinitionNames(paths ...string) ([]string, error) { + names := strset.New() + usedNames := strset.New() + for _, path := range paths { + metadataDefinitions, usedTypeNames, err := findMetadataDefinitionNamesInFile(path) + if err != nil { + return nil, err + } + + // useful for debugging... + // fmt.Println(path) + // fmt.Println("Defs:", metadataDefinitions) + // fmt.Println("Used Types:", usedTypeNames) + // fmt.Println() + + names.Add(metadataDefinitions...) + usedNames.Add(usedTypeNames...) + } + + // any definition that is used within another struct should not be considered a top-level metadata definition + names.Remove(usedNames.List()...) + + strNames := names.List() + sort.Strings(strNames) + + // note: 3 is a point-in-time gut check. 
This number could be updated if new metadata definitions are added, but is not required. + // it is really intended to catch any major issues with the generation process that would generate, say, 0 definitions. + if len(strNames) < 3 { + return nil, fmt.Errorf("not enough metadata definitions found (discovered: " + fmt.Sprintf("%d", len(strNames)) + ")") + } + + return strNames, nil +} + +func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error) { + // set up the parser + fs := token.NewFileSet() + f, err := parser.ParseFile(fs, path, nil, parser.ParseComments) + if err != nil { + return nil, nil, err + } + + var metadataDefinitions []string + var usedTypeNames []string + for _, decl := range f.Decls { + // check if the declaration is a type declaration + spec, ok := decl.(*ast.GenDecl) + if !ok || spec.Tok != token.TYPE { + continue + } + + // loop over all types declared in the type declaration + for _, typ := range spec.Specs { + // check if the type is a struct type + spec, ok := typ.(*ast.TypeSpec) + if !ok || spec.Type == nil { + continue + } + + structType, ok := spec.Type.(*ast.StructType) + if !ok { + continue + } + + // check if the struct type ends with "Metadata" + name := spec.Name.String() + + // only look for exported types that end with "Metadata" + if isMetadataTypeCandidate(name) { + // print the full declaration of the struct type + metadataDefinitions = append(metadataDefinitions, name) + usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...) 
+ } + } + } + return metadataDefinitions, usedTypeNames, nil +} + +func typeNamesUsedInStruct(structType *ast.StructType) []string { + // recursively find all type names used in the struct type + var names []string + for i := range structType.Fields.List { + // capture names of all of the types (not field names) + ast.Inspect(structType.Fields.List[i].Type, func(n ast.Node) bool { + ident, ok := n.(*ast.Ident) + if !ok { + return true + } + + // add the type name to the list + names = append(names, ident.Name) + + // continue inspecting + return true + }) + } + + return names +} + +func isMetadataTypeCandidate(name string) bool { + return len(name) > 0 && + strings.HasSuffix(name, "Metadata") && + unicode.IsUpper(rune(name[0])) && // must be exported + !metadataExceptions.Has(name) +} diff --git a/syft/internal/sourcemetadata/generate/main.go b/syft/internal/sourcemetadata/generate/main.go new file mode 100644 index 000000000..ea40960a0 --- /dev/null +++ b/syft/internal/sourcemetadata/generate/main.go @@ -0,0 +1,55 @@ +package main + +import ( + "fmt" + "os" + + "github.com/dave/jennifer/jen" + + "github.com/anchore/syft/syft/internal/sourcemetadata" +) + +// This program is invoked from syft/internal and generates sourcemetadata/generated.go + +const ( + srcImport = "github.com/anchore/syft/syft/source" + path = "sourcemetadata/generated.go" +) + +func main() { + typeNames, err := sourcemetadata.DiscoverTypeNames() + if err != nil { + panic(fmt.Errorf("unable to get all metadata type names: %w", err)) + } + + fmt.Printf("updating source metadata type list with %+v types\n", len(typeNames)) + + f := jen.NewFile("sourcemetadata") + f.HeaderComment("DO NOT EDIT: generated by syft/internal/sourcemetadata/generate/main.go") + f.ImportName(srcImport, "source") + f.Comment("AllTypes returns a list of all source metadata types that syft supports (that are represented in the source.Description.Metadata field).") + + 
f.Func().Id("AllTypes").Params().Index().Any().BlockFunc(func(g *jen.Group) { + g.ReturnFunc(func(g *jen.Group) { + g.Index().Any().ValuesFunc(func(g *jen.Group) { + for _, typeName := range typeNames { + g.Qual(srcImport, typeName).Values() + } + }) + }) + }) + + rendered := fmt.Sprintf("%#v", f) + + fh, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + panic(fmt.Errorf("unable to open file: %w", err)) + } + _, err = fh.WriteString(rendered) + if err != nil { + panic(fmt.Errorf("unable to write file: %w", err)) + } + if err := fh.Close(); err != nil { + panic(fmt.Errorf("unable to close file: %w", err)) + } +} diff --git a/syft/internal/sourcemetadata/generated.go b/syft/internal/sourcemetadata/generated.go new file mode 100644 index 000000000..c829f7eac --- /dev/null +++ b/syft/internal/sourcemetadata/generated.go @@ -0,0 +1,10 @@ +// DO NOT EDIT: generated by syft/internal/sourcemetadata/generate/main.go + +package sourcemetadata + +import "github.com/anchore/syft/syft/source" + +// AllTypes returns a list of all source metadata types that syft supports (that are represented in the source.Description.Metadata field). 
+func AllTypes() []any { + return []any{source.DirectorySourceMetadata{}, source.FileSourceMetadata{}, source.StereoscopeImageSourceMetadata{}} +} diff --git a/syft/internal/sourcemetadata/names.go b/syft/internal/sourcemetadata/names.go new file mode 100644 index 000000000..b33e7f942 --- /dev/null +++ b/syft/internal/sourcemetadata/names.go @@ -0,0 +1,41 @@ +package sourcemetadata + +import ( + "reflect" + "strings" + + "github.com/anchore/syft/syft/source" +) + +var jsonNameFromType = map[reflect.Type][]string{ + reflect.TypeOf(source.DirectorySourceMetadata{}): {"directory", "dir"}, + reflect.TypeOf(source.FileSourceMetadata{}): {"file"}, + reflect.TypeOf(source.StereoscopeImageSourceMetadata{}): {"image"}, +} + +func AllNames() []string { + names := make([]string, 0) + for _, t := range AllTypes() { + names = append(names, reflect.TypeOf(t).Name()) + } + return names +} + +func JSONName(metadata any) string { + if vs, exists := jsonNameFromType[reflect.TypeOf(metadata)]; exists { + return vs[0] + } + return "" +} + +func ReflectTypeFromJSONName(name string) reflect.Type { + name = strings.ToLower(name) + for t, vs := range jsonNameFromType { + for _, v := range vs { + if v == name { + return t + } + } + } + return nil +} diff --git a/syft/internal/sourcemetadata/names_test.go b/syft/internal/sourcemetadata/names_test.go new file mode 100644 index 000000000..d3ff76211 --- /dev/null +++ b/syft/internal/sourcemetadata/names_test.go @@ -0,0 +1,29 @@ +package sourcemetadata + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestAllNames(t *testing.T) { + // note: this is a form of completion testing relative to the current code base. 
+ + expected, err := DiscoverTypeNames() + require.NoError(t, err) + + actual := AllNames() + + // ensure that the codebase (from ast analysis) reflects the latest code generated state + if !assert.ElementsMatch(t, expected, actual) { + t.Errorf("metadata types not fully represented: \n%s", cmp.Diff(expected, actual)) + t.Log("did you add a new source.*Metadata type without updating the JSON schema?") + t.Log("if so, you need to update the schema version and regenerate the JSON schema (make generate-json-schema)") + } + + for _, ty := range AllTypes() { + assert.NotEmpty(t, JSONName(ty), "metadata type %q does not have a JSON name", ty) + } +} diff --git a/syft/internal/windows/path.go b/syft/internal/windows/path.go new file mode 100644 index 000000000..7aa59d1ca --- /dev/null +++ b/syft/internal/windows/path.go @@ -0,0 +1,41 @@ +package windows + +import ( + "path" + "path/filepath" + "runtime" + "strings" +) + +const windowsGoOS = "windows" + +func HostRunningOnWindows() bool { + return runtime.GOOS == windowsGoOS +} + +func ToPosix(windowsPath string) (posixPath string) { + // volume should be encoded at the start (e.g /c/) where c is the volume + volumeName := filepath.VolumeName(windowsPath) + pathWithoutVolume := strings.TrimPrefix(windowsPath, volumeName) + volumeLetter := strings.ToLower(strings.TrimSuffix(volumeName, ":")) + + // translate non-escaped backslash to forwardslash + translatedPath := strings.ReplaceAll(pathWithoutVolume, "\\", "/") + + // always have `/` as the root... join all components, e.g.: + // convert: C:\\some\windows\Place + // into: /c/some/windows/Place + return path.Clean("/" + strings.Join([]string{volumeLetter, translatedPath}, "/")) +} + +func FromPosix(posixPath string) (windowsPath string) { + // decode the volume (e.g. /c/ --> C:\\) - There should always be a volume name. 
+ pathFields := strings.Split(posixPath, "/") + volumeName := strings.ToUpper(pathFields[1]) + `:\\` + + // translate non-escaped forward slashes into backslashes + remainingTranslatedPath := strings.Join(pathFields[2:], "\\") + + // combine volume name and backslash components + return filepath.Clean(volumeName + remainingTranslatedPath) +} diff --git a/syft/lib.go b/syft/lib.go index ea2869006..849584ab7 100644 --- a/syft/lib.go +++ b/syft/lib.go @@ -34,7 +34,7 @@ import ( // CatalogPackages takes an inventory of packages from the given image from a particular perspective // (e.g. squashed source, all-layers source). Returns the discovered set of packages, the identified Linux // distribution, and the source object used to wrap the data source. -func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Collection, []artifact.Relationship, *linux.Release, error) { +func CatalogPackages(src source.Source, cfg cataloger.Config) (*pkg.Collection, []artifact.Relationship, *linux.Release, error) { resolver, err := src.FileResolver(cfg.Search.Scope) if err != nil { return nil, nil, nil, fmt.Errorf("unable to determine resolver while cataloging packages: %w", err) @@ -54,18 +54,21 @@ func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Collection, catalogers = cataloger.AllCatalogers(cfg) } else { // otherwise conditionally use the correct set of loggers based on the input type (container image or directory) - switch src.Metadata.Scheme { - case source.ImageScheme: + + // TODO: this is bad, we should not be using the concrete type to determine the cataloger set + // instead this should be a caller concern (pass the catalogers you want to use). The SBOM build PR will do this. 
+ switch src.(type) { + case *source.StereoscopeImageSource: log.Info("cataloging an image") catalogers = cataloger.ImageCatalogers(cfg) - case source.FileScheme: + case *source.FileSource: log.Info("cataloging a file") catalogers = cataloger.AllCatalogers(cfg) - case source.DirectoryScheme: + case *source.DirectorySource: log.Info("cataloging a directory") catalogers = cataloger.DirectoryCatalogers(cfg) default: - return nil, nil, nil, fmt.Errorf("unable to determine cataloger set from scheme=%+v", src.Metadata.Scheme) + return nil, nil, nil, fmt.Errorf("unsupported source type: %T", src) } } @@ -76,7 +79,7 @@ func CatalogPackages(src *source.Source, cfg cataloger.Config) (*pkg.Collection, return catalog, relationships, release, err } -func newSourceRelationshipsFromCatalog(src *source.Source, c *pkg.Collection) []artifact.Relationship { +func newSourceRelationshipsFromCatalog(src source.Source, c *pkg.Collection) []artifact.Relationship { relationships := make([]artifact.Relationship, 0) // Should we pre-allocate this by giving catalog a Len() method? 
for p := range c.Enumerate() { relationships = append(relationships, artifact.Relationship{ diff --git a/syft/linux/identify_release_test.go b/syft/linux/identify_release_test.go index 00d04ee71..9af79fcdf 100644 --- a/syft/linux/identify_release_test.go +++ b/syft/linux/identify_release_test.go @@ -336,7 +336,7 @@ func TestIdentifyRelease(t *testing.T) { for _, test := range tests { t.Run(test.fixture, func(t *testing.T) { - s, err := source.NewFromDirectory(test.fixture) + s, err := source.NewFromDirectoryPath(test.fixture) require.NoError(t, err) resolver, err := s.FileResolver(source.SquashedScope) diff --git a/syft/pkg/cataloger/binary/cataloger_test.go b/syft/pkg/cataloger/binary/cataloger_test.go index d6622423b..4a36a5e1d 100644 --- a/syft/pkg/cataloger/binary/cataloger_test.go +++ b/syft/pkg/cataloger/binary/cataloger_test.go @@ -649,7 +649,7 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases(t *testing.T) { t.Run(test.name, func(t *testing.T) { c := NewCataloger() - src, err := source.NewFromDirectory(test.fixtureDir) + src, err := source.NewFromDirectoryPath(test.fixtureDir) require.NoError(t, err) resolver, err := src.FileResolver(source.SquashedScope) @@ -688,7 +688,7 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { c := NewCataloger() img := imagetest.GetFixtureImage(t, "docker-archive", test.fixtureImage) - src, err := source.NewFromImage(img, "test-img") + src, err := source.NewFromStereoscopeImageObject(img, test.fixtureImage, nil) require.NoError(t, err) resolver, err := src.FileResolver(source.SquashedScope) @@ -718,7 +718,7 @@ func Test_Cataloger_DefaultClassifiers_PositiveCases_Image(t *testing.T) { func TestClassifierCataloger_DefaultClassifiers_NegativeCases(t *testing.T) { c := NewCataloger() - src, err := source.NewFromDirectory("test-fixtures/classifiers/negative") + src, err := source.NewFromDirectoryPath("test-fixtures/classifiers/negative") assert.NoError(t, err) resolver, err := 
src.FileResolver(source.SquashedScope) diff --git a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go index 9545c66b6..573cc5bee 100644 --- a/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go +++ b/syft/pkg/cataloger/internal/pkgtest/test_generic_parser.go @@ -85,7 +85,7 @@ func DefaultLicenseComparer(x, y pkg.License) bool { func (p *CatalogTester) FromDirectory(t *testing.T, path string) *CatalogTester { t.Helper() - s, err := source.NewFromDirectory(path) + s, err := source.NewFromDirectoryPath(path) require.NoError(t, err) resolver, err := s.FileResolver(source.AllLayersScope) @@ -149,7 +149,7 @@ func (p *CatalogTester) WithImageResolver(t *testing.T, fixtureName string) *Cat t.Helper() img := imagetest.GetFixtureImage(t, "docker-archive", fixtureName) - s, err := source.NewFromImage(img, fixtureName) + s, err := source.NewFromStereoscopeImageObject(img, fixtureName, nil) require.NoError(t, err) r, err := s.FileResolver(source.SquashedScope) diff --git a/syft/pkg/cataloger/search_config.go b/syft/pkg/cataloger/search_config.go index f92dc9928..17a6a3019 100644 --- a/syft/pkg/cataloger/search_config.go +++ b/syft/pkg/cataloger/search_config.go @@ -1,6 +1,8 @@ package cataloger -import "github.com/anchore/syft/syft/source" +import ( + "github.com/anchore/syft/syft/source" +) type SearchConfig struct { IncludeIndexedArchives bool diff --git a/syft/sbom/sbom.go b/syft/sbom/sbom.go index 0bc8feb0c..8592d8440 100644 --- a/syft/sbom/sbom.go +++ b/syft/sbom/sbom.go @@ -15,7 +15,7 @@ import ( type SBOM struct { Artifacts Artifacts Relationships []artifact.Relationship - Source source.Metadata + Source source.Description Descriptor Descriptor } diff --git a/syft/source/alias.go b/syft/source/alias.go new file mode 100644 index 000000000..e1c5c6701 --- /dev/null +++ b/syft/source/alias.go @@ -0,0 +1,13 @@ +package source + +type Alias struct { + Name string `json:"name" yaml:"name" 
mapstructure:"name"` + Version string `json:"version" yaml:"version" mapstructure:"version"` +} + +func (a *Alias) IsEmpty() bool { + if a == nil { + return true + } + return a.Name == "" && a.Version == "" +} diff --git a/syft/source/description.go b/syft/source/description.go new file mode 100644 index 000000000..0aae58257 --- /dev/null +++ b/syft/source/description.go @@ -0,0 +1,9 @@ +package source + +// Description represents any static source data that helps describe "what" was cataloged. +type Description struct { + ID string `hash:"ignore"` // the id generated from the parent source struct + Name string `hash:"ignore"` + Version string `hash:"ignore"` + Metadata interface{} +} diff --git a/syft/source/detection.go b/syft/source/detection.go new file mode 100644 index 000000000..3d301f14d --- /dev/null +++ b/syft/source/detection.go @@ -0,0 +1,200 @@ +package source + +import ( + "crypto" + "fmt" + "strings" + + "github.com/mitchellh/go-homedir" + "github.com/spf13/afero" + + "github.com/anchore/stereoscope/pkg/image" +) + +type detectedType string + +const ( + // unknownType is the default scheme + unknownType detectedType = "unknown-type" + + // directoryType indicates the source being cataloged is a directory on the root filesystem + directoryType detectedType = "directory-type" + + // containerImageType indicates the source being cataloged is a container image + containerImageType detectedType = "container-image-type" + + // fileType indicates the source being cataloged is a single file + fileType detectedType = "file-type" +) + +type sourceResolver func(string) (image.Source, string, error) + +// Detection is an object that captures the detected user input regarding source location, scheme, and provider type. +// It acts as a struct input for some source constructors. 
+type Detection struct { + detectedType detectedType + imageSource image.Source + location string +} + +func (d Detection) IsContainerImage() bool { + return d.detectedType == containerImageType +} + +type DetectConfig struct { + DefaultImageSource string +} + +func DefaultDetectConfig() DetectConfig { + return DetectConfig{} +} + +// Detect generates a source Detection that can be used as an argument to generate a new source +// from specific providers including a registry, with an explicit name. +func Detect(userInput string, cfg DetectConfig) (*Detection, error) { + fs := afero.NewOsFs() + ty, src, location, err := detect(fs, image.DetectSource, userInput) + if err != nil { + return nil, err + } + + if src == image.UnknownSource { + // only run for these two schemes + // only check on packages command, attest we automatically try to pull from userInput + switch ty { + case containerImageType, unknownType: + ty = containerImageType + location = userInput + if cfg.DefaultImageSource != "" { + src = parseDefaultImageSource(cfg.DefaultImageSource) + } else { + src = image.DetermineDefaultImagePullSource(userInput) + } + } + } + + // collect user input for downstream consumption + return &Detection{ + detectedType: ty, + imageSource: src, + location: location, + }, nil +} + +type DetectionSourceConfig struct { + Alias Alias + RegistryOptions *image.RegistryOptions + Platform *image.Platform + Exclude ExcludeConfig + DigestAlgorithms []crypto.Hash +} + +func DefaultDetectionSourceConfig() DetectionSourceConfig { + return DetectionSourceConfig{ + DigestAlgorithms: []crypto.Hash{ + crypto.SHA256, + }, + } +} + +// NewSource produces a Source based on userInput like dir: or image:tag +func (d Detection) NewSource(cfg DetectionSourceConfig) (Source, error) { + var err error + var src Source + + if d.detectedType != containerImageType && cfg.Platform != nil { + return nil, fmt.Errorf("cannot specify a platform for a non-image source") + } + + switch d.detectedType { + case 
fileType: + src, err = NewFromFile( + FileConfig{ + Path: d.location, + Exclude: cfg.Exclude, + DigestAlgorithms: cfg.DigestAlgorithms, + Alias: cfg.Alias, + }, + ) + case directoryType: + src, err = NewFromDirectory( + DirectoryConfig{ + Path: d.location, + Base: d.location, + Exclude: cfg.Exclude, + Alias: cfg.Alias, + }, + ) + case containerImageType: + src, err = NewFromStereoscopeImage( + StereoscopeImageConfig{ + Reference: d.location, + From: d.imageSource, + Platform: cfg.Platform, + RegistryOptions: cfg.RegistryOptions, + Exclude: cfg.Exclude, + Alias: cfg.Alias, + }, + ) + default: + err = fmt.Errorf("unable to process input for scanning") + } + + return src, err +} + +func detect(fs afero.Fs, imageSourceResolver sourceResolver, userInput string) (detectedType, image.Source, string, error) { + switch { + case strings.HasPrefix(userInput, "dir:"): + dirLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "dir:")) + if err != nil { + return unknownType, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) + } + return directoryType, image.UnknownSource, dirLocation, nil + + case strings.HasPrefix(userInput, "file:"): + fileLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "file:")) + if err != nil { + return unknownType, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) + } + return fileType, image.UnknownSource, fileLocation, nil + } + + // try the most specific sources first and move out towards more generic sources. + + // first: let's try the image detector, which has more scheme parsing internal to stereoscope + src, imageSpec, err := imageSourceResolver(userInput) + if err == nil && src != image.UnknownSource { + return containerImageType, src, imageSpec, nil + } + + // next: let's try more generic sources (dir, file, etc.) 
+ location, err := homedir.Expand(userInput) + if err != nil { + return unknownType, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err) + } + + fileMeta, err := fs.Stat(location) + if err != nil { + return unknownType, src, "", nil + } + + if fileMeta.IsDir() { + return directoryType, src, location, nil + } + + return fileType, src, location, nil +} + +func parseDefaultImageSource(defaultImageSource string) image.Source { + switch defaultImageSource { + case "registry": + return image.OciRegistrySource + case "docker": + return image.DockerDaemonSource + case "podman": + return image.PodmanDaemonSource + default: + return image.UnknownSource + } +} diff --git a/syft/source/scheme_test.go b/syft/source/detection_test.go similarity index 88% rename from syft/source/scheme_test.go rename to syft/source/detection_test.go index 0523f977e..380ca8e65 100644 --- a/syft/source/scheme_test.go +++ b/syft/source/detection_test.go @@ -11,7 +11,7 @@ import ( "github.com/anchore/stereoscope/pkg/image" ) -func TestDetectScheme(t *testing.T) { +func Test_Detect(t *testing.T) { type detectorResult struct { src image.Source ref string @@ -24,7 +24,7 @@ func TestDetectScheme(t *testing.T) { dirs []string files []string detection detectorResult - expectedScheme Scheme + expectedScheme detectedType expectedLocation string }{ { @@ -34,7 +34,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive:latest", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "wagoodman/dive:latest", }, { @@ -44,7 +44,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "wagoodman/dive", }, { @@ -54,7 +54,7 @@ func TestDetectScheme(t *testing.T) { src: image.OciRegistrySource, ref: "wagoodman/dive:latest", }, - expectedScheme: ImageScheme, + expectedScheme: 
containerImageType, expectedLocation: "wagoodman/dive:latest", }, { @@ -64,7 +64,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive:latest", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "wagoodman/dive:latest", }, { @@ -74,7 +74,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "wagoodman/dive", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "wagoodman/dive", }, { @@ -84,7 +84,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "latest", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, // we expected to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "latest", @@ -96,7 +96,7 @@ func TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "docker:latest", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, // we expected to be able to handle this case better, however, I don't see a way to do this // the user will need to provide more explicit input (docker:docker:latest) expectedLocation: "docker:latest", @@ -108,7 +108,7 @@ func TestDetectScheme(t *testing.T) { src: image.OciTarballSource, ref: "some/path-to-file", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "some/path-to-file", }, { @@ -119,7 +119,7 @@ func TestDetectScheme(t *testing.T) { ref: "some/path-to-dir", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "some/path-to-dir", }, { @@ -130,7 +130,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: "some/path-to-dir", }, { @@ -140,7 +140,7 @@ func 
TestDetectScheme(t *testing.T) { src: image.DockerDaemonSource, ref: "some/path-to-dir", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "some/path-to-dir", }, { @@ -150,7 +150,7 @@ func TestDetectScheme(t *testing.T) { src: image.PodmanDaemonSource, ref: "something:latest", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "something:latest", }, { @@ -161,7 +161,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"some/path-to-dir"}, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: "some/path-to-dir", }, { @@ -172,7 +172,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, files: []string{"some/path-to-file"}, - expectedScheme: FileScheme, + expectedScheme: fileType, expectedLocation: "some/path-to-file", }, { @@ -183,7 +183,7 @@ func TestDetectScheme(t *testing.T) { ref: "", }, files: []string{"some/path-to-file"}, - expectedScheme: FileScheme, + expectedScheme: fileType, expectedLocation: "some/path-to-file", }, { @@ -193,7 +193,7 @@ func TestDetectScheme(t *testing.T) { src: image.UnknownSource, ref: "", }, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: ".", }, { @@ -203,7 +203,7 @@ func TestDetectScheme(t *testing.T) { src: image.UnknownSource, ref: "", }, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: ".", }, // we should support tilde expansion @@ -214,7 +214,7 @@ func TestDetectScheme(t *testing.T) { src: image.OciDirectorySource, ref: "~/some-path", }, - expectedScheme: ImageScheme, + expectedScheme: containerImageType, expectedLocation: "~/some-path", }, { @@ -225,26 +225,26 @@ func TestDetectScheme(t *testing.T) { ref: "", }, dirs: []string{"~/some-path"}, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-explicit-exists", userInput: "dir:~/some-path", dirs: 
[]string{"~/some-path"}, - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-explicit-dne", userInput: "dir:~/some-path", - expectedScheme: DirectoryScheme, + expectedScheme: directoryType, expectedLocation: "~/some-path", }, { name: "tilde-expansion-dir-implicit-dne", userInput: "~/some-path", - expectedScheme: UnknownScheme, + expectedScheme: unknownType, expectedLocation: "", }, } @@ -288,7 +288,7 @@ func TestDetectScheme(t *testing.T) { } } - actualScheme, actualSource, actualLocation, err := DetectScheme(fs, imageDetector, test.userInput) + actualScheme, actualSource, actualLocation, err := detect(fs, imageDetector, test.userInput) if err != nil { t.Fatalf("unexpected err : %+v", err) } diff --git a/syft/source/digest_utils.go b/syft/source/digest_utils.go new file mode 100644 index 000000000..6c7f2feeb --- /dev/null +++ b/syft/source/digest_utils.go @@ -0,0 +1,11 @@ +package source + +import ( + "strings" + + "github.com/anchore/syft/syft/artifact" +) + +func artifactIDFromDigest(input string) artifact.ID { + return artifact.ID(strings.TrimPrefix(input, "sha256:")) +} diff --git a/syft/source/directory_source.go b/syft/source/directory_source.go new file mode 100644 index 000000000..ab7f3d462 --- /dev/null +++ b/syft/source/directory_source.go @@ -0,0 +1,215 @@ +package source + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "sync" + + "github.com/bmatcuk/doublestar/v4" + "github.com/opencontainers/go-digest" + + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/fileresolver" +) + +var _ Source = (*DirectorySource)(nil) + +type DirectoryConfig struct { + Path string + Base string + Exclude ExcludeConfig + Alias Alias +} + +type DirectorySourceMetadata struct { + Path string `json:"path" yaml:"path"` + Base string `json:"-" yaml:"-"` // though this is important, 
for display purposes it leaks too much information (abs paths) +} + +type DirectorySource struct { + id artifact.ID + config DirectoryConfig + resolver *fileresolver.Directory + mutex *sync.Mutex +} + +func NewFromDirectoryPath(path string) (*DirectorySource, error) { + cfg := DirectoryConfig{ + Path: path, + } + return NewFromDirectory(cfg) +} + +func NewFromDirectory(cfg DirectoryConfig) (*DirectorySource, error) { + fi, err := os.Stat(cfg.Path) + if err != nil { + return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err) + } + + if !fi.IsDir() { + return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path) + } + + return &DirectorySource{ + id: deriveIDFromDirectory(cfg), + config: cfg, + mutex: &sync.Mutex{}, + }, nil +} + +// deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then +// the artifact ID is derived exclusively from the alias name and version. Otherwise, the artifact ID is derived +// from the path provided with an attempt to prune a prefix if a base is given. Since the contents of the directory +// are not considered, there is no semantic meaning to the artifact ID -- this is why the alias is preferred without +// consideration for the path. +func deriveIDFromDirectory(cfg DirectoryConfig) artifact.ID { + var info string + if !cfg.Alias.IsEmpty() { + // don't use any of the path information -- instead use the alias name and version as the artifact ID. + // why? this allows the user to set a dependable stable value for the artifact ID in case the + // scanning root changes (e.g. a user scans a directory, then moves it to a new location and scans again). 
+ info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version) + } else { + log.Warn("no explicit name and version provided for directory source, deriving artifact ID from the given path (which is not ideal)") + info = cleanDirPath(cfg.Path, cfg.Base) + } + + return artifactIDFromDigest(digest.SHA256.FromString(filepath.Clean(info)).String()) +} + +func cleanDirPath(path, base string) string { + if path == base { + return path + } + + if base != "" { + cleanRoot, rootErr := fileresolver.NormalizeRootDirectory(path) + cleanBase, baseErr := fileresolver.NormalizeBaseDirectory(base) + + if rootErr == nil && baseErr == nil { + // allows for normalizing inputs: + // cleanRoot: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001/some/path + // cleanBase: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001 + // normalized: some/path + + relPath, err := filepath.Rel(cleanBase, cleanRoot) + if err == nil { + path = relPath + } + // this is odd, but this means we can't use base + } + // if the base is not a valid chroot, then just use the path as-is + } + + return path +} + +func (s DirectorySource) ID() artifact.ID { + return s.id +} + +func (s DirectorySource) Describe() Description { + name := cleanDirPath(s.config.Path, s.config.Base) + version := "" + if !s.config.Alias.IsEmpty() { + a := s.config.Alias + if a.Name != "" { + name = a.Name + } + } + return Description{ + ID: string(s.id), + Name: name, + Version: version, + Metadata: DirectorySourceMetadata{ + Path: s.config.Path, + Base: s.config.Base, + }, + } +} + +func (s *DirectorySource) FileResolver(_ Scope) (file.Resolver, error) { + s.mutex.Lock() + defer s.mutex.Unlock() + + if s.resolver == nil { + exclusionFunctions, err := getDirectoryExclusionFunctions(s.config.Path, s.config.Exclude.Paths) + if err != nil { + return nil, err + } + + res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...) 
+ if err != nil { + return nil, fmt.Errorf("unable to create directory resolver: %w", err) + } + + s.resolver = res + } + + return s.resolver, nil +} + +func (s *DirectorySource) Close() error { + s.mutex.Lock() + defer s.mutex.Unlock() + s.resolver = nil + return nil +} + +func getDirectoryExclusionFunctions(root string, exclusions []string) ([]fileresolver.PathIndexVisitor, error) { + if len(exclusions) == 0 { + return nil, nil + } + + // this is what directoryResolver.indexTree is doing to get the absolute path: + root, err := filepath.Abs(root) + if err != nil { + return nil, err + } + + // this handles Windows file paths by converting them to C:/something/else format + root = filepath.ToSlash(root) + + if !strings.HasSuffix(root, "/") { + root += "/" + } + + var errors []string + for idx, exclusion := range exclusions { + // check exclusions for supported paths, these are all relative to the "scan root" + if strings.HasPrefix(exclusion, "./") || strings.HasPrefix(exclusion, "*/") || strings.HasPrefix(exclusion, "**/") { + exclusion = strings.TrimPrefix(exclusion, "./") + exclusions[idx] = root + exclusion + } else { + errors = append(errors, exclusion) + } + } + + if errors != nil { + return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '")) + } + + return []fileresolver.PathIndexVisitor{ + func(path string, info os.FileInfo, _ error) error { + for _, exclusion := range exclusions { + // this is required to handle Windows filepaths + path = filepath.ToSlash(path) + matches, err := doublestar.Match(exclusion, path) + if err != nil { + return nil + } + if matches { + if info != nil && info.IsDir() { + return filepath.SkipDir + } + return fileresolver.ErrSkipPath + } + } + return nil + }, + }, nil +} diff --git a/syft/source/directory_source_test.go b/syft/source/directory_source_test.go new file mode 100644 index 000000000..c324fb91b --- /dev/null +++ 
b/syft/source/directory_source_test.go @@ -0,0 +1,560 @@ +package source + +import ( + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/internal/fileresolver" +) + +func TestNewFromDirectory(t *testing.T) { + testCases := []struct { + desc string + input string + expString string + inputPaths []string + expectedRefs int + cxErr require.ErrorAssertionFunc + }{ + { + desc: "no paths exist", + input: "foobar/", + inputPaths: []string{"/opt/", "/other"}, + cxErr: require.Error, + }, + { + desc: "path detected", + input: "test-fixtures", + inputPaths: []string{"path-detected/.vimrc"}, + expectedRefs: 1, + }, + { + desc: "directory ignored", + input: "test-fixtures", + inputPaths: []string{"path-detected"}, + expectedRefs: 0, + }, + { + desc: "no files-by-path detected", + input: "test-fixtures", + inputPaths: []string{"no-path-detected"}, + expectedRefs: 0, + }, + } + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + if test.cxErr == nil { + test.cxErr = require.NoError + } + src, err := NewFromDirectory(DirectoryConfig{ + Path: test.input, + }) + test.cxErr(t, err) + if err != nil { + return + } + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + assert.Equal(t, test.input, src.Describe().Metadata.(DirectorySourceMetadata).Path) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + refs, err := res.FilesByPath(test.inputPaths...) 
+ require.NoError(t, err) + + if len(refs) != test.expectedRefs { + t.Errorf("unexpected number of refs returned: %d != %d", len(refs), test.expectedRefs) + } + + }) + } +} + +func Test_DirectorySource_FilesByGlob(t *testing.T) { + testCases := []struct { + desc string + input string + glob string + expected int + }{ + { + input: "test-fixtures", + desc: "no matches", + glob: "bar/foo", + expected: 0, + }, + { + input: "test-fixtures/path-detected", + desc: "a single match", + glob: "**/*vimrc", + expected: 1, + }, + { + input: "test-fixtures/path-detected", + desc: "multiple matches", + glob: "**", + expected: 2, + }, + } + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + src, err := NewFromDirectory(DirectoryConfig{Path: test.input}) + require.NoError(t, err) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + contents, err := res.FilesByGlob(test.glob) + require.NoError(t, err) + if len(contents) != test.expected { + t.Errorf("unexpected number of files found by glob (%s): %d != %d", test.glob, len(contents), test.expected) + } + + }) + } +} + +func Test_DirectorySource_Exclusions(t *testing.T) { + testCases := []struct { + desc string + input string + glob string + expected []string + exclusions []string + err bool + }{ + { + input: "test-fixtures/system_paths", + desc: "exclude everything", + glob: "**", + expected: nil, + exclusions: []string{"**/*"}, + }, + { + input: "test-fixtures/image-simple", + desc: "a single path excluded", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + }, + exclusions: []string{"**/target/**"}, + }, + { + input: "test-fixtures/image-simple", + desc: "exclude explicit directory relative to the root", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + //"target/really/nested/file-3.txt", // explicitly skipped + }, + exclusions: []string{"./target"}, + 
}, + { + input: "test-fixtures/image-simple", + desc: "exclude explicit file relative to the root", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + "file-2.txt", + "target/really/nested/file-3.txt", + }, + exclusions: []string{"./file-1.txt"}, + }, + { + input: "test-fixtures/image-simple", + desc: "exclude wildcard relative to the root", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + //"file-2.txt", // explicitly skipped + "target/really/nested/file-3.txt", + }, + exclusions: []string{"./*.txt"}, + }, + { + input: "test-fixtures/image-simple", + desc: "exclude files deeper", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + //"target/really/nested/file-3.txt", // explicitly skipped + }, + exclusions: []string{"**/really/**"}, + }, + { + input: "test-fixtures/image-simple", + desc: "files excluded with extension", + glob: "**", + expected: []string{ + "Dockerfile", + //"file-1.txt", // explicitly skipped + //"file-2.txt", // explicitly skipped + //"target/really/nested/file-3.txt", // explicitly skipped + }, + exclusions: []string{"**/*.txt"}, + }, + { + input: "test-fixtures/image-simple", + desc: "keep files with different extensions", + glob: "**", + expected: []string{ + "Dockerfile", + "file-1.txt", + "file-2.txt", + "target/really/nested/file-3.txt", + }, + exclusions: []string{"**/target/**/*.jar"}, + }, + { + input: "test-fixtures/path-detected", + desc: "file directly excluded", + glob: "**", + expected: []string{ + ".vimrc", + }, + exclusions: []string{"**/empty"}, + }, + { + input: "test-fixtures/path-detected", + desc: "pattern error containing **/", + glob: "**", + expected: []string{ + ".vimrc", + }, + exclusions: []string{"/**/empty"}, + err: true, + }, + { + input: "test-fixtures/path-detected", + desc: "pattern error incorrect start", + glob: "**", + expected: []string{ + ".vimrc", + }, + exclusions: []string{"empty"}, + 
err: true, + }, + { + input: "test-fixtures/path-detected", + desc: "pattern error starting with /", + glob: "**", + expected: []string{ + ".vimrc", + }, + exclusions: []string{"/empty"}, + err: true, + }, + } + + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + src, err := NewFromDirectory(DirectoryConfig{ + Path: test.input, + Exclude: ExcludeConfig{ + Paths: test.exclusions, + }, + }) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + if test.err { + _, err = src.FileResolver(SquashedScope) + require.Error(t, err) + return + } + require.NoError(t, err) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + locations, err := res.FilesByGlob(test.glob) + require.NoError(t, err) + + var actual []string + for _, l := range locations { + actual = append(actual, l.RealPath) + } + + assert.ElementsMatchf(t, test.expected, actual, "diff \n"+cmp.Diff(test.expected, actual)) + }) + } +} + +func Test_getDirectoryExclusionFunctions_crossPlatform(t *testing.T) { + testCases := []struct { + desc string + root string + path string + finfo os.FileInfo + exclude string + walkHint error + }{ + { + desc: "directory exclusion", + root: "/", + path: "/usr/var/lib", + exclude: "**/var/lib", + finfo: file.ManualInfo{ModeValue: os.ModeDir}, + walkHint: fs.SkipDir, + }, + { + desc: "no file info", + root: "/", + path: "/usr/var/lib", + exclude: "**/var/lib", + walkHint: fileresolver.ErrSkipPath, + }, + // linux specific tests... 
+ { + desc: "linux doublestar", + root: "/usr", + path: "/usr/var/lib/etc.txt", + exclude: "**/*.txt", + finfo: file.ManualInfo{}, + walkHint: fileresolver.ErrSkipPath, + }, + { + desc: "linux relative", + root: "/usr/var/lib", + path: "/usr/var/lib/etc.txt", + exclude: "./*.txt", + finfo: file.ManualInfo{}, + + walkHint: fileresolver.ErrSkipPath, + }, + { + desc: "linux one level", + root: "/usr", + path: "/usr/var/lib/etc.txt", + exclude: "*/*.txt", + finfo: file.ManualInfo{}, + walkHint: nil, + }, + // NOTE: since these tests will run in linux and macOS, the windows paths will be + // considered relative if they do not start with a forward slash and paths with backslashes + // won't be modified by the filepath.ToSlash call, so these are emulating the result of + // filepath.ToSlash usage + + // windows specific tests... + { + desc: "windows doublestar", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "**/*.txt", + finfo: file.ManualInfo{}, + walkHint: fileresolver.ErrSkipPath, + }, + { + desc: "windows relative", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "./*.txt", + finfo: file.ManualInfo{}, + walkHint: fileresolver.ErrSkipPath, + }, + { + desc: "windows one level", + root: "/C:/User/stuff", + path: "/C:/User/stuff/thing.txt", + exclude: "*/*.txt", + finfo: file.ManualInfo{}, + walkHint: nil, + }, + } + + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + fns, err := getDirectoryExclusionFunctions(test.root, []string{test.exclude}) + require.NoError(t, err) + + for _, f := range fns { + result := f(test.path, test.finfo, nil) + require.Equal(t, test.walkHint, result) + } + }) + } +} + +func Test_DirectorySource_FilesByPathDoesNotExist(t *testing.T) { + testCases := []struct { + desc string + input string + path string + expected string + }{ + { + input: "test-fixtures/path-detected", + desc: "path does not exist", + path: "foo", + }, + } + for _, test := range testCases { + 
t.Run(test.desc, func(t *testing.T) { + src, err := NewFromDirectory(DirectoryConfig{Path: test.input}) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + refs, err := res.FilesByPath(test.path) + require.NoError(t, err) + + assert.Len(t, refs, 0) + }) + } +} + +func Test_DirectorySource_ID(t *testing.T) { + tests := []struct { + name string + cfg DirectoryConfig + want artifact.ID + wantErr require.ErrorAssertionFunc + }{ + { + name: "empty", + cfg: DirectoryConfig{}, + wantErr: require.Error, + }, + { + name: "to a non-existent directory", + cfg: DirectoryConfig{ + Path: "./test-fixtures/does-not-exist", + }, + wantErr: require.Error, + }, + { + name: "with odd unclean path through non-existent directory", + cfg: DirectoryConfig{Path: "test-fixtures/does-not-exist/../"}, + wantErr: require.Error, + }, + { + name: "to a file (not a directory)", + cfg: DirectoryConfig{ + Path: "./test-fixtures/image-simple/Dockerfile", + }, + wantErr: require.Error, + }, + { + name: "to dir with name and version", + cfg: DirectoryConfig{ + Path: "./test-fixtures", + Alias: Alias{ + Name: "name-me-that!", + Version: "version-me-this!", + }, + }, + want: artifact.ID("51a5f2a1536cf4b5220d4247814b07eec5862ab0547050f90e9ae216548ded7e"), + }, + { + name: "to different dir with name and version", + cfg: DirectoryConfig{ + Path: "./test-fixtures/image-simple", + Alias: Alias{ + Name: "name-me-that!", + Version: "version-me-this!", + }, + }, + // note: this must match the previous value because the alias should trump the path info + want: artifact.ID("51a5f2a1536cf4b5220d4247814b07eec5862ab0547050f90e9ae216548ded7e"), + }, + { + name: "with path", + cfg: DirectoryConfig{Path: "./test-fixtures"}, + want: artifact.ID("c2f936b0054dc6114fc02a3446bf8916bde8fdf87166a23aee22ea011b443522"), + }, + { + name: "with unclean path", + cfg: DirectoryConfig{Path: 
"test-fixtures/image-simple/../"}, + want: artifact.ID("c2f936b0054dc6114fc02a3446bf8916bde8fdf87166a23aee22ea011b443522"), + }, + { + name: "other fields do not affect ID", + cfg: DirectoryConfig{ + Path: "test-fixtures", + Base: "a-base!", + Exclude: ExcludeConfig{ + Paths: []string{"a", "b"}, + }, + }, + want: artifact.ID("c2f936b0054dc6114fc02a3446bf8916bde8fdf87166a23aee22ea011b443522"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + s, err := NewFromDirectory(tt.cfg) + tt.wantErr(t, err) + if err != nil { + return + } + assert.Equalf(t, tt.want, s.ID(), "ID()") + }) + } +} + +func Test_cleanDirPath(t *testing.T) { + + abs, err := filepath.Abs("test-fixtures") + require.NoError(t, err) + + tests := []struct { + name string + path string + base string + want string + }{ + { + name: "abs path, abs base, base contained in path", + path: filepath.Join(abs, "system_paths/outside_root"), + base: abs, + want: "system_paths/outside_root", + }, + { + name: "abs path, abs base, base not contained in path", + path: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/001/some/path", + base: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/002", + want: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/001/some/path", + }, + { + name: "path and base match", + path: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/001/some/path", + base: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/001/some/path", + want: "/var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/001/some/path", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, cleanDirPath(tt.path, tt.base)) + }) + } +} diff --git a/syft/source/directory_source_win_test.go b/syft/source/directory_source_win_test.go new file mode 100644 index 000000000..aa0d0a277 --- /dev/null +++ b/syft/source/directory_source_win_test.go @@ -0,0 +1,65 @@ +//go:build windows +// +build windows + +// 
why the build tags? there is behavior from filepath.ToSlash() that must be tested, but can't be tested on non-windows +// since the stdlib keeps this functionality behind a build tag (specifically filepath.Separator): +// - https://github.com/golang/go/blob/3aea422e2cb8b1ec2e0c2774be97fe96c7299838/src/path/filepath/path.go#L224-L227 +// - https://github.com/golang/go/blob/3aea422e2cb8b1ec2e0c2774be97fe96c7299838/src/path/filepath/path.go#L63 +// - https://github.com/golang/go/blob/master/src/os/path_windows.go#L8 +// +// It would be nice to extract this to simplify testing, however, we also need filepath.Abs(), which in windows +// requires a specific syscall: +// - https://github.com/golang/go/blob/3aea422e2cb8b1ec2e0c2774be97fe96c7299838/src/path/filepath/path_windows.go#L216 +// ... which means we can't extract this functionality without build tags. + +package source + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func Test_DirectorySource_crossPlatformExclusions(t *testing.T) { + testCases := []struct { + desc string + root string + path string + exclude string + match bool + }{ + { + desc: "windows doublestar", + root: "C:\\User\\stuff", + path: "C:\\User\\stuff\\thing.txt", + exclude: "**/*.txt", + match: true, + }, + { + desc: "windows relative", + root: "C:\\User\\stuff", + path: "C:\\User\\stuff\\thing.txt", + exclude: "./*.txt", + match: true, + }, + { + desc: "windows one level", + root: "C:\\User\\stuff", + path: "C:\\User\\stuff\\thing.txt", + exclude: "*/*.txt", + match: false, + }, + } + + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + fns, err := getDirectoryExclusionFunctions(test.root, []string{test.exclude}) + require.NoError(t, err) + + for _, f := range fns { + result := f(test.path, nil, nil) + require.Equal(t, test.match, result) + } + }) + } +} diff --git a/syft/source/exclude.go b/syft/source/exclude.go new file mode 100644 index 000000000..f41dc0e31 --- /dev/null +++ 
b/syft/source/exclude.go @@ -0,0 +1,5 @@ +package source + +type ExcludeConfig struct { + Paths []string +} diff --git a/syft/source/file_source.go b/syft/source/file_source.go new file mode 100644 index 000000000..2025d0856 --- /dev/null +++ b/syft/source/file_source.go @@ -0,0 +1,280 @@ +package source + +import ( + "crypto" + "fmt" + "io/fs" + "os" + "path" + "path/filepath" + "sync" + + "github.com/mholt/archiver/v3" + "github.com/opencontainers/go-digest" + + stereoFile "github.com/anchore/stereoscope/pkg/file" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/fileresolver" +) + +var _ Source = (*FileSource)(nil) + +type FileConfig struct { + Path string + Exclude ExcludeConfig + DigestAlgorithms []crypto.Hash + Alias Alias +} + +type FileSourceMetadata struct { + Path string `json:"path" yaml:"path"` + Digests []file.Digest `json:"digests,omitempty" yaml:"digests,omitempty"` + MIMEType string `json:"mimeType" yaml:"mimeType"` +} + +type FileSource struct { + id artifact.ID + digestForVersion string + config FileConfig + resolver *fileresolver.Directory + mutex *sync.Mutex + closer func() error + digests []file.Digest + mimeType string + analysisPath string +} + +func NewFromFile(cfg FileConfig) (*FileSource, error) { + fileMeta, err := os.Stat(cfg.Path) + if err != nil { + return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err) + } + + if fileMeta.IsDir() { + return nil, fmt.Errorf("given path is a directory: %q", cfg.Path) + } + + analysisPath, cleanupFn := fileAnalysisPath(cfg.Path) + + var digests []file.Digest + if len(cfg.DigestAlgorithms) > 0 { + fh, err := os.Open(cfg.Path) + if err != nil { + return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err) + } + + defer fh.Close() + + digests, err = file.NewDigestsFromFile(fh, cfg.DigestAlgorithms) + if err != nil { + return nil, fmt.Errorf("unable to calculate digests for 
file=%q: %w", cfg.Path, err) + } + } + + fh, err := os.Open(cfg.Path) + if err != nil { + return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err) + } + + defer fh.Close() + + id, versionDigest := deriveIDFromFile(cfg) + + return &FileSource{ + id: id, + config: cfg, + mutex: &sync.Mutex{}, + closer: cleanupFn, + analysisPath: analysisPath, + digestForVersion: versionDigest, + digests: digests, + mimeType: stereoFile.MIMEType(fh), + }, nil +} + +// deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included +// in the ID derivation (along with contents). This way if the user scans the same item but is considered to be +// logically different, then ID will express that. +func deriveIDFromFile(cfg FileConfig) (artifact.ID, string) { + d := digestOfFileContents(cfg.Path) + info := d + + if !cfg.Alias.IsEmpty() { + // if the user provided an alias, we want to consider that in the artifact ID. This way if the user + // scans the same item but is considered to be logically different, then ID will express that. 
+ info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version) + } + + if d != "" { + d = fmt.Sprintf("sha256:%s", d) + } + + return artifactIDFromDigest(digest.SHA256.FromString(info).String()), d +} + +func (s FileSource) ID() artifact.ID { + return s.id +} + +func (s FileSource) Describe() Description { + name := path.Base(s.config.Path) + version := s.digestForVersion + if !s.config.Alias.IsEmpty() { + a := s.config.Alias + if a.Name != "" { + name = a.Name + } + + if a.Version != "" { + version = a.Version + } + } + return Description{ + ID: string(s.id), + Name: name, + Version: version, + Metadata: FileSourceMetadata{ + Path: s.config.Path, + Digests: s.digests, + MIMEType: s.mimeType, + }, + } +} + +func (s FileSource) FileResolver(_ Scope) (file.Resolver, error) { + s.mutex.Lock() + defer s.mutex.Unlock() + + if s.resolver != nil { + return s.resolver, nil + } + + exclusionFunctions, err := getDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths) + if err != nil { + return nil, err + } + + fi, err := os.Stat(s.analysisPath) + if err != nil { + return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err) + } + isArchiveAnalysis := fi.IsDir() + + absAnalysisPath, err := filepath.Abs(s.analysisPath) + if err != nil { + return nil, fmt.Errorf("unable to get absolute path for analysis path=%q: %w", s.analysisPath, err) + } + absParentDir := filepath.Dir(absAnalysisPath) + + var res *fileresolver.Directory + if isArchiveAnalysis { + // this is an analysis of an archive file... we should scan the directory where the archive contents + res, err = fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) + if err != nil { + return nil, fmt.Errorf("unable to create directory resolver: %w", err) + } + } else { + // this is an analysis of a single file. We want to ultimately scan the directory that the file is in, but we + // don't want to include any other files except this the given file. 
+ exclusionFunctions = append([]fileresolver.PathIndexVisitor{ + + // note: we should exclude these kinds of paths first before considering any other user-provided exclusions + func(p string, info os.FileInfo, err error) error { + if p == absParentDir { + // this is the root directory... always include it + return nil + } + + if filepath.Dir(p) != absParentDir { + // we are no longer in the root directory containing the single file we want to scan... + // we should skip the directory this path resides in entirely! + return fs.SkipDir + } + + if path.Base(p) != path.Base(s.config.Path) { + // we're in the root directory, but this is not the file we want to scan... + // we should selectively skip this file (not the directory we're in). + return fileresolver.ErrSkipPath + } + return nil + }, + }, exclusionFunctions...) + + res, err = fileresolver.NewFromDirectory(absParentDir, absParentDir, exclusionFunctions...) + if err != nil { + return nil, fmt.Errorf("unable to create directory resolver: %w", err) + } + } + + s.resolver = res + + return s.resolver, nil +} + +func (s *FileSource) Close() error { + if s.closer == nil { + return nil + } + s.resolver = nil + return s.closer() +} + +// fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive +// contents have been made available. A cleanup function is provided for any temp files created (if any). +func fileAnalysisPath(path string) (string, func() error) { + var analysisPath = path + var cleanupFn = func() error { return nil } + + // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and + // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is + // unarchived. 
+ envelopedUnarchiver, err := archiver.ByExtension(path) + if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { + if tar, ok := unarchiver.(*archiver.Tar); ok { + // when tar files are extracted, if there are multiple entries at the same + // location, the last entry wins + // NOTE: this currently does not display any messages if an overwrite happens + tar.OverwriteExisting = true + } + unarchivedPath, tmpCleanup, err := unarchiveToTmp(path, unarchiver) + if err != nil { + log.Warnf("file could not be unarchived: %+v", err) + } else { + log.Debugf("source path is an archive") + analysisPath = unarchivedPath + } + if tmpCleanup != nil { + cleanupFn = tmpCleanup + } + } + + return analysisPath, cleanupFn +} + +func digestOfFileContents(path string) string { + file, err := os.Open(path) + if err != nil { + return digest.SHA256.FromString(path).String() + } + defer file.Close() + di, err := digest.SHA256.FromReader(file) + if err != nil { + return digest.SHA256.FromString(path).String() + } + return di.String() +} + +func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func() error, error) { + tempDir, err := os.MkdirTemp("", "syft-archive-contents-") + if err != nil { + return "", func() error { return nil }, fmt.Errorf("unable to create tempdir for archive processing: %w", err) + } + + cleanupFn := func() error { + return os.RemoveAll(tempDir) + } + + return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) +} diff --git a/syft/source/file_source_test.go b/syft/source/file_source_test.go new file mode 100644 index 000000000..11fcc3428 --- /dev/null +++ b/syft/source/file_source_test.go @@ -0,0 +1,278 @@ +package source + +import ( + "io" + "os" + "os/exec" + "path" + "path/filepath" + "syscall" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" +) + +func TestNewFromFile(t *testing.T) { + 
testCases := []struct { + desc string + input string + expString string + testPathFn func(file.Resolver) ([]file.Location, error) + expRefs int + }{ + { + desc: "path detected by glob", + input: "test-fixtures/file-index-filter/.vimrc", + testPathFn: func(resolver file.Resolver) ([]file.Location, error) { + return resolver.FilesByGlob("**/.vimrc", "**/.2", "**/.1/*", "**/empty") + }, + expRefs: 1, + }, + { + desc: "path detected by abs path", + input: "test-fixtures/file-index-filter/.vimrc", + testPathFn: func(resolver file.Resolver) ([]file.Location, error) { + return resolver.FilesByPath("/.vimrc", "/.2", "/.1/something", "/empty") + }, + expRefs: 1, + }, + { + desc: "path detected by relative path", + input: "test-fixtures/file-index-filter/.vimrc", + testPathFn: func(resolver file.Resolver) ([]file.Location, error) { + return resolver.FilesByPath(".vimrc", "/.2", "/.1/something", "empty") + }, + expRefs: 1, + }, + } + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + src, err := NewFromFile(FileConfig{ + Path: test.input, + }) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + assert.Equal(t, test.input, src.Describe().Metadata.(FileSourceMetadata).Path) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + refs, err := test.testPathFn(res) + require.NoError(t, err) + require.Len(t, refs, test.expRefs) + if test.expRefs == 1 { + assert.Equal(t, path.Base(test.input), path.Base(refs[0].RealPath)) + } + + }) + } +} + +func TestNewFromFile_WithArchive(t *testing.T) { + testCases := []struct { + desc string + input string + expString string + inputPaths []string + expRefs int + layer2 bool + contents string + }{ + { + desc: "path detected", + input: "test-fixtures/path-detected", + inputPaths: []string{"/.vimrc"}, + expRefs: 1, + }, + { + desc: "use first entry for duplicate paths", + input: "test-fixtures/path-detected", + inputPaths: []string{"/.vimrc"}, + expRefs: 1, + 
layer2: true, + contents: "Another .vimrc file", + }, + } + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + archivePath := setupArchiveTest(t, test.input, test.layer2) + + src, err := NewFromFile(FileConfig{ + Path: archivePath, + }) + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + assert.Equal(t, archivePath, src.Describe().Metadata.(FileSourceMetadata).Path) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + refs, err := res.FilesByPath(test.inputPaths...) + require.NoError(t, err) + assert.Len(t, refs, test.expRefs) + + if test.contents != "" { + reader, err := res.FileContentsByLocation(refs[0]) + require.NoError(t, err) + + data, err := io.ReadAll(reader) + require.NoError(t, err) + + assert.Equal(t, test.contents, string(data)) + } + + }) + } +} + +// setupArchiveTest encapsulates common test setup work for tar file tests. It returns a cleanup function, +// which should be called (typically deferred) by the caller, the path of the created tar archive, and an error, +// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil +// (even if there's an error), and it should always be called. 
+func setupArchiveTest(t testing.TB, sourceDirPath string, layer2 bool) string { + t.Helper() + + archivePrefix, err := os.CreateTemp("", "syft-archive-TEST-") + require.NoError(t, err) + + t.Cleanup(func() { + assert.NoError(t, os.Remove(archivePrefix.Name())) + }) + + destinationArchiveFilePath := archivePrefix.Name() + ".tar" + t.Logf("archive path: %s", destinationArchiveFilePath) + createArchive(t, sourceDirPath, destinationArchiveFilePath, layer2) + + t.Cleanup(func() { + assert.NoError(t, os.Remove(destinationArchiveFilePath)) + }) + + cwd, err := os.Getwd() + require.NoError(t, err) + + t.Logf("running from: %s", cwd) + + return destinationArchiveFilePath +} + +// createArchive creates a new archive file at destinationArchivePath based on the directory found at sourceDirPath. +func createArchive(t testing.TB, sourceDirPath, destinationArchivePath string, layer2 bool) { + t.Helper() + + cwd, err := os.Getwd() + if err != nil { + t.Fatalf("unable to get cwd: %+v", err) + } + + cmd := exec.Command("./generate-tar-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath)) + cmd.Dir = filepath.Join(cwd, "test-fixtures") + + if err := cmd.Start(); err != nil { + t.Fatalf("unable to start generate zip fixture script: %+v", err) + } + + if err := cmd.Wait(); err != nil { + if exiterr, ok := err.(*exec.ExitError); ok { + // The program has exited with an exit code != 0 + + // This works on both Unix and Windows. Although package + // syscall is generally platform dependent, WaitStatus is + // defined for both Unix and Windows and in both cases has + // an ExitStatus() method with the same signature. 
+ if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { + if status.ExitStatus() != 0 { + t.Fatalf("failed to generate fixture: rc=%d", status.ExitStatus()) + } + } + } else { + t.Fatalf("unable to get generate fixture script result: %+v", err) + } + } + + if layer2 { + cmd = exec.Command("tar", "-rvf", destinationArchivePath, ".") + cmd.Dir = filepath.Join(cwd, "test-fixtures", path.Base(sourceDirPath+"-2")) + if err := cmd.Start(); err != nil { + t.Fatalf("unable to start tar appending fixture script: %+v", err) + } + _ = cmd.Wait() + } +} + +func Test_FileSource_ID(t *testing.T) { + tests := []struct { + name string + cfg FileConfig + want artifact.ID + wantErr require.ErrorAssertionFunc + }{ + { + name: "empty", + cfg: FileConfig{}, + wantErr: require.Error, + }, + { + name: "does not exist", + cfg: FileConfig{ + Path: "./test-fixtures/does-not-exist", + }, + wantErr: require.Error, + }, + { + name: "to dir", + cfg: FileConfig{ + Path: "./test-fixtures/image-simple", + }, + wantErr: require.Error, + }, + { + name: "with path", + cfg: FileConfig{Path: "./test-fixtures/image-simple/Dockerfile"}, + want: artifact.ID("db7146472cf6d49b3ac01b42812fb60020b0b4898b97491b21bb690c808d5159"), + }, + { + name: "with path and alias", + cfg: FileConfig{ + Path: "./test-fixtures/image-simple/Dockerfile", + Alias: Alias{ + Name: "name-me-that!", + Version: "version-me-this!", + }, + }, + want: artifact.ID("3c713003305ac6605255cec8bf4ea649aa44b2b9a9f3a07bd683869d1363438a"), + }, + { + name: "other fields do not affect ID", + cfg: FileConfig{ + Path: "test-fixtures/image-simple/Dockerfile", + Exclude: ExcludeConfig{ + Paths: []string{"a", "b"}, + }, + }, + want: artifact.ID("db7146472cf6d49b3ac01b42812fb60020b0b4898b97491b21bb690c808d5159"), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.wantErr == nil { + tt.wantErr = require.NoError + } + s, err := NewFromFile(tt.cfg) + tt.wantErr(t, err) + if err != nil { + return + } + assert.Equalf(t, 
tt.want, s.ID(), "ID()") + }) + } +} diff --git a/syft/source/image_metadata.go b/syft/source/image_metadata.go deleted file mode 100644 index 0d70ed775..000000000 --- a/syft/source/image_metadata.go +++ /dev/null @@ -1,62 +0,0 @@ -package source - -import "github.com/anchore/stereoscope/pkg/image" - -// ImageMetadata represents all static metadata that defines what a container image is. This is useful to later describe -// "what" was cataloged without needing the more complicated stereoscope Image objects or FileResolver objects. -type ImageMetadata struct { - UserInput string `json:"userInput"` - ID string `json:"imageID"` - ManifestDigest string `json:"manifestDigest"` - MediaType string `json:"mediaType"` - Tags []string `json:"tags"` - Size int64 `json:"imageSize"` - Layers []LayerMetadata `json:"layers"` - RawManifest []byte `json:"manifest"` - RawConfig []byte `json:"config"` - RepoDigests []string `json:"repoDigests"` - Architecture string `json:"architecture"` - Variant string `json:"architectureVariant,omitempty"` - OS string `json:"os"` -} - -// LayerMetadata represents all static metadata that defines what a container image layer is. -type LayerMetadata struct { - MediaType string `json:"mediaType"` - Digest string `json:"digest"` - Size int64 `json:"size"` -} - -// NewImageMetadata creates a new ImageMetadata object populated from the given stereoscope Image object and user configuration. -func NewImageMetadata(img *image.Image, userInput string) ImageMetadata { - // populate artifacts... 
- tags := make([]string, len(img.Metadata.Tags)) - for idx, tag := range img.Metadata.Tags { - tags[idx] = tag.String() - } - theImg := ImageMetadata{ - ID: img.Metadata.ID, - UserInput: userInput, - ManifestDigest: img.Metadata.ManifestDigest, - Size: img.Metadata.Size, - MediaType: string(img.Metadata.MediaType), - Tags: tags, - Layers: make([]LayerMetadata, len(img.Layers)), - RawConfig: img.Metadata.RawConfig, - RawManifest: img.Metadata.RawManifest, - RepoDigests: img.Metadata.RepoDigests, - Architecture: img.Metadata.Architecture, - Variant: img.Metadata.Variant, - OS: img.Metadata.OS, - } - - // populate image metadata - for idx, l := range img.Layers { - theImg.Layers[idx] = LayerMetadata{ - MediaType: string(l.Metadata.MediaType), - Digest: l.Metadata.Digest, - Size: l.Metadata.Size, - } - } - return theImg -} diff --git a/syft/source/metadata.go b/syft/source/metadata.go deleted file mode 100644 index ecbad4f1d..000000000 --- a/syft/source/metadata.go +++ /dev/null @@ -1,12 +0,0 @@ -package source - -// Metadata represents any static source data that helps describe "what" was cataloged. -type Metadata struct { - ID string `hash:"ignore"` // the id generated from the parent source struct - Scheme Scheme // the source data scheme type (directory or image) - ImageMetadata ImageMetadata // all image info (image only) - Path string // the root path to be cataloged (directory only) - Base string // the base path to be cataloged (directory only) - Name string - Version string -} diff --git a/syft/source/scheme.go b/syft/source/scheme.go deleted file mode 100644 index 46a621478..000000000 --- a/syft/source/scheme.go +++ /dev/null @@ -1,74 +0,0 @@ -package source - -import ( - "fmt" - "strings" - - "github.com/mitchellh/go-homedir" - "github.com/spf13/afero" - - "github.com/anchore/stereoscope/pkg/image" -) - -// Scheme represents the optional prefixed string at the beginning of a user request (e.g. "docker:"). 
-type Scheme string - -const ( - // UnknownScheme is the default scheme - UnknownScheme Scheme = "UnknownScheme" - // DirectoryScheme indicates the source being cataloged is a directory on the root filesystem - DirectoryScheme Scheme = "DirectoryScheme" - // ImageScheme indicates the source being cataloged is a container image - ImageScheme Scheme = "ImageScheme" - // FileScheme indicates the source being cataloged is a single file - FileScheme Scheme = "FileScheme" -) - -var AllSchemes = []Scheme{ - DirectoryScheme, - ImageScheme, - FileScheme, -} - -func DetectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, image.Source, string, error) { - switch { - case strings.HasPrefix(userInput, "dir:"): - dirLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "dir:")) - if err != nil { - return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) - } - return DirectoryScheme, image.UnknownSource, dirLocation, nil - - case strings.HasPrefix(userInput, "file:"): - fileLocation, err := homedir.Expand(strings.TrimPrefix(userInput, "file:")) - if err != nil { - return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand directory path: %w", err) - } - return FileScheme, image.UnknownSource, fileLocation, nil - } - - // try the most specific sources first and move out towards more generic sources. - - // first: let's try the image detector, which has more scheme parsing internal to stereoscope - source, imageSpec, err := imageDetector(userInput) - if err == nil && source != image.UnknownSource { - return ImageScheme, source, imageSpec, nil - } - - // next: let's try more generic sources (dir, file, etc.) 
- location, err := homedir.Expand(userInput) - if err != nil { - return UnknownScheme, image.UnknownSource, "", fmt.Errorf("unable to expand potential directory path: %w", err) - } - - fileMeta, err := fs.Stat(location) - if err != nil { - return UnknownScheme, source, "", nil - } - - if fileMeta.IsDir() { - return DirectoryScheme, source, location, nil - } - - return FileScheme, source, location, nil -} diff --git a/syft/source/scope.go b/syft/source/scope.go index e959d1a42..05f14644a 100644 --- a/syft/source/scope.go +++ b/syft/source/scope.go @@ -10,7 +10,7 @@ const ( UnknownScope Scope = "UnknownScope" // SquashedScope indicates to only catalog content visible from the squashed filesystem representation (what can be seen only within the container at runtime) SquashedScope Scope = "Squashed" - // AllLayersScope indicates to catalog content on all layers, irregardless if it is visible from the container at runtime. + // AllLayersScope indicates to catalog content on all layers, regardless if it is visible from the container at runtime. AllLayersScope Scope = "AllLayers" ) diff --git a/syft/source/source.go b/syft/source/source.go index 4ff747ae2..6b77b16f0 100644 --- a/syft/source/source.go +++ b/syft/source/source.go @@ -6,628 +6,42 @@ within this package. 
package source import ( - "context" - "fmt" - "os" - "path/filepath" - "strings" - "sync" + "errors" + "io" - "github.com/bmatcuk/doublestar/v4" - "github.com/mholt/archiver/v3" - digest "github.com/opencontainers/go-digest" - "github.com/spf13/afero" - - "github.com/anchore/stereoscope" - "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/internal/fileresolver" ) -// Source is an object that captures the data source to be cataloged, configuration, and a specific resolver used -// in cataloging (based on the data source and configuration) -type Source struct { - id artifact.ID `hash:"ignore"` - Image *image.Image `hash:"ignore"` // the image object to be cataloged (image only) - Metadata Metadata - directoryResolver *fileresolver.Directory `hash:"ignore"` - path string - base string - mutex *sync.Mutex - Exclusions []string `hash:"ignore"` +type Source interface { + artifact.Identifiable + FileResolver(Scope) (file.Resolver, error) + Describe() Description + io.Closer } -// Input is an object that captures the detected user input regarding source location, scheme, and provider type. -// It acts as a struct input for some source constructors. -type Input struct { - UserInput string - Scheme Scheme - ImageSource image.Source - Location string - Platform string - Name string - Version string +type emptySource struct { + description Description } -// ParseInput generates a source Input that can be used as an argument to generate a new source -// from specific providers including a registry. -func ParseInput(userInput string, platform string) (*Input, error) { - return ParseInputWithName(userInput, platform, "", "") -} - -// ParseInputWithName generates a source Input that can be used as an argument to generate a new source -// from specific providers including a registry, with an explicit name. 
-func ParseInputWithName(userInput string, platform, name, defaultImageSource string) (*Input, error) { - return ParseInputWithNameVersion(userInput, platform, name, "", defaultImageSource) -} - -// ParseInputWithNameVersion generates a source Input that can be used as an argument to generate a new source -// from specific providers including a registry, with an explicit name and version. -func ParseInputWithNameVersion(userInput, platform, name, version, defaultImageSource string) (*Input, error) { - fs := afero.NewOsFs() - scheme, source, location, err := DetectScheme(fs, image.DetectSource, userInput) - if err != nil { - return nil, err - } - - if source == image.UnknownSource { - // only run for these two scheme - // only check on packages command, attest we automatically try to pull from userInput - switch scheme { - case ImageScheme, UnknownScheme: - scheme = ImageScheme - location = userInput - if defaultImageSource != "" { - source = parseDefaultImageSource(defaultImageSource) - } else { - imagePullSource := image.DetermineDefaultImagePullSource(userInput) - source = imagePullSource - } - if location == "" { - location = userInput - } - default: - } - } - - if scheme != ImageScheme && platform != "" { - return nil, fmt.Errorf("cannot specify a platform for a non-image source") - } - - // collect user input for downstream consumption - return &Input{ - UserInput: userInput, - Scheme: scheme, - ImageSource: source, - Location: location, - Platform: platform, - Name: name, - Version: version, - }, nil -} - -func parseDefaultImageSource(defaultImageSource string) image.Source { - switch defaultImageSource { - case "registry": - return image.OciRegistrySource - case "docker": - return image.DockerDaemonSource - case "podman": - return image.PodmanDaemonSource - default: - return image.UnknownSource +func FromDescription(d Description) Source { + return &emptySource{ + description: d, } } -type sourceDetector func(string) (image.Source, string, error) - -func 
NewFromRegistry(in Input, registryOptions *image.RegistryOptions, exclusions []string) (*Source, func(), error) { - source, cleanupFn, err := generateImageSource(in, registryOptions) - if source != nil { - source.Exclusions = exclusions - } - return source, cleanupFn, err +func (e emptySource) ID() artifact.ID { + return artifact.ID(e.description.ID) } -// New produces a Source based on userInput like dir: or image:tag -func New(in Input, registryOptions *image.RegistryOptions, exclusions []string) (*Source, func(), error) { - var err error - fs := afero.NewOsFs() - var source *Source - cleanupFn := func() {} - - switch in.Scheme { - case FileScheme: - source, cleanupFn, err = generateFileSource(fs, in) - case DirectoryScheme: - source, cleanupFn, err = generateDirectorySource(fs, in) - case ImageScheme: - source, cleanupFn, err = generateImageSource(in, registryOptions) - default: - err = fmt.Errorf("unable to process input for scanning: %q", in.UserInput) - } - - if err == nil { - source.Exclusions = exclusions - } - - return source, cleanupFn, err +func (e emptySource) FileResolver(_ Scope) (file.Resolver, error) { + return nil, errors.New("no file resolver available for description-only source") } -func generateImageSource(in Input, registryOptions *image.RegistryOptions) (*Source, func(), error) { - img, cleanup, err := getImageWithRetryStrategy(in, registryOptions) - if err != nil || img == nil { - return nil, cleanup, fmt.Errorf("could not fetch image %q: %w", in.Location, err) - } - - s, err := NewFromImageWithNameVersion(img, in.Location, in.Name, in.Version) - if err != nil { - return nil, cleanup, fmt.Errorf("could not populate source with image: %w", err) - } - - return &s, cleanup, nil +func (e emptySource) Describe() Description { + return e.description } -func parseScheme(userInput string) string { - parts := strings.SplitN(userInput, ":", 2) - if len(parts) < 2 { - return "" - } - - return parts[0] -} - -func getImageWithRetryStrategy(in Input, 
registryOptions *image.RegistryOptions) (*image.Image, func(), error) { - ctx := context.TODO() - - var opts []stereoscope.Option - if registryOptions != nil { - opts = append(opts, stereoscope.WithRegistryOptions(*registryOptions)) - } - - if in.Platform != "" { - opts = append(opts, stereoscope.WithPlatform(in.Platform)) - } - - img, err := stereoscope.GetImageFromSource(ctx, in.Location, in.ImageSource, opts...) - cleanup := func() { - if err := img.Cleanup(); err != nil { - log.Warnf("unable to cleanup image=%q: %w", in.UserInput, err) - } - } - if err == nil { - // Success on the first try! - return img, cleanup, nil - } - - scheme := parseScheme(in.UserInput) - if !(scheme == "docker" || scheme == "registry") { - // Image retrieval failed, and we shouldn't retry it. It's most likely that the - // user _did_ intend the parsed scheme, but there was a legitimate failure with - // using the scheme to load the image. Alert the user to this failure, so they - // can fix the problem. - return nil, nil, err - } - - // Maybe the user wanted "docker" or "registry" to refer to an _image name_ - // (e.g. "docker:latest"), not a scheme. We'll retry image retrieval with this - // alternative interpretation, in an attempt to avoid unnecessary user friction. - - log.Warnf( - "scheme %q specified, but it coincides with a common image name; re-examining user input %q"+ - " without scheme parsing because image retrieval using scheme parsing was unsuccessful: %v", - scheme, - in.UserInput, - err, - ) - - // We need to determine the image source again, such that this determination - // doesn't take scheme parsing into account. - in.ImageSource = image.DetermineDefaultImagePullSource(in.UserInput) - img, userInputErr := stereoscope.GetImageFromSource(ctx, in.UserInput, in.ImageSource, opts...) 
- cleanup = func() { - if err := img.Cleanup(); err != nil { - log.Warnf("unable to cleanup image=%q: %w", in.UserInput, err) - } - } - if userInputErr != nil { - // Image retrieval failed on both tries, we will want to return both errors. - return nil, nil, fmt.Errorf( - "scheme %q specified; "+ - "image retrieval using scheme parsing (%s) was unsuccessful: %v; "+ - "image retrieval without scheme parsing (%s) was unsuccessful: %v", - scheme, - in.Location, - err, - in.UserInput, - userInputErr, - ) - } - - return img, cleanup, nil -} - -func generateDirectorySource(fs afero.Fs, in Input) (*Source, func(), error) { - fileMeta, err := fs.Stat(in.Location) - if err != nil { - return nil, func() {}, fmt.Errorf("unable to stat dir=%q: %w", in.Location, err) - } - - if !fileMeta.IsDir() { - return nil, func() {}, fmt.Errorf("given path is not a directory (path=%q): %w", in.Location, err) - } - - s, err := NewFromDirectoryWithNameVersion(in.Location, in.Name, in.Version) - if err != nil { - return nil, func() {}, fmt.Errorf("could not populate source from path=%q: %w", in.Location, err) - } - - return &s, func() {}, nil -} - -func generateFileSource(fs afero.Fs, in Input) (*Source, func(), error) { - fileMeta, err := fs.Stat(in.Location) - if err != nil { - return nil, func() {}, fmt.Errorf("unable to stat dir=%q: %w", in.Location, err) - } - - if fileMeta.IsDir() { - return nil, func() {}, fmt.Errorf("given path is not a directory (path=%q): %w", in.Location, err) - } - - s, cleanupFn := NewFromFileWithNameVersion(in.Location, in.Name, in.Version) - - return &s, cleanupFn, nil -} - -// NewFromDirectory creates a new source object tailored to catalog a given filesystem directory recursively. -func NewFromDirectory(path string) (Source, error) { - return NewFromDirectoryWithName(path, "") -} - -// NewFromDirectoryWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. 
-func NewFromDirectoryWithName(path string, name string) (Source, error) { - return NewFromDirectoryWithNameVersion(path, name, "") -} - -// NewFromDirectoryWithNameVersion creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. -func NewFromDirectoryWithNameVersion(path string, name string, version string) (Source, error) { - s := Source{ - mutex: &sync.Mutex{}, - Metadata: Metadata{ - Name: name, - Version: version, - Scheme: DirectoryScheme, - Path: path, - }, - path: path, - } - s.SetID() - return s, nil -} - -// NewFromDirectoryRoot creates a new source object tailored to catalog a given filesystem directory recursively. -func NewFromDirectoryRoot(path string) (Source, error) { - return NewFromDirectoryRootWithName(path, "") -} - -// NewFromDirectoryRootWithName creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. -func NewFromDirectoryRootWithName(path string, name string) (Source, error) { - return NewFromDirectoryRootWithNameVersion(path, name, "") -} - -// NewFromDirectoryRootWithNameVersion creates a new source object tailored to catalog a given filesystem directory recursively, with an explicitly provided name. -func NewFromDirectoryRootWithNameVersion(path string, name string, version string) (Source, error) { - s := Source{ - mutex: &sync.Mutex{}, - Metadata: Metadata{ - Name: name, - Version: version, - Scheme: DirectoryScheme, - Path: path, - Base: path, - }, - path: path, - base: path, - } - s.SetID() - return s, nil -} - -// NewFromFile creates a new source object tailored to catalog a file. -func NewFromFile(path string) (Source, func()) { - return NewFromFileWithName(path, "") -} - -// NewFromFileWithName creates a new source object tailored to catalog a file, with an explicitly provided name. 
-func NewFromFileWithName(path string, name string) (Source, func()) { - return NewFromFileWithNameVersion(path, name, "") -} - -// NewFromFileWithNameVersion creates a new source object tailored to catalog a file, with an explicitly provided name and version. -func NewFromFileWithNameVersion(path string, name string, version string) (Source, func()) { - analysisPath, cleanupFn := fileAnalysisPath(path) - - s := Source{ - mutex: &sync.Mutex{}, - Metadata: Metadata{ - Name: name, - Version: version, - Scheme: FileScheme, - Path: path, - }, - path: analysisPath, - } - - s.SetID() - return s, cleanupFn -} - -// fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive -// contents have been made available. A cleanup function is provided for any temp files created (if any). -func fileAnalysisPath(path string) (string, func()) { - var analysisPath = path - var cleanupFn = func() {} - - // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and - // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is - // unarchived. 
- envelopedUnarchiver, err := archiver.ByExtension(path) - if unarchiver, ok := envelopedUnarchiver.(archiver.Unarchiver); err == nil && ok { - if tar, ok := unarchiver.(*archiver.Tar); ok { - // when tar files are extracted, if there are multiple entries at the same - // location, the last entry wins - // NOTE: this currently does not display any messages if an overwrite happens - tar.OverwriteExisting = true - } - unarchivedPath, tmpCleanup, err := unarchiveToTmp(path, unarchiver) - if err != nil { - log.Warnf("file could not be unarchived: %+v", err) - } else { - log.Debugf("source path is an archive") - analysisPath = unarchivedPath - } - if tmpCleanup != nil { - cleanupFn = tmpCleanup - } - } - - return analysisPath, cleanupFn -} - -// NewFromImage creates a new source object tailored to catalog a given container image, relative to the -// option given (e.g. all-layers, squashed, etc) -func NewFromImage(img *image.Image, userImageStr string) (Source, error) { - return NewFromImageWithName(img, userImageStr, "") -} - -// NewFromImageWithName creates a new source object tailored to catalog a given container image, relative to the -// option given (e.g. all-layers, squashed, etc), with an explicit name. -func NewFromImageWithName(img *image.Image, userImageStr string, name string) (Source, error) { - return NewFromImageWithNameVersion(img, userImageStr, name, "") -} - -// NewFromImageWithNameVersion creates a new source object tailored to catalog a given container image, relative to the -// option given (e.g. all-layers, squashed, etc), with an explicit name and version. 
-func NewFromImageWithNameVersion(img *image.Image, userImageStr string, name string, version string) (Source, error) { - if img == nil { - return Source{}, fmt.Errorf("no image given") - } - - s := Source{ - Image: img, - Metadata: Metadata{ - Name: name, - Version: version, - Scheme: ImageScheme, - ImageMetadata: NewImageMetadata(img, userImageStr), - }, - } - s.SetID() - return s, nil -} - -func (s *Source) ID() artifact.ID { - if s.id == "" { - s.SetID() - } - return s.id -} - -func (s *Source) SetID() { - var d string - switch s.Metadata.Scheme { - case DirectoryScheme: - d = digest.FromString(s.Metadata.Path).String() - case FileScheme: - // attempt to use the digest of the contents of the file as the ID - file, err := os.Open(s.Metadata.Path) - if err != nil { - d = digest.FromString(s.Metadata.Path).String() - break - } - defer file.Close() - di, err := digest.FromReader(file) - if err != nil { - d = digest.FromString(s.Metadata.Path).String() - break - } - d = di.String() - case ImageScheme: - manifestDigest := digest.FromBytes(s.Metadata.ImageMetadata.RawManifest).String() - if manifestDigest != "" { - d = manifestDigest - break - } - - // calcuate chain ID for image sources where manifestDigest is not available - // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid - d = calculateChainID(s.Metadata.ImageMetadata.Layers) - if d == "" { - // TODO what happens here if image has no layers? 
- // Is this case possible - d = digest.FromString(s.Metadata.ImageMetadata.UserInput).String() - } - default: // for UnknownScheme we hash the struct - id, _ := artifact.IDByHash(s) - d = string(id) - } - - s.id = artifact.ID(strings.TrimPrefix(d, "sha256:")) - s.Metadata.ID = strings.TrimPrefix(d, "sha256:") -} - -func calculateChainID(lm []LayerMetadata) string { - if len(lm) < 1 { - return "" - } - - // DiffID(L0) = digest of layer 0 - // https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32 - chainID := lm[0].Digest - id := chain(chainID, lm[1:]) - - return id -} - -func chain(chainID string, layers []LayerMetadata) string { - if len(layers) < 1 { - return chainID - } - - chainID = digest.FromString(layers[0].Digest + " " + chainID).String() - return chain(chainID, layers[1:]) -} - -func (s *Source) FileResolver(scope Scope) (file.Resolver, error) { - switch s.Metadata.Scheme { - case DirectoryScheme, FileScheme: - s.mutex.Lock() - defer s.mutex.Unlock() - if s.directoryResolver == nil { - exclusionFunctions, err := getDirectoryExclusionFunctions(s.path, s.Exclusions) - if err != nil { - return nil, err - } - res, err := fileresolver.NewFromDirectory(s.path, s.base, exclusionFunctions...) 
- if err != nil { - return nil, fmt.Errorf("unable to create directory resolver: %w", err) - } - s.directoryResolver = res - } - return s.directoryResolver, nil - case ImageScheme: - var res file.Resolver - var err error - switch scope { - case SquashedScope: - res, err = fileresolver.NewFromContainerImageSquash(s.Image) - case AllLayersScope: - res, err = fileresolver.NewFromContainerImageAllLayers(s.Image) - default: - return nil, fmt.Errorf("bad image scope provided: %+v", scope) - } - if err != nil { - return nil, err - } - // image tree contains all paths, so we filter out the excluded entries afterwards - if len(s.Exclusions) > 0 { - res = fileresolver.NewExcluding(res, getImageExclusionFunction(s.Exclusions)) - } - return res, nil - } - return nil, fmt.Errorf("unable to determine FilePathResolver with current scheme=%q", s.Metadata.Scheme) -} - -func unarchiveToTmp(path string, unarchiver archiver.Unarchiver) (string, func(), error) { - tempDir, err := os.MkdirTemp("", "syft-archive-contents-") - if err != nil { - return "", func() {}, fmt.Errorf("unable to create tempdir for archive processing: %w", err) - } - - cleanupFn := func() { - if err := os.RemoveAll(tempDir); err != nil { - log.Warnf("unable to cleanup archive tempdir: %+v", err) - } - } - - return tempDir, cleanupFn, unarchiver.Unarchive(path, tempDir) -} - -func getImageExclusionFunction(exclusions []string) func(string) bool { - if len(exclusions) == 0 { - return nil - } - // add subpath exclusions - for _, exclusion := range exclusions { - exclusions = append(exclusions, exclusion+"/**") - } - return func(path string) bool { - for _, exclusion := range exclusions { - matches, err := doublestar.Match(exclusion, path) - if err != nil { - return false - } - if matches { - return true - } - } - return false - } -} - -func getDirectoryExclusionFunctions(root string, exclusions []string) ([]fileresolver.PathIndexVisitor, error) { - if len(exclusions) == 0 { - return nil, nil - } - - // this is what 
Directory.indexTree is doing to get the absolute path: - root, err := filepath.Abs(root) - if err != nil { - return nil, err - } - - // this handles Windows file paths by converting them to C:/something/else format - root = filepath.ToSlash(root) - - if !strings.HasSuffix(root, "/") { - root += "/" - } - - var errors []string - for idx, exclusion := range exclusions { - // check exclusions for supported paths, these are all relative to the "scan root" - if strings.HasPrefix(exclusion, "./") || strings.HasPrefix(exclusion, "*/") || strings.HasPrefix(exclusion, "**/") { - exclusion = strings.TrimPrefix(exclusion, "./") - exclusions[idx] = root + exclusion - } else { - errors = append(errors, exclusion) - } - } - - if errors != nil { - return nil, fmt.Errorf("invalid exclusion pattern(s): '%s' (must start with one of: './', '*/', or '**/')", strings.Join(errors, "', '")) - } - - return []fileresolver.PathIndexVisitor{ - func(path string, info os.FileInfo, _ error) error { - for _, exclusion := range exclusions { - // this is required to handle Windows filepaths - path = filepath.ToSlash(path) - matches, err := doublestar.Match(exclusion, path) - if err != nil { - return nil - } - if matches { - if info != nil && info.IsDir() { - return filepath.SkipDir - } - return fileresolver.ErrSkipPath - } - } - return nil - }, - }, nil +func (e emptySource) Close() error { + return nil // no-op } diff --git a/syft/source/source_test.go b/syft/source/source_test.go deleted file mode 100644 index bfa085d09..000000000 --- a/syft/source/source_test.go +++ /dev/null @@ -1,920 +0,0 @@ -//go:build !windows -// +build !windows - -package source - -import ( - "io" - "io/fs" - "os" - "os/exec" - "path" - "path/filepath" - "sort" - "strings" - "syscall" - "testing" - "time" - - "github.com/google/go-cmp/cmp" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/anchore/stereoscope/pkg/image" - "github.com/anchore/stereoscope/pkg/imagetest" - 
"github.com/anchore/syft/syft/artifact" - "github.com/anchore/syft/syft/internal/fileresolver" -) - -func TestParseInput(t *testing.T) { - tests := []struct { - name string - input string - platform string - expected Scheme - errFn require.ErrorAssertionFunc - }{ - { - name: "ParseInput parses a file input", - input: "test-fixtures/image-simple/file-1.txt", - expected: FileScheme, - }, - { - name: "errors out when using platform for non-image scheme", - input: "test-fixtures/image-simple/file-1.txt", - platform: "arm64", - errFn: require.Error, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.errFn == nil { - test.errFn = require.NoError - } - sourceInput, err := ParseInput(test.input, test.platform) - test.errFn(t, err) - if test.expected != "" { - require.NotNil(t, sourceInput) - assert.Equal(t, sourceInput.Scheme, test.expected) - } - }) - } -} - -func TestNewFromImageFails(t *testing.T) { - t.Run("no image given", func(t *testing.T) { - _, err := NewFromImage(nil, "") - if err == nil { - t.Errorf("expected an error condition but none was given") - } - }) -} - -func TestSetID(t *testing.T) { - layer := image.NewLayer(nil) - layer.Metadata = image.LayerMetadata{ - Digest: "sha256:6f4fb385d4e698647bf2a450749dfbb7bc2831ec9a730ef4046c78c08d468e89", - } - img := image.Image{ - Layers: []*image.Layer{layer}, - } - - tests := []struct { - name string - input *Source - expected artifact.ID - }{ - { - name: "source.SetID sets the ID for FileScheme", - input: &Source{ - Metadata: Metadata{ - Scheme: FileScheme, - Path: "test-fixtures/image-simple/file-1.txt", - }, - }, - expected: artifact.ID("55096713247489add592ce977637be868497132b36d1e294a3831925ec64319a"), - }, - { - name: "source.SetID sets the ID for ImageScheme", - input: &Source{ - Image: &img, - Metadata: Metadata{ - Scheme: ImageScheme, - }, - }, - expected: artifact.ID("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"), - }, - { - name: "source.SetID 
sets the ID for DirectoryScheme", - input: &Source{ - Image: &img, - Metadata: Metadata{ - Scheme: DirectoryScheme, - Path: "test-fixtures/image-simple", - }, - }, - expected: artifact.ID("91db61e5e0ae097ef764796ce85e442a93f2a03e5313d4c7307e9b413f62e8c4"), - }, - { - name: "source.SetID sets the ID for UnknownScheme", - input: &Source{ - Image: &img, - Metadata: Metadata{ - Scheme: UnknownScheme, - Path: "test-fixtures/image-simple", - }, - }, - expected: artifact.ID("9ee9e786412d6ae5"), - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - test.input.SetID() - assert.Equal(t, test.expected, test.input.ID()) - }) - } -} - -func TestNewFromImage(t *testing.T) { - layer := image.NewLayer(nil) - img := image.Image{ - Layers: []*image.Layer{layer}, - } - - t.Run("create a new source object from image", func(t *testing.T) { - _, err := NewFromImage(&img, "") - if err != nil { - t.Errorf("unexpected error when creating a new Locations from img: %+v", err) - } - }) -} - -func TestNewFromDirectory(t *testing.T) { - testCases := []struct { - desc string - input string - expString string - inputPaths []string - expectedRefs int - expectedErr bool - }{ - { - desc: "no paths exist", - input: "foobar/", - inputPaths: []string{"/opt/", "/other"}, - expectedErr: true, - }, - { - desc: "path detected", - input: "test-fixtures", - inputPaths: []string{"path-detected/.vimrc"}, - expectedRefs: 1, - }, - { - desc: "directory ignored", - input: "test-fixtures", - inputPaths: []string{"path-detected"}, - expectedRefs: 0, - }, - { - desc: "no files-by-path detected", - input: "test-fixtures", - inputPaths: []string{"no-path-detected"}, - expectedRefs: 0, - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - src, err := NewFromDirectory(test.input) - require.NoError(t, err) - assert.Equal(t, test.input, src.Metadata.Path) - - res, err := src.FileResolver(SquashedScope) - if test.expectedErr { - if err == nil { - 
t.Fatal("expected an error when making the resolver but got none") - } - return - } else { - require.NoError(t, err) - } - - refs, err := res.FilesByPath(test.inputPaths...) - if err != nil { - t.Errorf("FilesByPath call produced an error: %+v", err) - } - if len(refs) != test.expectedRefs { - t.Errorf("unexpected number of refs returned: %d != %d", len(refs), test.expectedRefs) - - } - - }) - } -} - -func TestNewFromFile(t *testing.T) { - testCases := []struct { - desc string - input string - expString string - inputPaths []string - expRefs int - }{ - { - desc: "path detected", - input: "test-fixtures/path-detected", - inputPaths: []string{"/.vimrc"}, - expRefs: 1, - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - src, cleanup := NewFromFile(test.input) - if cleanup != nil { - t.Cleanup(cleanup) - } - - assert.Equal(t, test.input, src.Metadata.Path) - assert.Equal(t, src.Metadata.Path, src.path) - - res, err := src.FileResolver(SquashedScope) - require.NoError(t, err) - - refs, err := res.FilesByPath(test.inputPaths...) 
- require.NoError(t, err) - assert.Len(t, refs, test.expRefs) - - }) - } -} - -func TestNewFromFile_WithArchive(t *testing.T) { - testCases := []struct { - desc string - input string - expString string - inputPaths []string - expRefs int - layer2 bool - contents string - }{ - { - desc: "path detected", - input: "test-fixtures/path-detected", - inputPaths: []string{"/.vimrc"}, - expRefs: 1, - }, - { - desc: "lest entry for duplicate paths", - input: "test-fixtures/path-detected", - inputPaths: []string{"/.vimrc"}, - expRefs: 1, - layer2: true, - contents: "Another .vimrc file", - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - archivePath := setupArchiveTest(t, test.input, test.layer2) - - src, cleanup := NewFromFile(archivePath) - if cleanup != nil { - t.Cleanup(cleanup) - } - - assert.Equal(t, archivePath, src.Metadata.Path) - assert.NotEqual(t, src.Metadata.Path, src.path) - - res, err := src.FileResolver(SquashedScope) - require.NoError(t, err) - - refs, err := res.FilesByPath(test.inputPaths...) 
- require.NoError(t, err) - assert.Len(t, refs, test.expRefs) - - if test.contents != "" { - reader, err := res.FileContentsByLocation(refs[0]) - require.NoError(t, err) - - data, err := io.ReadAll(reader) - require.NoError(t, err) - - assert.Equal(t, test.contents, string(data)) - } - - }) - } -} - -func TestNewFromDirectoryShared(t *testing.T) { - testCases := []struct { - desc string - input string - expString string - notExist string - inputPaths []string - expRefs int - }{ - { - desc: "path detected", - input: "test-fixtures", - notExist: "foobar/", - inputPaths: []string{"path-detected/.vimrc"}, - expRefs: 1, - }, - { - desc: "directory ignored", - input: "test-fixtures", - notExist: "foobar/", - inputPaths: []string{"path-detected"}, - expRefs: 0, - }, - { - desc: "no files-by-path detected", - input: "test-fixtures", - notExist: "foobar/", - inputPaths: []string{"no-path-detected"}, - expRefs: 0, - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - src, err := NewFromDirectory(test.input) - - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - if src.Metadata.Path != test.input { - t.Errorf("mismatched stringer: '%s' != '%s'", src.Metadata.Path, test.input) - } - - _, err = src.FileResolver(SquashedScope) - assert.NoError(t, err) - - src.Metadata.Path = test.notExist - resolver, err := src.FileResolver(SquashedScope) - assert.NoError(t, err) - - refs, err := resolver.FilesByPath(test.inputPaths...) 
- if err != nil { - t.Errorf("FilesByPath call produced an error: %+v", err) - } - if len(refs) != test.expRefs { - t.Errorf("unexpected number of refs returned: %d != %d", len(refs), test.expRefs) - - } - - }) - } -} - -func TestFilesByPathDoesNotExist(t *testing.T) { - testCases := []struct { - desc string - input string - path string - expected string - }{ - { - input: "test-fixtures/path-detected", - desc: "path does not exist", - path: "foo", - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - src, err := NewFromDirectory(test.input) - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - res, err := src.FileResolver(SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: %+v", err) - } - refs, err := res.FilesByPath(test.path) - if err != nil { - t.Errorf("could not get file references from path: %s, %v", test.path, err) - } - - if len(refs) != 0 { - t.Errorf("didnt' expect a ref, but got: %d", len(refs)) - } - - }) - } -} - -func TestFilesByGlob(t *testing.T) { - testCases := []struct { - desc string - input string - glob string - expected int - }{ - { - input: "test-fixtures", - desc: "no matches", - glob: "bar/foo", - expected: 0, - }, - { - input: "test-fixtures/path-detected", - desc: "a single match", - glob: "**/*vimrc", - expected: 1, - }, - { - input: "test-fixtures/path-detected", - desc: "multiple matches", - glob: "**", - expected: 2, - }, - } - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - src, err := NewFromDirectory(test.input) - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - res, err := src.FileResolver(SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: %+v", err) - } - contents, err := res.FilesByGlob(test.glob) - if err != nil { - t.Errorf("could not get files by glob: %s+v", err) - } - if len(contents) != test.expected { - t.Errorf("unexpected number of files found by glob (%s): %d != 
%d", test.glob, len(contents), test.expected) - } - - }) - } -} - -func TestDirectoryExclusions(t *testing.T) { - testCases := []struct { - desc string - input string - glob string - expected []string - exclusions []string - err bool - }{ - { - input: "test-fixtures/system_paths", - desc: "exclude everything", - glob: "**", - expected: nil, - exclusions: []string{"**/*"}, - }, - { - input: "test-fixtures/image-simple", - desc: "a single path excluded", - glob: "**", - expected: []string{ - "Dockerfile", - "file-1.txt", - "file-2.txt", - }, - exclusions: []string{"**/target/**"}, - }, - { - input: "test-fixtures/image-simple", - desc: "exclude explicit directory relative to the root", - glob: "**", - expected: []string{ - "Dockerfile", - "file-1.txt", - "file-2.txt", - //"target/really/nested/file-3.txt", // explicitly skipped - }, - exclusions: []string{"./target"}, - }, - { - input: "test-fixtures/image-simple", - desc: "exclude explicit file relative to the root", - glob: "**", - expected: []string{ - "Dockerfile", - //"file-1.txt", // explicitly skipped - "file-2.txt", - "target/really/nested/file-3.txt", - }, - exclusions: []string{"./file-1.txt"}, - }, - { - input: "test-fixtures/image-simple", - desc: "exclude wildcard relative to the root", - glob: "**", - expected: []string{ - "Dockerfile", - //"file-1.txt", // explicitly skipped - //"file-2.txt", // explicitly skipped - "target/really/nested/file-3.txt", - }, - exclusions: []string{"./*.txt"}, - }, - { - input: "test-fixtures/image-simple", - desc: "exclude files deeper", - glob: "**", - expected: []string{ - "Dockerfile", - "file-1.txt", - "file-2.txt", - //"target/really/nested/file-3.txt", // explicitly skipped - }, - exclusions: []string{"**/really/**"}, - }, - { - input: "test-fixtures/image-simple", - desc: "files excluded with extension", - glob: "**", - expected: []string{ - "Dockerfile", - //"file-1.txt", // explicitly skipped - //"file-2.txt", // explicitly skipped - 
//"target/really/nested/file-3.txt", // explicitly skipped - }, - exclusions: []string{"**/*.txt"}, - }, - { - input: "test-fixtures/image-simple", - desc: "keep files with different extensions", - glob: "**", - expected: []string{ - "Dockerfile", - "file-1.txt", - "file-2.txt", - "target/really/nested/file-3.txt", - }, - exclusions: []string{"**/target/**/*.jar"}, - }, - { - input: "test-fixtures/path-detected", - desc: "file directly excluded", - glob: "**", - expected: []string{ - ".vimrc", - }, - exclusions: []string{"**/empty"}, - }, - { - input: "test-fixtures/path-detected", - desc: "pattern error containing **/", - glob: "**", - expected: []string{ - ".vimrc", - }, - exclusions: []string{"/**/empty"}, - err: true, - }, - { - input: "test-fixtures/path-detected", - desc: "pattern error incorrect start", - glob: "**", - expected: []string{ - ".vimrc", - }, - exclusions: []string{"empty"}, - err: true, - }, - { - input: "test-fixtures/path-detected", - desc: "pattern error starting with /", - glob: "**", - expected: []string{ - ".vimrc", - }, - exclusions: []string{"/empty"}, - err: true, - }, - } - registryOpts := &image.RegistryOptions{} - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - sourceInput, err := ParseInput("dir:"+test.input, "") - require.NoError(t, err) - src, fn, err := New(*sourceInput, registryOpts, test.exclusions) - defer fn() - - if test.err { - _, err = src.FileResolver(SquashedScope) - if err == nil { - t.Errorf("expected an error for patterns: %s", strings.Join(test.exclusions, " or ")) - } - return - } - - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - res, err := src.FileResolver(SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: %+v", err) - } - locations, err := res.FilesByGlob(test.glob) - if err != nil { - t.Errorf("could not get files by glob: %s+v", err) - } - var actual []string - for _, l := range locations { - actual = append(actual, l.RealPath) - } 
- - sort.Strings(test.expected) - sort.Strings(actual) - - assert.Equal(t, test.expected, actual, "diff \n"+cmp.Diff(test.expected, actual)) - }) - } -} - -func TestImageExclusions(t *testing.T) { - testCases := []struct { - desc string - input string - glob string - expected int - exclusions []string - }{ - // NOTE: in the Dockerfile, /target is moved to /, which makes /really a top-level dir - { - input: "image-simple", - desc: "a single path excluded", - glob: "**", - expected: 2, - exclusions: []string{"/really/**"}, - }, - { - input: "image-simple", - desc: "a directly referenced directory is excluded", - glob: "**", - expected: 2, - exclusions: []string{"/really"}, - }, - { - input: "image-simple", - desc: "a partial directory is not excluded", - glob: "**", - expected: 3, - exclusions: []string{"/reall"}, - }, - { - input: "image-simple", - desc: "exclude files deeper", - glob: "**", - expected: 2, - exclusions: []string{"**/nested/**"}, - }, - { - input: "image-simple", - desc: "files excluded with extension", - glob: "**", - expected: 2, - exclusions: []string{"**/*1.txt"}, - }, - { - input: "image-simple", - desc: "keep files with different extensions", - glob: "**", - expected: 3, - exclusions: []string{"**/target/**/*.jar"}, - }, - { - input: "image-simple", - desc: "file directly excluded", - glob: "**", - expected: 2, - exclusions: []string{"**/somefile-1.txt"}, // file-1 renamed to somefile-1 in Dockerfile - }, - } - registryOpts := &image.RegistryOptions{} - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - archiveLocation := imagetest.PrepareFixtureImage(t, "docker-archive", test.input) - sourceInput, err := ParseInput(archiveLocation, "") - require.NoError(t, err) - src, fn, err := New(*sourceInput, registryOpts, test.exclusions) - defer fn() - - if err != nil { - t.Errorf("could not create NewDirScope: %+v", err) - } - res, err := src.FileResolver(SquashedScope) - if err != nil { - t.Errorf("could not get resolver error: 
%+v", err) - } - contents, err := res.FilesByGlob(test.glob) - if err != nil { - t.Errorf("could not get files by glob: %s+v", err) - } - if len(contents) != test.expected { - t.Errorf("wrong number of files after exclusions (%s): %d != %d", test.glob, len(contents), test.expected) - } - }) - } -} - -type dummyInfo struct { - isDir bool -} - -func (d dummyInfo) Name() string { - //TODO implement me - panic("implement me") -} - -func (d dummyInfo) Size() int64 { - //TODO implement me - panic("implement me") -} - -func (d dummyInfo) Mode() fs.FileMode { - //TODO implement me - panic("implement me") -} - -func (d dummyInfo) ModTime() time.Time { - //TODO implement me - panic("implement me") -} - -func (d dummyInfo) IsDir() bool { - return d.isDir -} - -func (d dummyInfo) Sys() any { - //TODO implement me - panic("implement me") -} - -func Test_crossPlatformExclusions(t *testing.T) { - testCases := []struct { - desc string - root string - path string - finfo os.FileInfo - exclude string - walkHint error - }{ - { - desc: "directory exclusion", - root: "/", - path: "/usr/var/lib", - exclude: "**/var/lib", - finfo: dummyInfo{isDir: true}, - walkHint: fs.SkipDir, - }, - { - desc: "no file info", - root: "/", - path: "/usr/var/lib", - exclude: "**/var/lib", - walkHint: fileresolver.ErrSkipPath, - }, - // linux specific tests... 
- { - desc: "linux doublestar", - root: "/usr", - path: "/usr/var/lib/etc.txt", - exclude: "**/*.txt", - finfo: dummyInfo{isDir: false}, - walkHint: fileresolver.ErrSkipPath, - }, - { - desc: "linux relative", - root: "/usr/var/lib", - path: "/usr/var/lib/etc.txt", - exclude: "./*.txt", - finfo: dummyInfo{isDir: false}, - - walkHint: fileresolver.ErrSkipPath, - }, - { - desc: "linux one level", - root: "/usr", - path: "/usr/var/lib/etc.txt", - exclude: "*/*.txt", - finfo: dummyInfo{isDir: false}, - walkHint: nil, - }, - // NOTE: since these tests will run in linux and macOS, the windows paths will be - // considered relative if they do not start with a forward slash and paths with backslashes - // won't be modified by the filepath.ToSlash call, so these are emulating the result of - // filepath.ToSlash usage - - // windows specific tests... - { - desc: "windows doublestar", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "**/*.txt", - finfo: dummyInfo{isDir: false}, - walkHint: fileresolver.ErrSkipPath, - }, - { - desc: "windows relative", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "./*.txt", - finfo: dummyInfo{isDir: false}, - walkHint: fileresolver.ErrSkipPath, - }, - { - desc: "windows one level", - root: "/C:/User/stuff", - path: "/C:/User/stuff/thing.txt", - exclude: "*/*.txt", - finfo: dummyInfo{isDir: false}, - walkHint: nil, - }, - } - - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - fns, err := getDirectoryExclusionFunctions(test.root, []string{test.exclude}) - require.NoError(t, err) - - for _, f := range fns { - result := f(test.path, test.finfo, nil) - require.Equal(t, test.walkHint, result) - } - }) - } -} - -// createArchive creates a new archive file at destinationArchivePath based on the directory found at sourceDirPath. 
-func createArchive(t testing.TB, sourceDirPath, destinationArchivePath string, layer2 bool) { - t.Helper() - - cwd, err := os.Getwd() - if err != nil { - t.Fatalf("unable to get cwd: %+v", err) - } - - cmd := exec.Command("./generate-tar-fixture-from-source-dir.sh", destinationArchivePath, path.Base(sourceDirPath)) - cmd.Dir = filepath.Join(cwd, "test-fixtures") - - if err := cmd.Start(); err != nil { - t.Fatalf("unable to start generate zip fixture script: %+v", err) - } - - if err := cmd.Wait(); err != nil { - if exiterr, ok := err.(*exec.ExitError); ok { - // The program has exited with an exit code != 0 - - // This works on both Unix and Windows. Although package - // syscall is generally platform dependent, WaitStatus is - // defined for both Unix and Windows and in both cases has - // an ExitStatus() method with the same signature. - if status, ok := exiterr.Sys().(syscall.WaitStatus); ok { - if status.ExitStatus() != 0 { - t.Fatalf("failed to generate fixture: rc=%d", status.ExitStatus()) - } - } - } else { - t.Fatalf("unable to get generate fixture script result: %+v", err) - } - } - - if layer2 { - cmd = exec.Command("tar", "-rvf", destinationArchivePath, ".") - cmd.Dir = filepath.Join(cwd, "test-fixtures", path.Base(sourceDirPath+"-2")) - if err := cmd.Start(); err != nil { - t.Fatalf("unable to start tar appending fixture script: %+v", err) - } - _ = cmd.Wait() - } -} - -// setupArchiveTest encapsulates common test setup work for tar file tests. It returns a cleanup function, -// which should be called (typically deferred) by the caller, the path of the created tar archive, and an error, -// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil -// (even if there's an error), and it should always be called. 
-func setupArchiveTest(t testing.TB, sourceDirPath string, layer2 bool) string { - t.Helper() - - archivePrefix, err := os.CreateTemp(t.TempDir(), "syft-archive-TEST-") - require.NoError(t, err) - - destinationArchiveFilePath := archivePrefix.Name() + ".tar" - t.Logf("archive path: %s", destinationArchiveFilePath) - createArchive(t, sourceDirPath, destinationArchiveFilePath, layer2) - - cwd, err := os.Getwd() - require.NoError(t, err) - - t.Logf("running from: %s", cwd) - - return destinationArchiveFilePath -} - -func assertNoError(t testing.TB, fn func() error) func() { - return func() { - assert.NoError(t, fn()) - } -} diff --git a/syft/source/source_win_test.go b/syft/source/source_win_test.go deleted file mode 100644 index 8fd5eb4b7..000000000 --- a/syft/source/source_win_test.go +++ /dev/null @@ -1,54 +0,0 @@ -//go:build windows -// +build windows - -package source - -import ( - "testing" - - "github.com/stretchr/testify/require" -) - -func Test_crossPlatformExclusions(t *testing.T) { - testCases := []struct { - desc string - root string - path string - exclude string - match bool - }{ - { - desc: "windows doublestar", - root: "C:\\User\\stuff", - path: "C:\\User\\stuff\\thing.txt", - exclude: "**/*.txt", - match: true, - }, - { - desc: "windows relative", - root: "C:\\User\\stuff", - path: "C:\\User\\stuff\\thing.txt", - exclude: "./*.txt", - match: true, - }, - { - desc: "windows one level", - root: "C:\\User\\stuff", - path: "C:\\User\\stuff\\thing.txt", - exclude: "*/*.txt", - match: false, - }, - } - - for _, test := range testCases { - t.Run(test.desc, func(t *testing.T) { - fns, err := getDirectoryExclusionFunctions(test.root, []string{test.exclude}) - require.NoError(t, err) - - for _, f := range fns { - result := f(test.path, nil) - require.Equal(t, test.match, result) - } - }) - } -} diff --git a/syft/source/stereoscope_image_metadata.go b/syft/source/stereoscope_image_metadata.go new file mode 100644 index 000000000..ade4f5923 --- /dev/null +++ 
b/syft/source/stereoscope_image_metadata.go @@ -0,0 +1,62 @@ +package source + +import "github.com/anchore/stereoscope/pkg/image" + +// StereoscopeImageSourceMetadata represents all static metadata that defines what a container image is. This is useful to later describe +// "what" was cataloged without needing the more complicated stereoscope Image objects or FileResolver objects. +type StereoscopeImageSourceMetadata struct { + UserInput string `json:"userInput"` + ID string `json:"imageID"` + ManifestDigest string `json:"manifestDigest"` + MediaType string `json:"mediaType"` + Tags []string `json:"tags"` + Size int64 `json:"imageSize"` + Layers []StereoscopeLayerMetadata `json:"layers"` + RawManifest []byte `json:"manifest"` + RawConfig []byte `json:"config"` + RepoDigests []string `json:"repoDigests"` + Architecture string `json:"architecture"` + Variant string `json:"architectureVariant,omitempty"` + OS string `json:"os"` +} + +// StereoscopeLayerMetadata represents all static metadata that defines what a container image layer is. +type StereoscopeLayerMetadata struct { + MediaType string `json:"mediaType"` + Digest string `json:"digest"` + Size int64 `json:"size"` +} + +// NewStereoscopeImageMetadata creates a new ImageMetadata object populated from the given stereoscope Image object and user configuration. +func NewStereoscopeImageMetadata(img *image.Image, userInput string) StereoscopeImageSourceMetadata { + // populate artifacts... 
+ tags := make([]string, len(img.Metadata.Tags)) + for idx, tag := range img.Metadata.Tags { + tags[idx] = tag.String() + } + theImg := StereoscopeImageSourceMetadata{ + ID: img.Metadata.ID, + UserInput: userInput, + ManifestDigest: img.Metadata.ManifestDigest, + Size: img.Metadata.Size, + MediaType: string(img.Metadata.MediaType), + Tags: tags, + Layers: make([]StereoscopeLayerMetadata, len(img.Layers)), + RawConfig: img.Metadata.RawConfig, + RawManifest: img.Metadata.RawManifest, + RepoDigests: img.Metadata.RepoDigests, + Architecture: img.Metadata.Architecture, + Variant: img.Metadata.Variant, + OS: img.Metadata.OS, + } + + // populate image metadata + for idx, l := range img.Layers { + theImg.Layers[idx] = StereoscopeLayerMetadata{ + MediaType: string(l.Metadata.MediaType), + Digest: l.Metadata.Digest, + Size: l.Metadata.Size, + } + } + return theImg +} diff --git a/syft/source/stereoscope_image_source.go b/syft/source/stereoscope_image_source.go new file mode 100644 index 000000000..e9c39d17f --- /dev/null +++ b/syft/source/stereoscope_image_source.go @@ -0,0 +1,245 @@ +package source + +import ( + "context" + "fmt" + + "github.com/bmatcuk/doublestar/v4" + "github.com/opencontainers/go-digest" + + "github.com/anchore/stereoscope" + "github.com/anchore/stereoscope/pkg/image" + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/internal/fileresolver" +) + +var _ Source = (*StereoscopeImageSource)(nil) + +type StereoscopeImageConfig struct { + Reference string + From image.Source + Platform *image.Platform + RegistryOptions *image.RegistryOptions + Exclude ExcludeConfig + Alias Alias +} + +type StereoscopeImageSource struct { + id artifact.ID + config StereoscopeImageConfig + image *image.Image + metadata StereoscopeImageSourceMetadata +} + +func NewFromStereoscopeImageObject(img *image.Image, reference string, alias *Alias) (*StereoscopeImageSource, error) { + var aliasVal Alias + if !alias.IsEmpty() { + 
aliasVal = *alias + } + cfg := StereoscopeImageConfig{ + Reference: reference, + Alias: aliasVal, + } + metadata := imageMetadataFromStereoscopeImage(img, cfg.Reference) + + return &StereoscopeImageSource{ + id: deriveIDFromStereoscopeImage(cfg.Alias, metadata), + config: cfg, + image: img, + metadata: metadata, + }, nil +} + +func NewFromStereoscopeImage(cfg StereoscopeImageConfig) (*StereoscopeImageSource, error) { + ctx := context.TODO() + + var opts []stereoscope.Option + if cfg.RegistryOptions != nil { + opts = append(opts, stereoscope.WithRegistryOptions(*cfg.RegistryOptions)) + } + + if cfg.Platform != nil { + opts = append(opts, stereoscope.WithPlatform(cfg.Platform.String())) + } + + img, err := stereoscope.GetImageFromSource(ctx, cfg.Reference, cfg.From, opts...) + if err != nil { + return nil, fmt.Errorf("unable to load image: %w", err) + } + + metadata := imageMetadataFromStereoscopeImage(img, cfg.Reference) + + return &StereoscopeImageSource{ + id: deriveIDFromStereoscopeImage(cfg.Alias, metadata), + config: cfg, + image: img, + metadata: metadata, + }, nil +} + +func (s StereoscopeImageSource) ID() artifact.ID { + return s.id +} + +func (s StereoscopeImageSource) Describe() Description { + name := s.metadata.UserInput + version := s.metadata.ManifestDigest + + a := s.config.Alias + if a.Name != "" { + name = a.Name + } + + if a.Version != "" { + version = a.Version + } + + return Description{ + ID: string(s.id), + Name: name, + Version: version, + Metadata: s.metadata, + } +} + +func (s StereoscopeImageSource) FileResolver(scope Scope) (file.Resolver, error) { + var res file.Resolver + var err error + + switch scope { + case SquashedScope: + res, err = fileresolver.NewFromContainerImageSquash(s.image) + case AllLayersScope: + res, err = fileresolver.NewFromContainerImageAllLayers(s.image) + default: + return nil, fmt.Errorf("bad image scope provided: %+v", scope) + } + + if err != nil { + return nil, err + } + + // image tree contains all paths, so we 
filter out the excluded entries afterward + if len(s.config.Exclude.Paths) > 0 { + res = fileresolver.NewExcludingDecorator(res, getImageExclusionFunction(s.config.Exclude.Paths)) + } + + return res, nil +} + +func (s StereoscopeImageSource) Close() error { + if s.image == nil { + return nil + } + return s.image.Cleanup() +} + +func imageMetadataFromStereoscopeImage(img *image.Image, reference string) StereoscopeImageSourceMetadata { + tags := make([]string, len(img.Metadata.Tags)) + for idx, tag := range img.Metadata.Tags { + tags[idx] = tag.String() + } + + layers := make([]StereoscopeLayerMetadata, len(img.Layers)) + for idx, l := range img.Layers { + layers[idx] = StereoscopeLayerMetadata{ + MediaType: string(l.Metadata.MediaType), + Digest: l.Metadata.Digest, + Size: l.Metadata.Size, + } + } + + return StereoscopeImageSourceMetadata{ + ID: img.Metadata.ID, + UserInput: reference, + ManifestDigest: img.Metadata.ManifestDigest, + Size: img.Metadata.Size, + MediaType: string(img.Metadata.MediaType), + Tags: tags, + Layers: layers, + RawConfig: img.Metadata.RawConfig, + RawManifest: img.Metadata.RawManifest, + RepoDigests: img.Metadata.RepoDigests, + Architecture: img.Metadata.Architecture, + Variant: img.Metadata.Variant, + OS: img.Metadata.OS, + } +} + +// deriveIDFromStereoscopeImage derives an artifact ID from the given image metadata. The order of data precedence is: +// 1. prefer a digest of the raw container image manifest +// 2. if no manifest digest is available, calculate a chain ID from the image layer metadata +// 3. if no layer metadata is available, use the user input string +// +// in all cases, if an alias is provided, it is additionally considered in the ID calculation. This allows for the +// same image to be scanned multiple times with different aliases and be considered logically different. 
+func deriveIDFromStereoscopeImage(alias Alias, metadata StereoscopeImageSourceMetadata) artifact.ID { + var input string + + if len(metadata.RawManifest) > 0 { + input = digest.Canonical.FromBytes(metadata.RawManifest).String() + } else { + // calculate chain ID for image sources where manifestDigest is not available + // https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid + input = calculateChainID(metadata.Layers) + if input == "" { + // TODO what happens here if image has no layers? + // is this case possible? + input = digest.Canonical.FromString(metadata.UserInput).String() + } + } + + if !alias.IsEmpty() { + // if the user provided an alias, we want to consider that in the artifact ID. This way if the user + // scans the same item but is considered to be logically different, then ID will express that. + aliasStr := fmt.Sprintf(":%s@%s", alias.Name, alias.Version) + input = digest.Canonical.FromString(input + aliasStr).String() + } + + return artifactIDFromDigest(input) +} + +func calculateChainID(lm []StereoscopeLayerMetadata) string { + if len(lm) < 1 { + return "" + } + + // DiffID(L0) = digest of layer 0 + // https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32 + chainID := lm[0].Digest + id := chain(chainID, lm[1:]) + + return id +} + +func chain(chainID string, layers []StereoscopeLayerMetadata) string { + if len(layers) < 1 { + return chainID + } + + chainID = digest.Canonical.FromString(layers[0].Digest + " " + chainID).String() + return chain(chainID, layers[1:]) +} + +func getImageExclusionFunction(exclusions []string) func(string) bool { + if len(exclusions) == 0 { + return nil + } + // add subpath exclusions + for _, exclusion := range exclusions { + exclusions = append(exclusions, exclusion+"/**") + } + return func(path string) bool { + for _, exclusion := range exclusions { + matches, err := doublestar.Match(exclusion, path) + if err != nil { + return 
false + } + if matches { + return true + } + } + return false + } +} diff --git a/syft/source/stereoscope_image_source_test.go b/syft/source/stereoscope_image_source_test.go new file mode 100644 index 000000000..8f1b8d4b2 --- /dev/null +++ b/syft/source/stereoscope_image_source_test.go @@ -0,0 +1,243 @@ +package source + +import ( + "crypto/sha256" + "fmt" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/stereoscope/pkg/image" + "github.com/anchore/stereoscope/pkg/imagetest" + "github.com/anchore/syft/syft/artifact" +) + +func Test_StereoscopeImage_Exclusions(t *testing.T) { + testCases := []struct { + desc string + input string + glob string + expected int + exclusions []string + }{ + // NOTE: in the Dockerfile, /target is moved to /, which makes /really a top-level dir + { + input: "image-simple", + desc: "a single path excluded", + glob: "**", + expected: 2, + exclusions: []string{"/really/**"}, + }, + { + input: "image-simple", + desc: "a directly referenced directory is excluded", + glob: "**", + expected: 2, + exclusions: []string{"/really"}, + }, + { + input: "image-simple", + desc: "a partial directory is not excluded", + glob: "**", + expected: 3, + exclusions: []string{"/reall"}, + }, + { + input: "image-simple", + desc: "exclude files deeper", + glob: "**", + expected: 2, + exclusions: []string{"**/nested/**"}, + }, + { + input: "image-simple", + desc: "files excluded with extension", + glob: "**", + expected: 2, + exclusions: []string{"**/*1.txt"}, + }, + { + input: "image-simple", + desc: "keep files with different extensions", + glob: "**", + expected: 3, + exclusions: []string{"**/target/**/*.jar"}, + }, + { + input: "image-simple", + desc: "file directly excluded", + glob: "**", + expected: 2, + exclusions: []string{"**/somefile-1.txt"}, // file-1 renamed to somefile-1 in Dockerfile + }, + } + + for _, test := range testCases { + t.Run(test.desc, func(t *testing.T) { + 
src, err := NewFromStereoscopeImage( + StereoscopeImageConfig{ + Reference: strings.SplitN(imagetest.PrepareFixtureImage(t, "docker-archive", test.input), ":", 2)[1], + From: image.DockerTarballSource, + Exclude: ExcludeConfig{ + Paths: test.exclusions, + }, + }, + ) + + require.NoError(t, err) + t.Cleanup(func() { + require.NoError(t, src.Close()) + }) + + res, err := src.FileResolver(SquashedScope) + require.NoError(t, err) + + contents, err := res.FilesByGlob(test.glob) + require.NoError(t, err) + + assert.Len(t, contents, test.expected) + }) + } +} + +func Test_StereoscopeImageSource_ID(t *testing.T) { + tests := []struct { + name string + alias Alias + metadata StereoscopeImageSourceMetadata + want artifact.ID + }{ + { + name: "use raw manifest over chain ID or user input", + metadata: StereoscopeImageSourceMetadata{ + UserInput: "user-input", + Layers: []StereoscopeLayerMetadata{ + { + Digest: "a", + }, + { + Digest: "b", + }, + { + Digest: "c", + }, + }, + RawManifest: []byte("raw-manifest"), + }, + want: func() artifact.ID { + hasher := sha256.New() + hasher.Write([]byte("raw-manifest")) + return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil))) + }(), + }, + { + name: "use chain ID over user input", + metadata: StereoscopeImageSourceMetadata{ + //UserInput: "user-input", + Layers: []StereoscopeLayerMetadata{ + { + Digest: "a", + }, + { + Digest: "b", + }, + { + Digest: "c", + }, + }, + }, + want: func() artifact.ID { + metadata := []StereoscopeLayerMetadata{ + { + Digest: "a", + }, + { + Digest: "b", + }, + { + Digest: "c", + }, + } + return artifact.ID(strings.TrimPrefix(calculateChainID(metadata), "sha256:")) + }(), + }, + { + name: "use user input last", + metadata: StereoscopeImageSourceMetadata{ + UserInput: "user-input", + }, + want: func() artifact.ID { + hasher := sha256.New() + hasher.Write([]byte("user-input")) + return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil))) + }(), + }, + { + name: "without alias (first)", + metadata: 
StereoscopeImageSourceMetadata{ + UserInput: "user-input", + Layers: []StereoscopeLayerMetadata{ + { + Digest: "a", + }, + { + Digest: "b", + }, + { + Digest: "c", + }, + }, + RawManifest: []byte("raw-manifest"), + }, + want: "85298926ecd92ed57688f13039017160cd728f04dd0d2d10a10629007106f107", + }, + { + name: "always consider alias (first)", + alias: Alias{ + Name: "alias", + Version: "version", + }, + metadata: StereoscopeImageSourceMetadata{ + UserInput: "user-input", + Layers: []StereoscopeLayerMetadata{ + { + Digest: "a", + }, + { + Digest: "b", + }, + { + Digest: "c", + }, + }, + RawManifest: []byte("raw-manifest"), + }, + want: "a8717e42449960c1dd4963f2f22bd69c7c105e7e82445be0a65aa1825d62ff0d", + }, + { + name: "without alias (last)", + metadata: StereoscopeImageSourceMetadata{ + UserInput: "user-input", + }, + want: "ab0dff627d80b9753193d7280bec8f45e8ec6b4cb0912c6fffcf7cd782d9739e", + }, + { + name: "always consider alias (last)", + alias: Alias{ + Name: "alias", + Version: "version", + }, + metadata: StereoscopeImageSourceMetadata{ + UserInput: "user-input", + }, + want: "fe86c0eecd5654d3c0c0b2176aa394aef6440347c241aa8d9b628dfdde4287cf", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, deriveIDFromStereoscopeImage(tt.alias, tt.metadata)) + }) + } +} diff --git a/syft/source/test-fixtures/file-index-filter/.1/something b/syft/source/test-fixtures/file-index-filter/.1/something new file mode 100644 index 000000000..6b584e8ec --- /dev/null +++ b/syft/source/test-fixtures/file-index-filter/.1/something @@ -0,0 +1 @@ +content \ No newline at end of file diff --git a/syft/source/test-fixtures/file-index-filter/.2 b/syft/source/test-fixtures/file-index-filter/.2 new file mode 100644 index 000000000..a459bc245 --- /dev/null +++ b/syft/source/test-fixtures/file-index-filter/.2 @@ -0,0 +1 @@ +something \ No newline at end of file diff --git a/syft/source/test-fixtures/file-index-filter/.vimrc 
b/syft/source/test-fixtures/file-index-filter/.vimrc new file mode 100644 index 000000000..7f865a925 --- /dev/null +++ b/syft/source/test-fixtures/file-index-filter/.vimrc @@ -0,0 +1 @@ +Another .vimrc file \ No newline at end of file diff --git a/syft/source/test-fixtures/file-index-filter/empty b/syft/source/test-fixtures/file-index-filter/empty new file mode 100644 index 000000000..e69de29bb diff --git a/test/integration/catalog_packages_test.go b/test/integration/catalog_packages_test.go index 2c88c0615..f2f153a52 100644 --- a/test/integration/catalog_packages_test.go +++ b/test/integration/catalog_packages_test.go @@ -26,13 +26,15 @@ func BenchmarkImagePackageCatalogers(b *testing.B) { for _, c := range cataloger.ImageCatalogers(cataloger.DefaultConfig()) { // in case of future alteration where state is persisted, assume no dependency is safe to reuse userInput := "docker-archive:" + tarPath - sourceInput, err := source.ParseInput(userInput, "") + detection, err := source.Detect(userInput, source.DefaultDetectConfig()) require.NoError(b, err) - theSource, cleanupSource, err := source.New(*sourceInput, nil, nil) - b.Cleanup(cleanupSource) + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) if err != nil { b.Fatalf("unable to get source: %+v", err) } + b.Cleanup(func() { + theSource.Close() + }) resolver, err := theSource.FileResolver(source.SquashedScope) if err != nil { diff --git a/test/integration/utils_test.go b/test/integration/utils_test.go index 77f500450..eeb583f65 100644 --- a/test/integration/utils_test.go +++ b/test/integration/utils_test.go @@ -12,15 +12,17 @@ import ( "github.com/anchore/syft/syft/source" ) -func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope, catalogerCfg []string) (sbom.SBOM, *source.Source) { +func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Scope, catalogerCfg []string) (sbom.SBOM, source.Source) { imagetest.GetFixtureImage(t, 
"docker-archive", fixtureImageName) tarPath := imagetest.GetFixtureImageTarPath(t, fixtureImageName) userInput := "docker-archive:" + tarPath - sourceInput, err := source.ParseInput(userInput, "") + detection, err := source.Detect(userInput, source.DefaultDetectConfig()) require.NoError(t, err) - theSource, cleanupSource, err := source.New(*sourceInput, nil, nil) - t.Cleanup(cleanupSource) + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) require.NoError(t, err) + t.Cleanup(func() { + theSource.Close() + }) c := cataloger.DefaultConfig() c.Catalogers = catalogerCfg @@ -37,7 +39,7 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco LinuxDistribution: actualDistro, }, Relationships: relationships, - Source: theSource.Metadata, + Source: theSource.Describe(), Descriptor: sbom.Descriptor{ Name: "syft", Version: "v0.42.0-bogus", @@ -50,13 +52,15 @@ func catalogFixtureImage(t *testing.T, fixtureImageName string, scope source.Sco }, theSource } -func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) { +func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, source.Source) { userInput := "dir:" + dir - sourceInput, err := source.ParseInput(userInput, "") + detection, err := source.Detect(userInput, source.DefaultDetectConfig()) require.NoError(t, err) - theSource, cleanupSource, err := source.New(*sourceInput, nil, nil) - t.Cleanup(cleanupSource) + theSource, err := detection.NewSource(source.DefaultDetectionSourceConfig()) require.NoError(t, err) + t.Cleanup(func() { + theSource.Close() + }) // TODO: this would be better with functional options (after/during API refactor) c := cataloger.DefaultConfig() @@ -72,6 +76,6 @@ func catalogDirectory(t *testing.T, dir string) (sbom.SBOM, *source.Source) { LinuxDistribution: actualDistro, }, Relationships: relationships, - Source: theSource.Metadata, + Source: theSource.Describe(), }, theSource }