diff --git a/cmd/syft/cli/cli.go b/cmd/syft/cli/cli.go index 9372ce775..53cc2eba8 100644 --- a/cmd/syft/cli/cli.go +++ b/cmd/syft/cli/cli.go @@ -48,6 +48,7 @@ func create(id clio.Identification, out io.Writer) (clio.Application, *cobra.Com commands.Cataloger(app), commands.Attest(app), commands.Convert(app), + commands.Split(app), clio.VersionCommand(id, schemaVersion), clio.ConfigCommand(app, nil), cranecmd.NewCmdAuthLogin(id.Name), // syft login uses the same command as crane diff --git a/cmd/syft/internal/commands/split.go b/cmd/syft/internal/commands/split.go new file mode 100644 index 000000000..316fce0b1 --- /dev/null +++ b/cmd/syft/internal/commands/split.go @@ -0,0 +1,264 @@ +package commands + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/anchore/fangs" + "github.com/spf13/cobra" + + "github.com/anchore/clio" + "github.com/anchore/syft/cmd/syft/internal/options" + "github.com/anchore/syft/cmd/syft/internal/split" + "github.com/anchore/syft/cmd/syft/internal/ui" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/format" + "github.com/anchore/syft/syft/format/syftjson" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" +) + +const ( + splitExample = ` {{.appName}} {{.command}} image.sbom.json split SBOM into one file per package + {{.appName}} {{.command}} image.sbom.json --pkg musl split only the musl package + {{.appName}} {{.command}} image.sbom.json --pkg musl --dir /tmp write output to /tmp directory + {{.appName}} {{.command}} image.sbom.json --drop source --drop location:fsid drop source and filesystem IDs +` +) + +// SplitOptions defines the options for the split command +type SplitOptions struct { + options.Config `yaml:",inline" mapstructure:",squash"` + options.UpdateCheck `yaml:",inline" mapstructure:",squash"` + Packages []string `yaml:"packages" json:"packages" mapstructure:"packages"` + OutputDir string 
`yaml:"output-dir" json:"output-dir" mapstructure:"output-dir"` + Drop []string `yaml:"drop" json:"drop" mapstructure:"drop"` +} + +var _ interface { + clio.FlagAdder + fangs.FieldDescriber +} = (*SplitOptions)(nil) + +func (o *SplitOptions) AddFlags(flags clio.FlagSet) { + flags.StringArrayVarP(&o.Packages, "pkg", "p", "package(s) to split (can be specified multiple times)") + flags.StringVarP(&o.OutputDir, "dir", "d", "output directory for split SBOMs (default: current directory)") + flags.StringArrayVarP(&o.Drop, "drop", "", "drop options to apply (source, descriptor, distro, pkg:*, file:*, location:*, all)") +} + +func (o *SplitOptions) DescribeFields(descriptions fangs.FieldDescriptionSet) { + descriptions.Add(&o.Packages, "package queries to match for splitting (ID, PURL, name, or name@version)") + descriptions.Add(&o.OutputDir, "output directory for split SBOM files") + descriptions.Add(&o.Drop, "options for dropping SBOM sections (source, descriptor, distro, pkg:licenses, pkg:metadata.files, file:metadata, file:digests, file:executable, file:unknowns, file:licenses, file:contents, location:fsid, location:non-primary-evidence, all)") +} + +// Split creates the split command +func Split(app clio.Application) *cobra.Command { + id := app.ID() + + opts := &SplitOptions{ + UpdateCheck: options.DefaultUpdateCheck(), + OutputDir: ".", + } + + return app.SetupCommand(&cobra.Command{ + Use: "split [SOURCE-SBOM] [flags]", + Short: "Split an SBOM into separate SBOMs per package", + Long: `[Experimental] Split a syft-format SBOM into one or more SBOMs, one per package. +Each output SBOM contains only the target package, its related packages (connected via +ownership-by-file-overlap and evident-by relationships), and their associated files. + +If no --pkg flags are specified, creates one SBOM file per package in the source SBOM. +If --pkg flags are specified, only creates SBOM files for the matching packages. + +Package matching (in order of precedence): + 1. 
Exact package ID + 2. Exact PURL or PURL prefix + 3. Case-insensitive package name + 4. name@version format + +Drop options: + source Drop the source object entirely + descriptor Drop the descriptor object + distro Drop the distro (Linux distribution) object + pkg:licenses Drop package licenses + pkg:metadata.files Drop files from package metadata (for FileOwner types) + file:metadata Drop file metadata (size, permissions, etc.) + file:digests Drop file digests + file:executable Drop executable metadata + file:unknowns Drop unknown file entries + file:licenses Drop file-level licenses + file:contents Drop file contents + location:fsid Clear FileSystemID from all coordinates + location:non-primary-evidence Drop locations without "evidence": "primary" annotation + all Apply all drop options above`, + Example: internal.Tprintf(splitExample, map[string]interface{}{ + "appName": id.Name, + "command": "split", + }), + Args: validateSplitArgs, + PreRunE: applicationUpdateCheck(id, &opts.UpdateCheck), + RunE: func(_ *cobra.Command, args []string) error { + restoreStdout := ui.CaptureStdoutToTraceLog() + defer restoreStdout() + + return RunSplit(opts, args[0]) + }, + }, opts) +} + +func validateSplitArgs(cmd *cobra.Command, args []string) error { + return validateArgs(cmd, args, "an SBOM argument is required") +} + +// RunSplit executes the split operation +func RunSplit(opts *SplitOptions, userInput string) error { + log.Warn("split is an experimental feature, run `syft split -h` for help") + + // validate drop options + for _, d := range opts.Drop { + if !split.ValidDropOption(d) { + return fmt.Errorf("invalid drop option: %q", d) + } + } + + // parse drop options + dropOpts := split.ParseDropOptions(opts.Drop) + dropLocationFSID := split.HasDropLocationFSID(dropOpts) + dropNonPrimaryEvidence := split.HasDropLocationNonPrimaryEvidence(dropOpts) + + // read SBOM + var reader io.ReadSeekCloser + if userInput == "-" { + reader = internal.NewBufferedSeeker(os.Stdin) + } else 
{ + f, err := os.Open(userInput) + if err != nil { + return fmt.Errorf("failed to open SBOM file: %w", err) + } + defer func() { + _ = f.Close() + }() + reader = f + } + + s, formatID, _, err := format.Decode(reader) + if err != nil { + return fmt.Errorf("failed to decode SBOM: %w", err) + } + + if s == nil { + return fmt.Errorf("no SBOM produced") + } + + // warn if not syft-json format + if formatID != syftjson.ID { + log.Warnf("input SBOM is not syft-json format (detected: %s), some data may be lost", formatID) + } + + // determine target packages + var targetPackages []pkg.Package + if len(opts.Packages) == 0 { + // split all packages + targetPackages = s.Artifacts.Packages.Sorted() + } else { + // match specified packages + targetPackages = split.MatchPackages(s.Artifacts.Packages, opts.Packages) + if len(targetPackages) == 0 { + return fmt.Errorf("no packages matched the specified queries: %v", opts.Packages) + } + log.Infof("matched %d package(s) for splitting", len(targetPackages)) + } + + // ensure output directory exists + if err := os.MkdirAll(opts.OutputDir, 0o755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // split SBOM + results := split.Split(*s, targetPackages, dropLocationFSID, dropNonPrimaryEvidence) + + // create encoder + encoder, err := syftjson.NewFormatEncoderWithConfig(syftjson.EncoderConfig{ + Pretty: true, + }) + if err != nil { + return fmt.Errorf("failed to create encoder: %w", err) + } + + // get fields to remove from JSON output + fieldsToRemove := split.GetJSONFieldsToRemove(dropOpts) + + // write output files + for _, result := range results { + // apply drop options (excluding location:* options which are handled in Split) + filteredDropOpts := make([]split.DropOption, 0, len(dropOpts)) + for _, opt := range dropOpts { + if opt != split.DropLocationFSID && opt != split.DropLocationNonPrimaryEvidence { + filteredDropOpts = append(filteredDropOpts, opt) + } + } + 
split.ApplyDropOptions(&result.SBOM, filteredDropOpts) + + // generate output filename using package ID + outputFile := filepath.Join(opts.OutputDir, fmt.Sprintf("%s.json", result.TargetPackage.ID())) + + if err := writeSBOMWithFieldRemoval(encoder, result.SBOM, outputFile, fieldsToRemove); err != nil { + return fmt.Errorf("failed to write SBOM for package %s: %w", result.TargetPackage.Name, err) + } + + log.Infof("wrote %s (%s@%s)", outputFile, result.TargetPackage.Name, result.TargetPackage.Version) + } + + log.Infof("split complete: %d SBOM(s) written to %s", len(results), opts.OutputDir) + return nil +} + +func writeSBOMWithFieldRemoval(encoder sbom.FormatEncoder, s sbom.SBOM, outputFile string, fieldsToRemove []string) error { + // if no fields to remove, use direct encoding + if len(fieldsToRemove) == 0 { + f, err := os.Create(outputFile) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer func() { + _ = f.Close() + }() + return encoder.Encode(f, s) + } + + // encode to buffer first + var buf bytes.Buffer + if err := encoder.Encode(&buf, s); err != nil { + return fmt.Errorf("failed to encode SBOM: %w", err) + } + + // parse as generic map to remove fields + var doc map[string]interface{} + if err := json.Unmarshal(buf.Bytes(), &doc); err != nil { + return fmt.Errorf("failed to parse encoded SBOM: %w", err) + } + + // remove specified fields + for _, field := range fieldsToRemove { + delete(doc, field) + } + + // re-encode with pretty printing + output, err := json.MarshalIndent(doc, "", " ") + if err != nil { + return fmt.Errorf("failed to re-encode SBOM: %w", err) + } + + // write to file + if err := os.WriteFile(outputFile, append(output, '\n'), 0o644); err != nil { + return fmt.Errorf("failed to write file: %w", err) + } + + return nil +} diff --git a/cmd/syft/internal/commands/split_test.go b/cmd/syft/internal/commands/split_test.go new file mode 100644 index 000000000..2d1c2d1b2 --- /dev/null +++ 
b/cmd/syft/internal/commands/split_test.go @@ -0,0 +1,119 @@ +package commands + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestRunSplit_BasicExecution(t *testing.T) { + // use the example SBOM + sbomPath := "../../../../examples/decode_sbom/alpine.syft.json" + if _, err := os.Stat(sbomPath); os.IsNotExist(err) { + t.Skip("example SBOM not found, skipping integration test") + } + + // create temporary output directory + tmpDir, err := os.MkdirTemp("", "syft-split-test-*") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + opts := &SplitOptions{ + Packages: []string{"alpine-baselayout"}, + OutputDir: tmpDir, + Drop: []string{}, + } + + err = RunSplit(opts, sbomPath) + require.NoError(t, err) + + // verify output files were created + entries, err := os.ReadDir(tmpDir) + require.NoError(t, err) + assert.NotEmpty(t, entries, "expected at least one output file") + + // verify output file is valid JSON + for _, entry := range entries { + if entry.IsDir() { + continue + } + content, err := os.ReadFile(filepath.Join(tmpDir, entry.Name())) + require.NoError(t, err) + assert.NotEmpty(t, content) + // basic JSON validation - should start with { + assert.True(t, len(content) > 0 && content[0] == '{', "output should be valid JSON") + } +} + +func TestRunSplit_InvalidDropOption(t *testing.T) { + opts := &SplitOptions{ + Packages: []string{"test"}, + OutputDir: ".", + Drop: []string{"invalid-option"}, + } + + err := RunSplit(opts, "some-sbom.json") + require.Error(t, err) + assert.Contains(t, err.Error(), "invalid drop option") +} + +func TestRunSplit_NoMatchingPackages(t *testing.T) { + sbomPath := "../../../../examples/decode_sbom/alpine.syft.json" + if _, err := os.Stat(sbomPath); os.IsNotExist(err) { + t.Skip("example SBOM not found, skipping integration test") + } + + tmpDir, err := os.MkdirTemp("", "syft-split-test-*") + require.NoError(t, err) + defer 
os.RemoveAll(tmpDir) + + opts := &SplitOptions{ + Packages: []string{"nonexistent-package-xyz"}, + OutputDir: tmpDir, + Drop: []string{}, + } + + err = RunSplit(opts, sbomPath) + require.Error(t, err) + assert.Contains(t, err.Error(), "no packages matched") +} + +func TestRunSplit_WithDropOptions(t *testing.T) { + sbomPath := "../../../../examples/decode_sbom/alpine.syft.json" + if _, err := os.Stat(sbomPath); os.IsNotExist(err) { + t.Skip("example SBOM not found, skipping integration test") + } + + tmpDir, err := os.MkdirTemp("", "syft-split-test-*") + require.NoError(t, err) + defer os.RemoveAll(tmpDir) + + opts := &SplitOptions{ + Packages: []string{"alpine-baselayout"}, + OutputDir: tmpDir, + Drop: []string{"source", "location:fsid"}, + } + + err = RunSplit(opts, sbomPath) + require.NoError(t, err) + + // verify output file was created + entries, err := os.ReadDir(tmpDir) + require.NoError(t, err) + assert.NotEmpty(t, entries) +} + +func TestRunSplit_FileNotFound(t *testing.T) { + opts := &SplitOptions{ + Packages: []string{"test"}, + OutputDir: ".", + Drop: []string{}, + } + + err := RunSplit(opts, "/nonexistent/path/sbom.json") + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to open SBOM file") +} diff --git a/cmd/syft/internal/split/dropper.go b/cmd/syft/internal/split/dropper.go new file mode 100644 index 000000000..1a43adeb3 --- /dev/null +++ b/cmd/syft/internal/split/dropper.go @@ -0,0 +1,307 @@ +package split + +import ( + "slices" + "strings" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +// DropOption represents a drop option for filtering SBOM contents +type DropOption string + +const ( + DropSource DropOption = "source" + DropDescriptor DropOption = "descriptor" + DropDistro DropOption = "distro" + DropPkgLicenses DropOption = "pkg:licenses" + DropPkgMetadataFiles DropOption = 
"pkg:metadata.files" + DropFileMetadata DropOption = "file:metadata" + DropFileDigests DropOption = "file:digests" + DropFileExecutable DropOption = "file:executable" + DropFileUnknowns DropOption = "file:unknowns" + DropFileLicenses DropOption = "file:licenses" + DropFileContents DropOption = "file:contents" + DropLocationFSID DropOption = "location:fsid" + DropLocationNonPrimaryEvidence DropOption = "location:non-primary-evidence" + DropAll DropOption = "all" +) + +// AllDropOptions returns all valid drop options (excluding "all") +func AllDropOptions() []DropOption { + return []DropOption{ + DropSource, + DropDescriptor, + DropDistro, + DropPkgLicenses, + DropPkgMetadataFiles, + DropFileMetadata, + DropFileDigests, + DropFileExecutable, + DropFileUnknowns, + DropFileLicenses, + DropFileContents, + DropLocationFSID, + DropLocationNonPrimaryEvidence, + } +} + +// ParseDropOptions parses string values into DropOption values +func ParseDropOptions(values []string) []DropOption { + var opts []DropOption + for _, v := range values { + opt := DropOption(strings.ToLower(strings.TrimSpace(v))) + if opt == DropAll { + return AllDropOptions() + } + opts = append(opts, opt) + } + return opts +} + +// ApplyDropOptions applies the specified drop options to the SBOM +func ApplyDropOptions(s *sbom.SBOM, opts []DropOption) { + if s == nil || len(opts) == 0 { + return + } + + for _, opt := range opts { + switch opt { + case DropSource: + s.Source = source.Description{} + case DropDescriptor: + s.Descriptor = sbom.Descriptor{} + case DropDistro: + s.Artifacts.LinuxDistribution = nil + case DropPkgLicenses: + clearPackageLicenses(s) + case DropPkgMetadataFiles: + clearPackageMetadataFiles(s) + case DropFileMetadata: + s.Artifacts.FileMetadata = nil + case DropFileDigests: + s.Artifacts.FileDigests = nil + case DropFileExecutable: + s.Artifacts.Executables = nil + case DropFileUnknowns: + s.Artifacts.Unknowns = nil + case DropFileLicenses: + s.Artifacts.FileLicenses = nil + case 
DropFileContents: + s.Artifacts.FileContents = nil + case DropLocationFSID: + clearFileSystemIDs(s) + case DropLocationNonPrimaryEvidence: + clearNonPrimaryEvidenceLocations(s) + } + } +} + +// GetJSONFieldsToRemove returns the JSON field names that should be completely removed from output +func GetJSONFieldsToRemove(opts []DropOption) []string { + var fields []string + for _, opt := range opts { + switch opt { + case DropSource: + fields = append(fields, "source") + case DropDescriptor: + fields = append(fields, "descriptor") + case DropDistro: + fields = append(fields, "distro") + } + } + return fields +} + +// clearFileSystemIDs clears FileSystemID from all coordinates in file artifacts and relationships. +// Note: package locations are handled separately in the splitter when creating new packages. +func clearFileSystemIDs(s *sbom.SBOM) { + // clear from file metadata + if s.Artifacts.FileMetadata != nil { + newMetadata := make(map[file.Coordinates]file.Metadata) + for coord, meta := range s.Artifacts.FileMetadata { + newCoord := file.Coordinates{RealPath: coord.RealPath} + newMetadata[newCoord] = meta + } + s.Artifacts.FileMetadata = newMetadata + } + + // clear from file digests + if s.Artifacts.FileDigests != nil { + newDigests := make(map[file.Coordinates][]file.Digest) + for coord, digests := range s.Artifacts.FileDigests { + newCoord := file.Coordinates{RealPath: coord.RealPath} + newDigests[newCoord] = digests + } + s.Artifacts.FileDigests = newDigests + } + + // clear from file contents + if s.Artifacts.FileContents != nil { + newContents := make(map[file.Coordinates]string) + for coord, contents := range s.Artifacts.FileContents { + newCoord := file.Coordinates{RealPath: coord.RealPath} + newContents[newCoord] = contents + } + s.Artifacts.FileContents = newContents + } + + // clear from file licenses + if s.Artifacts.FileLicenses != nil { + newLicenses := make(map[file.Coordinates][]file.License) + for coord, licenses := range s.Artifacts.FileLicenses { 
+ newCoord := file.Coordinates{RealPath: coord.RealPath} + newLicenses[newCoord] = licenses + } + s.Artifacts.FileLicenses = newLicenses + } + + // clear from executables + if s.Artifacts.Executables != nil { + newExec := make(map[file.Coordinates]file.Executable) + for coord, exec := range s.Artifacts.Executables { + newCoord := file.Coordinates{RealPath: coord.RealPath} + newExec[newCoord] = exec + } + s.Artifacts.Executables = newExec + } + + // clear from unknowns + if s.Artifacts.Unknowns != nil { + newUnknowns := make(map[file.Coordinates][]string) + for coord, unknowns := range s.Artifacts.Unknowns { + newCoord := file.Coordinates{RealPath: coord.RealPath} + newUnknowns[newCoord] = unknowns + } + s.Artifacts.Unknowns = newUnknowns + } + + // clear from relationships that reference file coordinates + newRelationships := make([]artifact.Relationship, 0, len(s.Relationships)) + for _, rel := range s.Relationships { + newRel := rel + + if coord, ok := rel.From.(file.Coordinates); ok { + newRel.From = file.Coordinates{RealPath: coord.RealPath} + } + if coord, ok := rel.To.(file.Coordinates); ok { + newRel.To = file.Coordinates{RealPath: coord.RealPath} + } + + newRelationships = append(newRelationships, newRel) + } + s.Relationships = newRelationships +} + +// clearPackageLicenses removes licenses from all packages in the SBOM +func clearPackageLicenses(s *sbom.SBOM) { + if s.Artifacts.Packages == nil { + return + } + + for p := range s.Artifacts.Packages.Enumerate() { + p.Licenses = pkg.NewLicenseSet() + s.Artifacts.Packages.Delete(p.ID()) + s.Artifacts.Packages.Add(p) + } +} + +// clearNonPrimaryEvidenceLocations removes locations that don't have "evidence": "primary" annotation +func clearNonPrimaryEvidenceLocations(s *sbom.SBOM) { + if s.Artifacts.Packages == nil { + return + } + + for p := range s.Artifacts.Packages.Enumerate() { + newLocations := file.NewLocationSet() + for _, loc := range p.Locations.ToSlice() { + if loc.Annotations != nil && 
loc.Annotations["evidence"] == "primary" { + newLocations.Add(loc) + } + } + p.Locations = newLocations + s.Artifacts.Packages.Delete(p.ID()) + s.Artifacts.Packages.Add(p) + } +} + +// clearPackageMetadataFiles clears the Files field from any package metadata that implements FileOwner +func clearPackageMetadataFiles(s *sbom.SBOM) { + if s.Artifacts.Packages == nil { + return + } + + for p := range s.Artifacts.Packages.Enumerate() { + if p.Metadata == nil { + continue + } + + newMetadata := clearMetadataFiles(p.Metadata) + if newMetadata != nil { + p.Metadata = newMetadata + s.Artifacts.Packages.Delete(p.ID()) + s.Artifacts.Packages.Add(p) + } + } +} + +// clearMetadataFiles returns a copy of the metadata with Files field cleared if it implements FileOwner. +// Returns nil if the metadata type is not recognized as a FileOwner implementer. +func clearMetadataFiles(metadata any) any { + switch m := metadata.(type) { + case pkg.ApkDBEntry: + m.Files = nil + return m + case pkg.RpmDBEntry: + m.Files = nil + return m + case pkg.DpkgDBEntry: + m.Files = nil + return m + case pkg.AlpmDBEntry: + m.Files = nil + return m + case pkg.PortageEntry: + m.Files = nil + return m + case pkg.NixStoreEntry: + m.Files = nil + return m + case pkg.PythonPackage: + m.Files = nil + return m + case pkg.CondaMetaPackage: + m.Files = nil + return m + case pkg.BitnamiSBOMEntry: + m.Files = nil + return m + case pkg.JavaVMInstallation: + m.Files = nil + return m + } + return nil +} + +// ValidDropOption checks if a string is a valid drop option +func ValidDropOption(s string) bool { + opt := DropOption(strings.ToLower(strings.TrimSpace(s))) + if opt == DropAll { + return true + } + return slices.Contains(AllDropOptions(), opt) +} + +// HasDropLocationFSID checks if the drop options include location:fsid +func HasDropLocationFSID(opts []DropOption) bool { + return slices.Contains(opts, DropLocationFSID) +} + +// HasDropLocationNonPrimaryEvidence checks if the drop options include 
location:non-primary-evidence +func HasDropLocationNonPrimaryEvidence(opts []DropOption) bool { + return slices.Contains(opts, DropLocationNonPrimaryEvidence) +} diff --git a/cmd/syft/internal/split/dropper_test.go b/cmd/syft/internal/split/dropper_test.go new file mode 100644 index 000000000..d41ab1af3 --- /dev/null +++ b/cmd/syft/internal/split/dropper_test.go @@ -0,0 +1,499 @@ +package split + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +func TestParseDropOptions(t *testing.T) { + tests := []struct { + name string + values []string + want []DropOption + }{ + { + name: "single option", + values: []string{"source"}, + want: []DropOption{DropSource}, + }, + { + name: "multiple options", + values: []string{"source", "descriptor", "file:digests"}, + want: []DropOption{DropSource, DropDescriptor, DropFileDigests}, + }, + { + name: "all option expands", + values: []string{"all"}, + want: AllDropOptions(), + }, + { + name: "case insensitive", + values: []string{"SOURCE", "File:Digests"}, + want: []DropOption{DropSource, DropFileDigests}, + }, + { + name: "with whitespace", + values: []string{" source ", " location:fsid"}, + want: []DropOption{DropSource, DropLocationFSID}, + }, + { + name: "empty values", + values: []string{}, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ParseDropOptions(tt.values) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestValidDropOption(t *testing.T) { + tests := []struct { + name string + input string + want bool + }{ + {name: "valid source", input: "source", want: true}, + {name: "valid descriptor", input: "descriptor", want: true}, + {name: "valid distro", input: "distro", want: true}, + {name: "valid file:metadata", input: 
"file:metadata", want: true}, + {name: "valid file:digests", input: "file:digests", want: true}, + {name: "valid file:executable", input: "file:executable", want: true}, + {name: "valid file:unknowns", input: "file:unknowns", want: true}, + {name: "valid file:licenses", input: "file:licenses", want: true}, + {name: "valid file:contents", input: "file:contents", want: true}, + {name: "valid location:fsid", input: "location:fsid", want: true}, + {name: "valid location:non-primary-evidence", input: "location:non-primary-evidence", want: true}, + {name: "valid pkg:licenses", input: "pkg:licenses", want: true}, + {name: "valid pkg:metadata.files", input: "pkg:metadata.files", want: true}, + {name: "valid all", input: "all", want: true}, + {name: "case insensitive", input: "SOURCE", want: true}, + {name: "invalid option", input: "invalid", want: false}, + {name: "partial match", input: "file", want: false}, + {name: "empty string", input: "", want: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := ValidDropOption(tt.input) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestApplyDropOptions(t *testing.T) { + coord := file.Coordinates{ + RealPath: "/test/path", + FileSystemID: "layer123", + } + + baseSBOM := func() *sbom.SBOM { + return &sbom.SBOM{ + Source: source.Description{ + ID: "test-id", + Name: "test-source", + }, + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "1.0.0", + }, + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(), + FileMetadata: map[file.Coordinates]file.Metadata{coord: {}}, + FileDigests: map[file.Coordinates][]file.Digest{coord: {{Algorithm: "sha256", Value: "abc"}}}, + FileContents: map[file.Coordinates]string{coord: "contents"}, + FileLicenses: map[file.Coordinates][]file.License{coord: {{Value: "MIT"}}}, + Executables: map[file.Coordinates]file.Executable{coord: {}}, + Unknowns: map[file.Coordinates][]string{coord: {"unknown"}}, + }, + Relationships: []artifact.Relationship{ + { + 
From: coord, + To: coord, + Type: artifact.EvidentByRelationship, + }, + }, + } + } + + tests := []struct { + name string + opts []DropOption + verify func(t *testing.T, s *sbom.SBOM) + }{ + { + name: "drop source", + opts: []DropOption{DropSource}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Empty(t, s.Source.ID) + assert.Empty(t, s.Source.Name) + // other fields should be preserved + assert.Equal(t, "syft", s.Descriptor.Name) + }, + }, + { + name: "drop descriptor", + opts: []DropOption{DropDescriptor}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Empty(t, s.Descriptor.Name) + assert.Empty(t, s.Descriptor.Version) + // other fields should be preserved + assert.Equal(t, "test-id", s.Source.ID) + }, + }, + { + name: "drop distro", + opts: []DropOption{DropDistro}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.LinuxDistribution) + // other fields should be preserved + assert.Equal(t, "test-id", s.Source.ID) + }, + }, + { + name: "drop file:metadata", + opts: []DropOption{DropFileMetadata}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.FileMetadata) + // other file artifacts should be preserved + assert.NotNil(t, s.Artifacts.FileDigests) + }, + }, + { + name: "drop file:digests", + opts: []DropOption{DropFileDigests}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.FileDigests) + // other file artifacts should be preserved + assert.NotNil(t, s.Artifacts.FileMetadata) + }, + }, + { + name: "drop file:executable", + opts: []DropOption{DropFileExecutable}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.Executables) + }, + }, + { + name: "drop file:unknowns", + opts: []DropOption{DropFileUnknowns}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.Unknowns) + }, + }, + { + name: "drop file:licenses", + opts: []DropOption{DropFileLicenses}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.FileLicenses) + 
}, + }, + { + name: "drop file:contents", + opts: []DropOption{DropFileContents}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Nil(t, s.Artifacts.FileContents) + }, + }, + { + name: "drop location:fsid clears FileSystemID", + opts: []DropOption{DropLocationFSID}, + verify: func(t *testing.T, s *sbom.SBOM) { + // check that FileSystemID is cleared from file metadata + for coord := range s.Artifacts.FileMetadata { + assert.Empty(t, coord.FileSystemID, "FileSystemID should be empty in FileMetadata") + assert.Equal(t, "/test/path", coord.RealPath) + } + // check relationships + for _, rel := range s.Relationships { + if c, ok := rel.From.(file.Coordinates); ok { + assert.Empty(t, c.FileSystemID, "FileSystemID should be empty in relationship From") + } + if c, ok := rel.To.(file.Coordinates); ok { + assert.Empty(t, c.FileSystemID, "FileSystemID should be empty in relationship To") + } + } + }, + }, + { + name: "drop pkg:metadata.files clears files from ApkDBEntry", + opts: []DropOption{DropPkgMetadataFiles}, + verify: func(t *testing.T, s *sbom.SBOM) { + // first add a package with ApkDBEntry metadata + p := pkg.Package{ + Name: "test-apk", + Version: "1.0.0", + Type: pkg.ApkPkg, + Metadata: pkg.ApkDBEntry{ + Package: "test-apk", + Files: []pkg.ApkFileRecord{ + {Path: "/usr/bin/test"}, + }, + }, + } + s.Artifacts.Packages.Add(p) + + ApplyDropOptions(s, []DropOption{DropPkgMetadataFiles}) + + // verify files were cleared + for p := range s.Artifacts.Packages.Enumerate() { + if p.Name == "test-apk" { + meta, ok := p.Metadata.(pkg.ApkDBEntry) + require.True(t, ok) + assert.Nil(t, meta.Files) + } + } + }, + }, + { + name: "drop pkg:metadata.files clears files from RpmDBEntry", + opts: []DropOption{DropPkgMetadataFiles}, + verify: func(t *testing.T, s *sbom.SBOM) { + p := pkg.Package{ + Name: "test-rpm", + Version: "1.0.0", + Type: pkg.RpmPkg, + Metadata: pkg.RpmDBEntry{ + Name: "test-rpm", + Files: []pkg.RpmFileRecord{ + {Path: "/usr/bin/test"}, + }, + }, + } + 
s.Artifacts.Packages.Add(p) + + ApplyDropOptions(s, []DropOption{DropPkgMetadataFiles}) + + for p := range s.Artifacts.Packages.Enumerate() { + if p.Name == "test-rpm" { + meta, ok := p.Metadata.(pkg.RpmDBEntry) + require.True(t, ok) + assert.Nil(t, meta.Files) + } + } + }, + }, + { + name: "drop pkg:metadata.files clears files from DpkgDBEntry", + opts: []DropOption{DropPkgMetadataFiles}, + verify: func(t *testing.T, s *sbom.SBOM) { + p := pkg.Package{ + Name: "test-deb", + Version: "1.0.0", + Type: pkg.DebPkg, + Metadata: pkg.DpkgDBEntry{ + Package: "test-deb", + Files: []pkg.DpkgFileRecord{ + {Path: "/usr/bin/test"}, + }, + }, + } + s.Artifacts.Packages.Add(p) + + ApplyDropOptions(s, []DropOption{DropPkgMetadataFiles}) + + for p := range s.Artifacts.Packages.Enumerate() { + if p.Name == "test-deb" { + meta, ok := p.Metadata.(pkg.DpkgDBEntry) + require.True(t, ok) + assert.Nil(t, meta.Files) + } + } + }, + }, + { + name: "drop pkg:metadata.files clears files from PythonPackage", + opts: []DropOption{DropPkgMetadataFiles}, + verify: func(t *testing.T, s *sbom.SBOM) { + p := pkg.Package{ + Name: "test-python", + Version: "1.0.0", + Type: pkg.PythonPkg, + Metadata: pkg.PythonPackage{ + Name: "test-python", + Files: []pkg.PythonFileRecord{ + {Path: "/usr/lib/python/test.py"}, + }, + }, + } + s.Artifacts.Packages.Add(p) + + ApplyDropOptions(s, []DropOption{DropPkgMetadataFiles}) + + for p := range s.Artifacts.Packages.Enumerate() { + if p.Name == "test-python" { + meta, ok := p.Metadata.(pkg.PythonPackage) + require.True(t, ok) + assert.Nil(t, meta.Files) + } + } + }, + }, + { + name: "drop pkg:metadata.files does nothing for non-FileOwner metadata", + opts: []DropOption{DropPkgMetadataFiles}, + verify: func(t *testing.T, s *sbom.SBOM) { + // add a package with metadata that doesn't implement FileOwner + p := pkg.Package{ + Name: "test-npm", + Version: "1.0.0", + Type: pkg.NpmPkg, + Metadata: pkg.NpmPackage{ + Name: "test-npm", + Version: "1.0.0", + }, + } + 
s.Artifacts.Packages.Add(p) + + ApplyDropOptions(s, []DropOption{DropPkgMetadataFiles}) + + // verify package still exists with metadata intact + for p := range s.Artifacts.Packages.Enumerate() { + if p.Name == "test-npm" { + meta, ok := p.Metadata.(pkg.NpmPackage) + require.True(t, ok) + assert.Equal(t, "test-npm", meta.Name) + } + } + }, + }, + { + name: "nil SBOM does not panic", + opts: []DropOption{DropSource}, + verify: func(t *testing.T, s *sbom.SBOM) { + // nothing to verify, just ensure no panic + }, + }, + { + name: "empty options does nothing", + opts: []DropOption{}, + verify: func(t *testing.T, s *sbom.SBOM) { + assert.Equal(t, "test-id", s.Source.ID) + assert.Equal(t, "syft", s.Descriptor.Name) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var s *sbom.SBOM + if tt.name != "nil SBOM does not panic" { + s = baseSBOM() + } + + // should not panic + require.NotPanics(t, func() { + ApplyDropOptions(s, tt.opts) + }) + + if s != nil { + tt.verify(t, s) + } + }) + } +} + +func TestHasDropLocationFSID(t *testing.T) { + tests := []struct { + name string + opts []DropOption + want bool + }{ + { + name: "has location:fsid", + opts: []DropOption{DropSource, DropLocationFSID, DropDescriptor}, + want: true, + }, + { + name: "no location:fsid", + opts: []DropOption{DropSource, DropDescriptor}, + want: false, + }, + { + name: "empty opts", + opts: []DropOption{}, + want: false, + }, + { + name: "nil opts", + opts: nil, + want: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := HasDropLocationFSID(tt.opts) + assert.Equal(t, tt.want, got) + }) + } +} + +func TestGetJSONFieldsToRemove(t *testing.T) { + tests := []struct { + name string + opts []DropOption + want []string + }{ + { + name: "source only", + opts: []DropOption{DropSource}, + want: []string{"source"}, + }, + { + name: "descriptor only", + opts: []DropOption{DropDescriptor}, + want: []string{"descriptor"}, + }, + { + name: 
"distro only", + opts: []DropOption{DropDistro}, + want: []string{"distro"}, + }, + { + name: "all three", + opts: []DropOption{DropSource, DropDescriptor, DropDistro}, + want: []string{"source", "descriptor", "distro"}, + }, + { + name: "mixed with file options", + opts: []DropOption{DropSource, DropFileDigests, DropDescriptor, DropFileMetadata}, + want: []string{"source", "descriptor"}, + }, + { + name: "file options only returns empty", + opts: []DropOption{DropFileDigests, DropFileMetadata, DropLocationFSID}, + want: nil, + }, + { + name: "empty opts", + opts: []DropOption{}, + want: nil, + }, + { + name: "nil opts", + opts: nil, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := GetJSONFieldsToRemove(tt.opts) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/cmd/syft/internal/split/matcher.go b/cmd/syft/internal/split/matcher.go new file mode 100644 index 000000000..44a010bd6 --- /dev/null +++ b/cmd/syft/internal/split/matcher.go @@ -0,0 +1,92 @@ +package split + +import ( + "strings" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/pkg" +) + +// MatchPackages finds packages in the collection that match the given queries. +// Match order (first match wins): +// 1. Exact package ID match +// 2. Exact PURL match or PURL prefix match +// 3. Case-insensitive name match +// 4. 
name@version format match
+func MatchPackages(collection *pkg.Collection, queries []string) []pkg.Package {
+	if collection == nil || len(queries) == 0 {
+		return nil
+	}
+
+	// build indexes for efficient lookups
+	byID := make(map[artifact.ID]pkg.Package)
+	byPURL := make(map[string]pkg.Package)
+	byNameLower := make(map[string][]pkg.Package)
+	byNameVersion := make(map[string]pkg.Package)
+
+	for p := range collection.Enumerate() {
+		byID[p.ID()] = p
+		if p.PURL != "" {
+			byPURL[p.PURL] = p
+		}
+		nameLower := strings.ToLower(p.Name)
+		byNameLower[nameLower] = append(byNameLower[nameLower], p)
+		nameVersion := strings.ToLower(p.Name + "@" + p.Version)
+		byNameVersion[nameVersion] = p
+	}
+
+	// track matched packages to avoid duplicates
+	matched := make(map[artifact.ID]pkg.Package)
+
+	for _, query := range queries {
+		// 1. exact package ID match
+		if p, ok := byID[artifact.ID(query)]; ok {
+			matched[p.ID()] = p
+			continue
+		}
+
+		// 2. exact PURL match
+		if p, ok := byPURL[query]; ok {
+			matched[p.ID()] = p
+			continue
+		}
+
+		// 2b. PURL prefix match (e.g., "pkg:apk/alpine/musl" matches "pkg:apk/alpine/musl@1.2.2")
+		if strings.HasPrefix(query, "pkg:") {
+			// track hits for THIS query only; testing len(matched) here would
+			// wrongly skip the name-based fallbacks whenever an earlier query
+			// had already matched something
+			prefixMatched := false
+			for purl, p := range byPURL {
+				if strings.HasPrefix(purl, query) {
+					matched[p.ID()] = p
+					prefixMatched = true
+				}
+			}
+			if prefixMatched {
+				continue
+			}
+		}
+
+		queryLower := strings.ToLower(query)
+
+		// 3. case-insensitive name match
+		if pkgs, ok := byNameLower[queryLower]; ok {
+			for _, p := range pkgs {
+				matched[p.ID()] = p
+			}
+			continue
+		}
+
+		// 4. 
name@version format match + if p, ok := byNameVersion[queryLower]; ok { + matched[p.ID()] = p + continue + } + } + + // convert map to slice + result := make([]pkg.Package, 0, len(matched)) + for _, p := range matched { + result = append(result, p) + } + + // sort for stable output + pkg.Sort(result) + return result +} diff --git a/cmd/syft/internal/split/matcher_test.go b/cmd/syft/internal/split/matcher_test.go new file mode 100644 index 000000000..54911209a --- /dev/null +++ b/cmd/syft/internal/split/matcher_test.go @@ -0,0 +1,124 @@ +package split + +import ( + "testing" + + "github.com/google/go-cmp/cmp" + "github.com/google/go-cmp/cmp/cmpopts" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/pkg" +) + +func TestMatchPackages(t *testing.T) { + // create test packages + pkgA := pkg.Package{ + Name: "alpine-baselayout", + Version: "3.2.0-r7", + PURL: "pkg:apk/alpine/alpine-baselayout@3.2.0-r7", + } + pkgA.SetID() + + pkgB := pkg.Package{ + Name: "musl", + Version: "1.2.2-r0", + PURL: "pkg:apk/alpine/musl@1.2.2-r0", + } + pkgB.SetID() + + pkgC := pkg.Package{ + Name: "Musl", // different case + Version: "2.0.0", + PURL: "pkg:apk/alpine/Musl@2.0.0", + } + pkgC.SetID() + + collection := pkg.NewCollection(pkgA, pkgB, pkgC) + + tests := []struct { + name string + queries []string + want []pkg.Package + }{ + { + name: "match by exact package ID", + queries: []string{string(pkgA.ID())}, + want: []pkg.Package{pkgA}, + }, + { + name: "match by exact PURL", + queries: []string{"pkg:apk/alpine/musl@1.2.2-r0"}, + want: []pkg.Package{pkgB}, + }, + { + name: "match by PURL prefix", + queries: []string{"pkg:apk/alpine/musl"}, + want: []pkg.Package{pkgB}, + }, + { + name: "match by case-insensitive name", + queries: []string{"musl"}, + want: []pkg.Package{pkgB, pkgC}, + }, + { + name: "match by name@version", + queries: []string{"musl@1.2.2-r0"}, + want: []pkg.Package{pkgB}, + }, + { + name: "multiple queries", + queries: []string{"alpine-baselayout", 
"musl@1.2.2-r0"}, + want: []pkg.Package{pkgA, pkgB}, + }, + { + name: "no match", + queries: []string{"nonexistent"}, + want: []pkg.Package{}, + }, + { + name: "empty queries", + queries: []string{}, + want: nil, + }, + { + name: "nil collection returns nil", + queries: []string{"musl"}, + want: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + coll := collection + if tt.name == "nil collection returns nil" { + coll = nil + } + + got := MatchPackages(coll, tt.queries) + + if tt.want == nil { + require.Nil(t, got) + return + } + + require.Len(t, got, len(tt.want)) + + if len(tt.want) == 0 { + return + } + + // sort both for comparison + pkg.Sort(tt.want) + + // compare using cmp.Diff, ignoring unexported fields + opts := []cmp.Option{ + cmpopts.IgnoreUnexported(pkg.Package{}), + cmpopts.IgnoreFields(pkg.Package{}, "Locations", "Licenses", "CPEs"), + } + + if diff := cmp.Diff(tt.want, got, opts...); diff != "" { + t.Errorf("MatchPackages() mismatch (-want +got):\n%s", diff) + } + }) + } +} diff --git a/cmd/syft/internal/split/relationship_index.go b/cmd/syft/internal/split/relationship_index.go new file mode 100644 index 000000000..2d9fbffb7 --- /dev/null +++ b/cmd/syft/internal/split/relationship_index.go @@ -0,0 +1,158 @@ +package split + +import ( + "slices" + "strings" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" +) + +// relationshipIndex indexes relationships for efficient lookup by from/to IDs. +// This is a simplified version tailored for the split functionality. 
+type relationshipIndex struct {
+	relationships []*sortableRelationship
+	fromID        map[artifact.ID]*mappedRelationships
+	toID          map[artifact.ID]*mappedRelationships
+}
+
+// newRelationshipIndex creates a new relationship index from the given relationships
+func newRelationshipIndex(relationships ...artifact.Relationship) *relationshipIndex {
+	idx := &relationshipIndex{
+		fromID: make(map[artifact.ID]*mappedRelationships),
+		toID:   make(map[artifact.ID]*mappedRelationships),
+	}
+
+	for _, r := range relationships {
+		// prevent duplicates
+		if idx.contains(r) {
+			continue
+		}
+
+		fromID := r.From.ID()
+		toID := r.To.ID()
+
+		sr := &sortableRelationship{
+			from:         fromID,
+			to:           toID,
+			relationship: r,
+		}
+
+		idx.relationships = append(idx.relationships, sr)
+
+		// add from -> to mapping
+		if idx.fromID[fromID] == nil {
+			idx.fromID[fromID] = &mappedRelationships{}
+		}
+		idx.fromID[fromID].add(toID, sr)
+
+		// add to -> from mapping
+		if idx.toID[toID] == nil {
+			idx.toID[toID] = &mappedRelationships{}
+		}
+		idx.toID[toID].add(fromID, sr)
+	}
+
+	return idx
+}
+
+// from returns all relationships from the given identifiable, filtered by types
+func (i *relationshipIndex) from(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
+	return toSortedSlice(fromMapped(i.fromID, identifiable), types)
+}
+
+// to returns all relationships to the given identifiable, filtered by types
+func (i *relationshipIndex) to(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []artifact.Relationship {
+	return toSortedSlice(fromMapped(i.toID, identifiable), types)
+}
+
+// coordinates returns all file coordinates referenced by relationships for the given identifiable
+func (i *relationshipIndex) coordinates(identifiable artifact.Identifiable, types ...artifact.RelationshipType) []file.Coordinates {
+	// get relationships in both directions. slices.Concat always allocates a
+	// fresh slice; a plain append(a, b...) could write b's elements into the
+	// spare capacity of the index-owned allRelated backing array
+	rels := slices.Concat(fromMapped(i.fromID, identifiable), fromMapped(i.toID, identifiable))
+	sorted := toSortedSlice(rels, types)
+
+	var coords []file.Coordinates
+	for _, rel := range sorted {
+		if c, ok := rel.From.(file.Coordinates); ok {
+			coords = append(coords, c)
+		}
+		if c, ok := rel.To.(file.Coordinates); ok {
+			coords = append(coords, c)
+		}
+	}
+	return coords
+}
+
+// all returns all relationships, optionally filtered by types
+func (i *relationshipIndex) all(types ...artifact.RelationshipType) []artifact.Relationship {
+	return toSortedSlice(i.relationships, types)
+}
+
+// contains checks if the relationship is already in the index
+func (i *relationshipIndex) contains(r artifact.Relationship) bool {
+	mapped := i.fromID[r.From.ID()]
+	if mapped == nil {
+		return false
+	}
+	typeMap := mapped.typeMap[r.Type]
+	if typeMap == nil {
+		return false
+	}
+	return typeMap[r.To.ID()] != nil
+}
+
+type mappedRelationships struct {
+	typeMap    map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship
+	allRelated []*sortableRelationship
+}
+
+func (m *mappedRelationships) add(id artifact.ID, sr *sortableRelationship) {
+	m.allRelated = append(m.allRelated, sr)
+	if m.typeMap == nil {
+		m.typeMap = make(map[artifact.RelationshipType]map[artifact.ID]*sortableRelationship)
+	}
+	if m.typeMap[sr.relationship.Type] == nil {
+		m.typeMap[sr.relationship.Type] = make(map[artifact.ID]*sortableRelationship)
+	}
+	m.typeMap[sr.relationship.Type][id] = sr
+}
+
+type sortableRelationship struct {
+	from         artifact.ID
+	to           artifact.ID
+	relationship artifact.Relationship
+}
+
+func fromMapped(idMap map[artifact.ID]*mappedRelationships, identifiable artifact.Identifiable) []*sortableRelationship {
+	if identifiable == nil {
+		return nil
+	}
+	mapped := idMap[identifiable.ID()]
+	if mapped == nil {
+		return nil
+	}
+	return mapped.allRelated
+}
+
+// toSortedSlice returns the relationships sorted by (type, from, to), filtered
+// to the given types (no filter when types is empty).
+func toSortedSlice(relationships []*sortableRelationship, types []artifact.RelationshipType) []artifact.Relationship {
+	// sort a copy: the input slice is shared index state (allRelated or
+	// i.relationships) and must not be reordered by a read-only query
+	relationships = slices.Clone(relationships)
+	slices.SortFunc(relationships, func(a, b *sortableRelationship) int {
+		c := strings.Compare(string(a.relationship.Type), string(b.relationship.Type))
+		if c != 0 {
+			return c
+		}
+		c = strings.Compare(string(a.from), string(b.from))
+		if c != 0 {
+			return c
+		}
+		return strings.Compare(string(a.to), string(b.to))
+	})
+
+	var out []artifact.Relationship
+	for _, r := range relationships {
+		if len(types) == 0 || slices.Contains(types, r.relationship.Type) {
+			out = append(out, r.relationship)
+		}
+	}
+	return out
+}
diff --git a/cmd/syft/internal/split/splitter.go b/cmd/syft/internal/split/splitter.go
new file mode 100644
index 000000000..0615e229e
--- /dev/null
+++ b/cmd/syft/internal/split/splitter.go
@@ -0,0 +1,314 @@
+package split
+
+import (
+	"github.com/anchore/syft/syft/artifact"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+)
+
+// allowedRelationshipTypes are the only relationship types allowed in split output
+var allowedRelationshipTypes = []artifact.RelationshipType{
+	artifact.OwnershipByFileOverlapRelationship,
+	artifact.EvidentByRelationship,
+}
+
+// Result represents the result of splitting an SBOM for a single target package
+type Result struct {
+	TargetPackage pkg.Package
+	SBOM          sbom.SBOM
+}
+
+// Split splits the source SBOM into separate SBOMs, one for each target package.
+// Each output SBOM contains the target package, its connected packages (via ownership-by-file-overlap
+// and evident-by relationships), and all related files.
+func Split(source sbom.SBOM, targetPackages []pkg.Package, dropLocationFSID, dropNonPrimaryEvidence bool) []Result {
+	if len(targetPackages) == 0 {
+		return nil
+	}
+
+	relIndex := newRelationshipIndex(source.Relationships...)
+ results := make([]Result, 0, len(targetPackages)) + + for _, target := range targetPackages { + result := splitForPackage(source, target, relIndex, dropLocationFSID, dropNonPrimaryEvidence) + results = append(results, result) + } + + return results +} + +// splitForPackage creates a new SBOM containing only the target package and its related artifacts +func splitForPackage(source sbom.SBOM, target pkg.Package, relIndex *relationshipIndex, dropLocationFSID, dropNonPrimaryEvidence bool) Result { + // find all connected packages via BFS + connectedPkgs := findConnectedPackages(source, target, relIndex) + + // collect all kept package IDs for relationship filtering + keptPkgIDs := make(map[artifact.ID]bool) + for _, p := range connectedPkgs { + keptPkgIDs[p.ID()] = true + } + + // find related file coordinates from relationships and package locations + keptCoords := collectFileCoordinates(connectedPkgs, relIndex, dropLocationFSID, dropNonPrimaryEvidence) + + // build filtered SBOM + filteredSBOM := buildFilteredSBOM(source, connectedPkgs, keptPkgIDs, keptCoords, relIndex, dropLocationFSID, dropNonPrimaryEvidence) + + return Result{ + TargetPackage: target, + SBOM: filteredSBOM, + } +} + +// findConnectedPackages uses BFS to find all packages connected to the target via traversal relationships +func findConnectedPackages(source sbom.SBOM, target pkg.Package, relIndex *relationshipIndex) []pkg.Package { + visited := make(map[artifact.ID]bool) + var result []pkg.Package + + queue := []pkg.Package{target} + visited[target.ID()] = true + + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + result = append(result, current) + + // find connected packages through relationships (both directions) + for _, rel := range relIndex.from(current, allowedRelationshipTypes...) 
{ + toID := rel.To.ID() + if visited[toID] { + continue + } + // check if the target is a package in the source collection + if p := source.Artifacts.Packages.Package(toID); p != nil { + visited[toID] = true + queue = append(queue, *p) + } + } + + for _, rel := range relIndex.to(current, allowedRelationshipTypes...) { + fromID := rel.From.ID() + if visited[fromID] { + continue + } + // check if the source is a package in the source collection + if p := source.Artifacts.Packages.Package(fromID); p != nil { + visited[fromID] = true + queue = append(queue, *p) + } + } + } + + return result +} + +// collectFileCoordinates gathers all file coordinates related to the kept packages +func collectFileCoordinates(packages []pkg.Package, relIndex *relationshipIndex, dropLocationFSID, dropNonPrimaryEvidence bool) map[file.Coordinates]bool { + coords := make(map[file.Coordinates]bool) + + for _, p := range packages { + // collect coordinates from package locations + for _, loc := range p.Locations.ToSlice() { + // skip non-primary evidence locations if requested + if dropNonPrimaryEvidence { + if loc.Annotations == nil || loc.Annotations["evidence"] != "primary" { + continue + } + } + coord := loc.Coordinates + if dropLocationFSID { + coord = file.Coordinates{RealPath: coord.RealPath} + } + coords[coord] = true + } + + // collect coordinates from allowed relationship types only + for _, c := range relIndex.coordinates(p, allowedRelationshipTypes...) 
{ + coord := c + if dropLocationFSID { + coord = file.Coordinates{RealPath: c.RealPath} + } + coords[coord] = true + } + } + + return coords +} + +// buildFilteredSBOM creates a new SBOM with only the kept packages, files, and relationships +func buildFilteredSBOM(source sbom.SBOM, packages []pkg.Package, keptPkgIDs map[artifact.ID]bool, keptCoords map[file.Coordinates]bool, relIndex *relationshipIndex, dropLocationFSID, dropNonPrimaryEvidence bool) sbom.SBOM { + // create new package collection + newPkgCollection := pkg.NewCollection() + for _, p := range packages { + // filter non-primary evidence locations if requested + if dropNonPrimaryEvidence { + p = filterPackageNonPrimaryLocations(p) + } + // if dropLocationFSID is enabled, we need to clear FileSystemID from package locations + if dropLocationFSID { + p = clearPackageFileSystemIDs(p) + } + newPkgCollection.Add(p) + } + + // filter file artifacts + filteredSBOM := sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: newPkgCollection, + FileMetadata: filterFileMap(source.Artifacts.FileMetadata, keptCoords, dropLocationFSID), + FileDigests: filterFileMap(source.Artifacts.FileDigests, keptCoords, dropLocationFSID), + FileContents: filterFileMap(source.Artifacts.FileContents, keptCoords, dropLocationFSID), + FileLicenses: filterFileMap(source.Artifacts.FileLicenses, keptCoords, dropLocationFSID), + Executables: filterFileMap(source.Artifacts.Executables, keptCoords, dropLocationFSID), + Unknowns: filterFileMap(source.Artifacts.Unknowns, keptCoords, dropLocationFSID), + LinuxDistribution: source.Artifacts.LinuxDistribution, + }, + Relationships: filterRelationships(relIndex.all(), keptPkgIDs, keptCoords, dropLocationFSID), + Source: source.Source, + Descriptor: source.Descriptor, + } + + return filteredSBOM +} + +// filterPackageNonPrimaryLocations creates a copy of the package with only primary evidence locations +func filterPackageNonPrimaryLocations(p pkg.Package) pkg.Package { + newLocations := 
file.NewLocationSet() + for _, loc := range p.Locations.ToSlice() { + if loc.Annotations != nil && loc.Annotations["evidence"] == "primary" { + newLocations.Add(loc) + } + } + p.Locations = newLocations + return p +} + +// filterFileMap filters a map of file.Coordinates to only include kept coordinates +func filterFileMap[T any](m map[file.Coordinates]T, keptCoords map[file.Coordinates]bool, dropLocationFSID bool) map[file.Coordinates]T { + if m == nil { + return nil + } + + result := make(map[file.Coordinates]T) + for coord, value := range m { + checkCoord := coord + if dropLocationFSID { + checkCoord = file.Coordinates{RealPath: coord.RealPath} + } + if keptCoords[checkCoord] { + outputCoord := coord + if dropLocationFSID { + outputCoord = file.Coordinates{RealPath: coord.RealPath} + } + result[outputCoord] = value + } + } + + if len(result) == 0 { + return nil + } + return result +} + +// filterRelationships filters relationships to only include allowed types referencing kept artifacts +func filterRelationships(relationships []artifact.Relationship, keptPkgIDs map[artifact.ID]bool, keptCoords map[file.Coordinates]bool, dropLocationFSID bool) []artifact.Relationship { + var result []artifact.Relationship + + for _, rel := range relationships { + // only keep allowed relationship types + if !isAllowedRelationshipType(rel.Type) { + continue + } + + // check if both ends of the relationship reference kept artifacts + fromKept := isArtifactKept(rel.From, keptPkgIDs, keptCoords, dropLocationFSID) + toKept := isArtifactKept(rel.To, keptPkgIDs, keptCoords, dropLocationFSID) + + if fromKept && toKept { + newRel := rel + if dropLocationFSID { + if coord, ok := rel.From.(file.Coordinates); ok { + newRel.From = file.Coordinates{RealPath: coord.RealPath} + } + if coord, ok := rel.To.(file.Coordinates); ok { + newRel.To = file.Coordinates{RealPath: coord.RealPath} + } + } + result = append(result, newRel) + } + } + + return result +} + +// isAllowedRelationshipType checks if 
a relationship type is in the allowed list +func isAllowedRelationshipType(t artifact.RelationshipType) bool { + for _, allowed := range allowedRelationshipTypes { + if t == allowed { + return true + } + } + return false +} + +// isArtifactKept checks if an artifact (package or file) is in the kept set +func isArtifactKept(a artifact.Identifiable, keptPkgIDs map[artifact.ID]bool, keptCoords map[file.Coordinates]bool, dropLocationFSID bool) bool { + if a == nil { + return false + } + + // check if it's a file coordinate + if coord, ok := a.(file.Coordinates); ok { + checkCoord := coord + if dropLocationFSID { + checkCoord = file.Coordinates{RealPath: coord.RealPath} + } + return keptCoords[checkCoord] + } + + // otherwise check package ID + return keptPkgIDs[a.ID()] +} + +// clearPackageFileSystemIDs creates a copy of the package with FileSystemID cleared from all locations +func clearPackageFileSystemIDs(p pkg.Package) pkg.Package { + newLocations := file.NewLocationSet() + for _, loc := range p.Locations.ToSlice() { + newLoc := file.Location{ + LocationData: file.LocationData{ + Coordinates: file.Coordinates{ + RealPath: loc.RealPath, + // FileSystemID intentionally left empty + }, + AccessPath: loc.AccessPath, + }, + LocationMetadata: loc.LocationMetadata, + } + newLocations.Add(newLoc) + } + p.Locations = newLocations + + // also clear from license locations + newLicenses := pkg.NewLicenseSet() + for _, lic := range p.Licenses.ToSlice() { + newLicLocs := file.NewLocationSet() + for _, loc := range lic.Locations.ToSlice() { + newLoc := file.Location{ + LocationData: file.LocationData{ + Coordinates: file.Coordinates{ + RealPath: loc.RealPath, + }, + AccessPath: loc.AccessPath, + }, + LocationMetadata: loc.LocationMetadata, + } + newLicLocs.Add(newLoc) + } + lic.Locations = newLicLocs + newLicenses.Add(lic) + } + p.Licenses = newLicenses + + return p +} diff --git a/cmd/syft/internal/split/splitter_test.go b/cmd/syft/internal/split/splitter_test.go new file mode 
100644 index 000000000..3cec5d6fd --- /dev/null +++ b/cmd/syft/internal/split/splitter_test.go @@ -0,0 +1,606 @@ +package split + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/linux" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/sbom" + "github.com/anchore/syft/syft/source" +) + +func TestSplit(t *testing.T) { + // create test coordinates + coord1 := file.Coordinates{RealPath: "/lib/apk/db/installed", FileSystemID: "layer1"} + coord2 := file.Coordinates{RealPath: "/usr/lib/libmusl.so", FileSystemID: "layer1"} + coord3 := file.Coordinates{RealPath: "/unrelated/file", FileSystemID: "layer1"} + + // create test packages + pkgA := pkg.Package{ + Name: "alpine-baselayout", + Version: "3.2.0-r7", + Type: pkg.ApkPkg, + Locations: file.NewLocationSet(file.NewLocationFromCoordinates(coord1)), + } + pkgA.SetID() + + pkgB := pkg.Package{ + Name: "musl", + Version: "1.2.2-r0", + Type: pkg.ApkPkg, + Locations: file.NewLocationSet(file.NewLocationFromCoordinates(coord2)), + } + pkgB.SetID() + + pkgC := pkg.Package{ + Name: "unrelated", + Version: "1.0.0", + Type: pkg.ApkPkg, + Locations: file.NewLocationSet(file.NewLocationFromCoordinates(coord3)), + } + pkgC.SetID() + + // create source SBOM with relationships + sourceSBOM := sbom.SBOM{ + Artifacts: sbom.Artifacts{ + Packages: pkg.NewCollection(pkgA, pkgB, pkgC), + FileMetadata: map[file.Coordinates]file.Metadata{ + coord1: {MIMEType: "text/plain"}, + coord2: {MIMEType: "application/x-sharedlib"}, + coord3: {MIMEType: "text/plain"}, + }, + FileDigests: map[file.Coordinates][]file.Digest{ + coord1: {{Algorithm: "sha256", Value: "abc123"}}, + coord2: {{Algorithm: "sha256", Value: "def456"}}, + coord3: {{Algorithm: "sha256", Value: "ghi789"}}, + }, + LinuxDistribution: &linux.Release{ID: "alpine", VersionID: "3.12"}, + }, + Relationships: 
[]artifact.Relationship{ + // pkgA owns pkgB via file overlap + { + From: pkgA, + To: pkgB, + Type: artifact.OwnershipByFileOverlapRelationship, + }, + // pkgA is evident by coord1 + { + From: pkgA, + To: coord1, + Type: artifact.EvidentByRelationship, + }, + // pkgB is evident by coord2 + { + From: pkgB, + To: coord2, + Type: artifact.EvidentByRelationship, + }, + // pkgC is evident by coord3 (unrelated) + { + From: pkgC, + To: coord3, + Type: artifact.EvidentByRelationship, + }, + }, + Source: source.Description{ + ID: "test-source-id", + Name: "test-image", + }, + Descriptor: sbom.Descriptor{ + Name: "syft", + Version: "test", + }, + } + + tests := []struct { + name string + targetPackages []pkg.Package + dropLocationFSID bool + dropNonPrimaryEvidence bool + wantCount int + verify func(t *testing.T, results []Result) + }{ + { + name: "split single package with connected packages", + targetPackages: []pkg.Package{pkgA}, + dropLocationFSID: false, + wantCount: 1, + verify: func(t *testing.T, results []Result) { + require.Len(t, results, 1) + result := results[0] + + // target package should be pkgA + assert.Equal(t, pkgA.Name, result.TargetPackage.Name) + + // should include both pkgA and pkgB (connected via ownership) + assert.Equal(t, 2, result.SBOM.Artifacts.Packages.PackageCount()) + + // should include coord1 and coord2 (related to pkgA and pkgB) + assert.Contains(t, result.SBOM.Artifacts.FileMetadata, coord1) + assert.Contains(t, result.SBOM.Artifacts.FileMetadata, coord2) + + // should NOT include coord3 (unrelated) + assert.NotContains(t, result.SBOM.Artifacts.FileMetadata, coord3) + + // source and descriptor should be preserved + assert.Equal(t, "test-source-id", result.SBOM.Source.ID) + assert.Equal(t, "syft", result.SBOM.Descriptor.Name) + + // linux distribution should be preserved + require.NotNil(t, result.SBOM.Artifacts.LinuxDistribution) + assert.Equal(t, "alpine", result.SBOM.Artifacts.LinuxDistribution.ID) + }, + }, + { + name: "split unrelated 
package", + targetPackages: []pkg.Package{pkgC}, + dropLocationFSID: false, + wantCount: 1, + verify: func(t *testing.T, results []Result) { + require.Len(t, results, 1) + result := results[0] + + // should only include pkgC + assert.Equal(t, 1, result.SBOM.Artifacts.Packages.PackageCount()) + + // should only include coord3 + assert.Contains(t, result.SBOM.Artifacts.FileMetadata, coord3) + assert.NotContains(t, result.SBOM.Artifacts.FileMetadata, coord1) + assert.NotContains(t, result.SBOM.Artifacts.FileMetadata, coord2) + }, + }, + { + name: "split with dropLocationFSID", + targetPackages: []pkg.Package{pkgA}, + dropLocationFSID: true, + wantCount: 1, + verify: func(t *testing.T, results []Result) { + require.Len(t, results, 1) + result := results[0] + + // FileSystemID should be cleared from coordinates + for coord := range result.SBOM.Artifacts.FileMetadata { + assert.Empty(t, coord.FileSystemID, "FileSystemID should be empty") + } + + // package locations should also have FileSystemID cleared + for p := range result.SBOM.Artifacts.Packages.Enumerate() { + for _, loc := range p.Locations.ToSlice() { + assert.Empty(t, loc.FileSystemID, "package location FileSystemID should be empty") + } + } + }, + }, + { + name: "split multiple packages", + targetPackages: []pkg.Package{pkgA, pkgC}, + dropLocationFSID: false, + wantCount: 2, + verify: func(t *testing.T, results []Result) { + require.Len(t, results, 2) + + // first result should be for pkgA + assert.Equal(t, pkgA.Name, results[0].TargetPackage.Name) + + // second result should be for pkgC + assert.Equal(t, pkgC.Name, results[1].TargetPackage.Name) + }, + }, + { + name: "empty target packages", + targetPackages: []pkg.Package{}, + dropLocationFSID: false, + wantCount: 0, + verify: func(t *testing.T, results []Result) { + assert.Nil(t, results) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + results := Split(sourceSBOM, tt.targetPackages, tt.dropLocationFSID, 
tt.dropNonPrimaryEvidence) + + if tt.wantCount == 0 { + assert.Nil(t, results) + } else { + require.Len(t, results, tt.wantCount) + } + + if tt.verify != nil { + tt.verify(t, results) + } + }) + } +} + +func TestFindConnectedPackages(t *testing.T) { + // create test packages + pkgA := pkg.Package{Name: "a", Version: "1.0"} + pkgA.SetID() + + pkgB := pkg.Package{Name: "b", Version: "1.0"} + pkgB.SetID() + + pkgC := pkg.Package{Name: "c", Version: "1.0"} + pkgC.SetID() + + pkgD := pkg.Package{Name: "d", Version: "1.0"} + pkgD.SetID() + + tests := []struct { + name string + target pkg.Package + relationships []artifact.Relationship + wantNames []string + }{ + { + name: "single package no relationships", + target: pkgA, + relationships: []artifact.Relationship{ + // pkgB -> pkgC (unrelated) + {From: pkgB, To: pkgC, Type: artifact.OwnershipByFileOverlapRelationship}, + }, + wantNames: []string{"a"}, + }, + { + name: "connected via ownership-by-file-overlap", + target: pkgA, + relationships: []artifact.Relationship{ + {From: pkgA, To: pkgB, Type: artifact.OwnershipByFileOverlapRelationship}, + }, + wantNames: []string{"a", "b"}, + }, + { + name: "connected via evident-by (to package)", + target: pkgA, + relationships: []artifact.Relationship{ + {From: pkgA, To: pkgB, Type: artifact.EvidentByRelationship}, + }, + wantNames: []string{"a", "b"}, + }, + { + name: "chain of connections", + target: pkgA, + relationships: []artifact.Relationship{ + {From: pkgA, To: pkgB, Type: artifact.OwnershipByFileOverlapRelationship}, + {From: pkgB, To: pkgC, Type: artifact.OwnershipByFileOverlapRelationship}, + }, + wantNames: []string{"a", "b", "c"}, + }, + { + name: "circular relationships", + target: pkgA, + relationships: []artifact.Relationship{ + {From: pkgA, To: pkgB, Type: artifact.OwnershipByFileOverlapRelationship}, + {From: pkgB, To: pkgC, Type: artifact.OwnershipByFileOverlapRelationship}, + {From: pkgC, To: pkgA, Type: artifact.OwnershipByFileOverlapRelationship}, + }, + 
			wantNames: []string{"a", "b", "c"},
		},
		{
			name:   "ignores contains relationship",
			target: pkgA,
			relationships: []artifact.Relationship{
				{From: pkgA, To: pkgB, Type: artifact.ContainsRelationship},
			},
			wantNames: []string{"a"},
		},
		{
			name:   "ignores dependency-of relationship",
			target: pkgA,
			relationships: []artifact.Relationship{
				{From: pkgA, To: pkgB, Type: artifact.DependencyOfRelationship},
			},
			wantNames: []string{"a"},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// build a fresh SBOM per case so the relationship set under test is isolated
			sourceSBOM := sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(pkgA, pkgB, pkgC, pkgD),
				},
				Relationships: tt.relationships,
			}

			relIndex := newRelationshipIndex(sourceSBOM.Relationships...)
			got := findConnectedPackages(sourceSBOM, tt.target, relIndex)

			gotNames := make([]string, len(got))
			for i, p := range got {
				gotNames[i] = p.Name
			}

			// order-insensitive comparison: findConnectedPackages makes no ordering guarantee here
			assert.ElementsMatch(t, tt.wantNames, gotNames)
		})
	}
}

// TestClearPackageFileSystemIDs verifies that clearPackageFileSystemIDs blanks the
// FileSystemID on both the package's own locations and its license locations, while
// leaving RealPath and AccessPath untouched.
func TestClearPackageFileSystemIDs(t *testing.T) {
	coord := file.Coordinates{RealPath: "/test/path", FileSystemID: "layer123"}
	loc := file.NewLocationFromCoordinates(coord)
	loc.AccessPath = "/test/access"

	// license location shares the same coordinates (and thus the same FileSystemID)
	licLoc := file.NewLocationFromCoordinates(coord)
	lic := pkg.License{
		Value:     "MIT",
		Locations: file.NewLocationSet(licLoc),
	}

	p := pkg.Package{
		Name:      "test",
		Version:   "1.0",
		Locations: file.NewLocationSet(loc),
		Licenses:  pkg.NewLicenseSet(lic),
	}

	result := clearPackageFileSystemIDs(p)

	// check package locations: only FileSystemID should be cleared
	for _, l := range result.Locations.ToSlice() {
		assert.Empty(t, l.FileSystemID, "package location FileSystemID should be empty")
		assert.Equal(t, "/test/path", l.RealPath)
		assert.Equal(t, "/test/access", l.AccessPath)
	}

	// check license locations: FileSystemID must be cleared there too
	for _, l := range result.Licenses.ToSlice() {
		for _, ll := range l.Locations.ToSlice() {
			assert.Empty(t, ll.FileSystemID, "license location FileSystemID should be empty")
		}
	}
}

// TestPackageIDStabilityThroughSplit verifies that a package's content-addressable ID
// survives the Split transformations (location filtering and filesystem-ID clearing)
// for every combination of the two location drop flags.
func TestPackageIDStabilityThroughSplit(t *testing.T) {
	// this test verifies that package IDs remain stable through all split transformations,
	// including dropping licenses, filtering locations, and clearing filesystem IDs.
	// Package IDs are content-addressable and should NOT change when we modify packages
	// during the split process.

	// create a package with multiple locations (primary and non-primary evidence)
	primaryLoc := file.NewLocation("/lib/apk/db/installed")
	primaryLoc.Annotations = map[string]string{"evidence": "primary"}

	nonPrimaryLoc := file.NewLocation("/some/other/path")
	nonPrimaryLoc.Annotations = map[string]string{"evidence": "supporting"}

	// add filesystem IDs to both
	primaryLoc.Coordinates.FileSystemID = "layer123"
	nonPrimaryLoc.Coordinates.FileSystemID = "layer456"

	// create license with location
	licLoc := file.NewLocation("/lib/apk/db/installed")
	licLoc.Coordinates.FileSystemID = "layer123"
	lic := pkg.License{
		Value:     "MIT",
		Locations: file.NewLocationSet(licLoc),
	}

	testPkg := pkg.Package{
		Name:      "test-package",
		Version:   "1.0.0",
		Type:      pkg.ApkPkg,
		Locations: file.NewLocationSet(primaryLoc, nonPrimaryLoc),
		Licenses:  pkg.NewLicenseSet(lic),
	}
	testPkg.SetID()
	originalID := testPkg.ID()

	// verify original ID is set
	require.NotEmpty(t, originalID, "original package ID should be set")

	// create a simple SBOM for the split operation
	sourceSBOM := sbom.SBOM{
		Artifacts: sbom.Artifacts{
			Packages: pkg.NewCollection(testPkg),
			FileMetadata: map[file.Coordinates]file.Metadata{
				primaryLoc.Coordinates:    {MIMEType: "text/plain"},
				nonPrimaryLoc.Coordinates: {MIMEType: "text/plain"},
			},
		},
		Relationships: []artifact.Relationship{
			{
				From: testPkg,
				To:   primaryLoc.Coordinates,
				Type: artifact.EvidentByRelationship,
			},
		},
		Source: source.Description{
			ID:   "test-source",
			Name: "test",
		},
		Descriptor: sbom.Descriptor{
			Name:    "syft",
			Version: "test",
		},
	}

	// each case toggles one of the two location-related drop flags accepted by Split
	tests := []struct {
		name                   string
		dropLocationFSID       bool
		dropNonPrimaryEvidence bool
	}{
		{
			name:                   "no drop options",
			dropLocationFSID:       false,
			dropNonPrimaryEvidence: false,
		},
		{
			name:                   "drop location:fsid only",
			dropLocationFSID:       true,
			dropNonPrimaryEvidence: false,
		},
		{
			name:                   "drop location:non-primary-evidence only",
			dropLocationFSID:       false,
			dropNonPrimaryEvidence: true,
		},
		{
			name:                   "drop both location options",
			dropLocationFSID:       true,
			dropNonPrimaryEvidence: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// NOTE(review): sourceSBOM is shared across subtests — assumes Split does not
			// mutate its input SBOM; confirm against the Split implementation.
			results := Split(sourceSBOM, []pkg.Package{testPkg}, tt.dropLocationFSID, tt.dropNonPrimaryEvidence)
			require.Len(t, results, 1)

			resultSBOM := results[0].SBOM

			// verify there's exactly one package in the result
			require.Equal(t, 1, resultSBOM.Artifacts.Packages.PackageCount())

			// get the package from the result; taking &p is safe because we break
			// immediately after the first element, so the loop variable is not reused
			var resultPkg *pkg.Package
			for p := range resultSBOM.Artifacts.Packages.Enumerate() {
				resultPkg = &p
				break
			}
			require.NotNil(t, resultPkg)

			// THE CRITICAL ASSERTION: package ID must remain stable
			assert.Equal(t, originalID, resultPkg.ID(),
				"package ID changed after split with dropLocationFSID=%v, dropNonPrimaryEvidence=%v",
				tt.dropLocationFSID, tt.dropNonPrimaryEvidence)

			// verify the target package ID also matches
			assert.Equal(t, originalID, results[0].TargetPackage.ID(),
				"target package ID changed")

			// additional verification: if we dropped non-primary evidence, locations should be filtered
			if tt.dropNonPrimaryEvidence {
				locs := resultPkg.Locations.ToSlice()
				for _, loc := range locs {
					assert.Equal(t, "primary", loc.Annotations["evidence"],
						"non-primary locations should be filtered out")
				}
			}

			// additional verification: if we dropped fsid, it should be empty
			if tt.dropLocationFSID {
				for _, loc := range resultPkg.Locations.ToSlice() {
					assert.Empty(t, loc.FileSystemID, "FileSystemID should be cleared")
				}
			}
		})
	}
}

// TestPackageIDStabilityWithDropOptions verifies that ApplyDropOptions — including the
// pkg-level drops (licenses, metadata files) and the document-level drops (source,
// descriptor, distro) — never changes a package's content-addressable ID.
func TestPackageIDStabilityWithDropOptions(t *testing.T) {
	// this test verifies that package IDs remain stable when using ApplyDropOptions
	// which includes pkg:licenses

	coord := file.Coordinates{RealPath: "/test/path", FileSystemID: "layer123"}
	loc := file.NewLocationFromCoordinates(coord)
	loc.Annotations = map[string]string{"evidence": "primary"}

	lic := pkg.License{
		Value:     "GPL-2.0",
		Locations: file.NewLocationSet(loc),
	}

	// package carries licenses, metadata files, and an annotated location so that every
	// drop option exercised below has something to remove
	testPkg := pkg.Package{
		Name:      "license-test-pkg",
		Version:   "2.0.0",
		Type:      pkg.ApkPkg,
		Locations: file.NewLocationSet(loc),
		Licenses:  pkg.NewLicenseSet(lic),
		Metadata: pkg.ApkDBEntry{
			Package: "license-test-pkg",
			Files: []pkg.ApkFileRecord{
				{Path: "/usr/bin/test"},
			},
		},
	}
	testPkg.SetID()
	originalID := testPkg.ID()

	require.NotEmpty(t, originalID)

	// create SBOM
	testSBOM := sbom.SBOM{
		Artifacts: sbom.Artifacts{
			Packages: pkg.NewCollection(testPkg),
		},
		Source: source.Description{
			ID:   "test",
			Name: "test",
		},
		Descriptor: sbom.Descriptor{
			Name:    "syft",
			Version: "test",
		},
	}

	tests := []struct {
		name string
		opts []DropOption
	}{
		{
			name: "drop pkg:licenses",
			opts: []DropOption{DropPkgLicenses},
		},
		{
			name: "drop location:fsid via ApplyDropOptions",
			opts: []DropOption{DropLocationFSID},
		},
		{
			name: "drop location:non-primary-evidence via ApplyDropOptions",
			opts: []DropOption{DropLocationNonPrimaryEvidence},
		},
		{
			name: "drop all location and pkg options",
			opts: []DropOption{DropPkgLicenses, DropLocationFSID, DropLocationNonPrimaryEvidence},
		},
		{
			name: "drop pkg:metadata.files",
			opts: []DropOption{DropPkgMetadataFiles},
		},
		{
			name: "drop all pkg options",
			opts: []DropOption{DropPkgLicenses, DropPkgMetadataFiles},
		},
		{
			name: "drop source and descriptor (should not affect package ID)",
			opts: []DropOption{DropSource, DropDescriptor, DropDistro},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// make a copy of the SBOM for this test (ApplyDropOptions mutates its argument)
			sbomCopy := sbom.SBOM{
				Artifacts: sbom.Artifacts{
					Packages: pkg.NewCollection(testPkg),
				},
				Source:     testSBOM.Source,
				Descriptor: testSBOM.Descriptor,
			}

			// apply drop options
			ApplyDropOptions(&sbomCopy, tt.opts)

			// get the package from the modified SBOM; &p is safe since we break on the
			// first element and do not keep iterating
			var resultPkg *pkg.Package
			for p := range sbomCopy.Artifacts.Packages.Enumerate() {
				resultPkg = &p
				break
			}
			require.NotNil(t, resultPkg)

			// THE CRITICAL ASSERTION: package ID must remain stable
			assert.Equal(t, originalID, resultPkg.ID(),
				"package ID changed after applying drop options: %v", tt.opts)
		})
	}
}