// HandleCompoundArchiveAliases rewrites a path whose final extension is a
// compound archive alias (for example ".tgz") so that it ends in the
// equivalent full form (".tar.gz"), which lets the mholt/archives library
// identify the format from the file name.
//
// Only the last extension, as reported by filepath.Ext, is examined, and the
// comparison is case-sensitive: "archive.TGZ" is returned unchanged. Paths
// whose extension is not a known alias (including paths with no extension)
// are returned unchanged. The function only manipulates the string; it never
// touches the filesystem.
//
// Recognized aliases:
//
//	.tgz  -> .tar.gz
//	.tbz2 -> .tar.bz2
//	.txz  -> .tar.xz
//	.tlz  -> .tar.lz
//	.tzst -> .tar.zst
//
// See: https://github.com/anchore/syft/issues/4416
// Reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats
func HandleCompoundArchiveAliases(path string) string {
	ext := filepath.Ext(path)

	// A switch is used instead of a map literal so that no lookup table has
	// to be rebuilt on every call; this helper may be invoked once per file
	// when scanning large file sets.
	var full string
	switch ext {
	case ".tgz":
		full = ".tar.gz"
	case ".tbz2":
		full = ".tar.bz2"
	case ".txz":
		full = ".tar.xz"
	case ".tlz":
		full = ".tar.lz"
	case ".tzst":
		full = ".tar.zst"
	default:
		// Not an alias: hand the caller back exactly what was given.
		return path
	}

	// filepath.Ext always returns a suffix of path, so trimming it leaves the
	// directory and base name intact.
	return strings.TrimSuffix(path, ext) + full
}
"/path/to/archive.tar.gz", + expected: "/path/to/archive.tar.gz", + }, + { + name: "zip unchanged", + input: "/path/to/archive.zip", + expected: "/path/to/archive.zip", + }, + { + name: "no extension unchanged", + input: "/path/to/archive", + expected: "/path/to/archive", + }, + { + name: "case sensitive - TGZ not matched", + input: "/path/to/archive.TGZ", + expected: "/path/to/archive.TGZ", + }, + { + name: "just filename with tgz", + input: "archive.tgz", + expected: "archive.tar.gz", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := HandleCompoundArchiveAliases(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go index c3511a1cc..8046ce00e 100644 --- a/internal/file/tar_file_traversal.go +++ b/internal/file/tar_file_traversal.go @@ -20,7 +20,7 @@ func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archive } defer internal.CloseAndLogError(tarReader, archivePath) - format, _, err := archives.Identify(ctx, archivePath, nil) + format, _, err := archives.Identify(ctx, HandleCompoundArchiveAliases(archivePath), nil) if err != nil { return fmt.Errorf("failed to identify tar compression format: %w", err) } diff --git a/internal/task/unknowns_tasks.go b/internal/task/unknowns_tasks.go index 2f63ce28e..ca251e389 100644 --- a/internal/task/unknowns_tasks.go +++ b/internal/task/unknowns_tasks.go @@ -6,6 +6,7 @@ import ( "github.com/mholt/archives" + intFile "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/syft/cataloging" @@ -60,7 +61,7 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) { if c.IncludeUnexpandedArchives { ctx := context.Background() for coords := range s.Artifacts.FileMetadata { - format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil) + format, _, notArchiveErr 
:= archives.Identify(ctx, intFile.HandleCompoundArchiveAliases(coords.RealPath), nil) if format != nil && notArchiveErr == nil && !hasPackageReference(coords) { s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") } diff --git a/syft/format/github/internal/model/model.go b/syft/format/github/internal/model/model.go index b2aa0d23a..feeeab16c 100644 --- a/syft/format/github/internal/model/model.go +++ b/syft/format/github/internal/model/model.go @@ -9,6 +9,7 @@ import ( "github.com/mholt/archives" "github.com/anchore/packageurl-go" + "github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/sbom" @@ -155,7 +156,7 @@ func trimRelative(s string) string { // isArchive returns true if the path appears to be an archive func isArchive(path string) bool { - format, _, err := archives.Identify(context.Background(), path, nil) + format, _, err := archives.Identify(context.Background(), file.HandleCompoundArchiveAliases(path), nil) return err == nil && format != nil } diff --git a/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go index 089d19eb1..c0f3eef3c 100644 --- a/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go +++ b/syft/pkg/cataloger/java/tar_wrapped_archive_parser_test.go @@ -32,6 +32,13 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) { "joda-time", }, }, + { + fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tgz", + expected: []string{ + "example-java-app-maven", + "joda-time", + }, + }, } for _, test := range tests { t.Run(path.Base(test.fixture), func(t *testing.T) { diff --git a/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile b/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile index b3aae020a..1ef3fab78 100644 --- a/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile +++ 
b/syft/pkg/cataloger/java/test-fixtures/java-builds/Makefile @@ -16,7 +16,7 @@ fingerprint: $(FINGERPRINT_FILE) jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar -archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz +archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.tgz native-image: $(PKGSDIR)/example-java-app $(PKGSDIR)/gcc-amd64-darwin-exec-debug @@ -31,6 +31,9 @@ $(PKGSDIR)/example-java-app-maven-0.1.0.tar: $(PKGSDIR)/example-java-app-maven-0 $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar +$(PKGSDIR)/example-java-app-maven-0.1.0.tgz: $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz + tar -czf $(PKGSDIR)/example-java-app-maven-0.1.0.tgz $(PKGSDIR)/example-java-app-maven-0.1.0.jar + # Nested jar... 
$(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar: diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index 136319efe..bdc0a3a61 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -8,7 +8,6 @@ import ( "os" "path" "path/filepath" - "strings" "sync" "github.com/mholt/archives" @@ -196,29 +195,6 @@ func deriveIDFromFile(cfg Config) (artifact.ID, string) { return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d } -// see: https://github.com/anchore/syft/issues/4416 -func handleCompoundAliases(path string) (pathAlias string) { - // reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats - extMap := map[string]string{ - ".tgz": ".tar.gz", - ".tbz2": ".tar.bz2", - ".txz": ".tar.xz", - ".tlz": ".tar.lz", - ".tzst": ".tar.zst", - } - - ext := filepath.Ext(path) - - newExt, ok := extMap[ext] - if ok { - base := strings.TrimSuffix(path, ext) - pathAlias = base + newExt - } else { - pathAlias = path - } - return -} - // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive // contents have been made available. A cleanup function is provided for any temp files created (if any). // Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where @@ -234,7 +210,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // unarchived. 
- envelopedUnarchiver, _, err := archives.Identify(context.Background(), handleCompoundAliases(path), nil) + envelopedUnarchiver, _, err := archives.Identify(context.Background(), intFile.HandleCompoundArchiveAliases(path), nil) if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) if err != nil {