feat: apply HandleCompoundArchiveAliases across syft

Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
Christopher Phillips 2025-12-01 11:05:59 -05:00
parent 4bbceb09c1
commit 57ec3a6561
No known key found for this signature in database
8 changed files with 118 additions and 29 deletions

View File

@ -0,0 +1,28 @@
package file
import (
"path/filepath"
"strings"
)
// HandleCompoundArchiveAliases normalizes archive file paths that use compound extension
// aliases (like .tgz) to their full forms (like .tar.gz) for correct identification
// by the mholt/archives library.
//
// Matching is case-sensitive (".TGZ" is not rewritten) and any path without a known
// alias extension is returned unchanged.
//
// See: https://github.com/anchore/syft/issues/4416
// Reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats
func HandleCompoundArchiveAliases(path string) string {
	ext := filepath.Ext(path)
	// a switch over the few constant aliases avoids allocating a lookup map on every call
	var full string
	switch ext {
	case ".tgz":
		full = ".tar.gz"
	case ".tbz2":
		full = ".tar.bz2"
	case ".txz":
		full = ".tar.xz"
	case ".tlz":
		full = ".tar.lz"
	case ".tzst":
		full = ".tar.zst"
	default:
		return path
	}
	return strings.TrimSuffix(path, ext) + full
}

View File

@ -0,0 +1,73 @@
package file
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestHandleCompoundArchiveAliases(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "tgz to tar.gz",
input: "/path/to/archive.tgz",
expected: "/path/to/archive.tar.gz",
},
{
name: "tbz2 to tar.bz2",
input: "/path/to/archive.tbz2",
expected: "/path/to/archive.tar.bz2",
},
{
name: "txz to tar.xz",
input: "/path/to/archive.txz",
expected: "/path/to/archive.tar.xz",
},
{
name: "tlz to tar.lz",
input: "/path/to/archive.tlz",
expected: "/path/to/archive.tar.lz",
},
{
name: "tzst to tar.zst",
input: "/path/to/archive.tzst",
expected: "/path/to/archive.tar.zst",
},
{
name: "standard tar.gz unchanged",
input: "/path/to/archive.tar.gz",
expected: "/path/to/archive.tar.gz",
},
{
name: "zip unchanged",
input: "/path/to/archive.zip",
expected: "/path/to/archive.zip",
},
{
name: "no extension unchanged",
input: "/path/to/archive",
expected: "/path/to/archive",
},
{
name: "case sensitive - TGZ not matched",
input: "/path/to/archive.TGZ",
expected: "/path/to/archive.TGZ",
},
{
name: "just filename with tgz",
input: "archive.tgz",
expected: "archive.tar.gz",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := HandleCompoundArchiveAliases(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}

View File

@ -20,7 +20,7 @@ func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archive
} }
defer internal.CloseAndLogError(tarReader, archivePath) defer internal.CloseAndLogError(tarReader, archivePath)
format, _, err := archives.Identify(ctx, archivePath, nil) format, _, err := archives.Identify(ctx, HandleCompoundArchiveAliases(archivePath), nil)
if err != nil { if err != nil {
return fmt.Errorf("failed to identify tar compression format: %w", err) return fmt.Errorf("failed to identify tar compression format: %w", err)
} }

View File

@ -6,6 +6,7 @@ import (
"github.com/mholt/archives" "github.com/mholt/archives"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/sbomsync" "github.com/anchore/syft/internal/sbomsync"
"github.com/anchore/syft/syft/cataloging" "github.com/anchore/syft/syft/cataloging"
@ -60,7 +61,7 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
if c.IncludeUnexpandedArchives { if c.IncludeUnexpandedArchives {
ctx := context.Background() ctx := context.Background()
for coords := range s.Artifacts.FileMetadata { for coords := range s.Artifacts.FileMetadata {
format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil) format, _, notArchiveErr := archives.Identify(ctx, intFile.HandleCompoundArchiveAliases(coords.RealPath), nil)
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) { if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged") s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
} }

View File

@ -9,6 +9,7 @@ import (
"github.com/mholt/archives" "github.com/mholt/archives"
"github.com/anchore/packageurl-go" "github.com/anchore/packageurl-go"
"github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
@ -155,7 +156,7 @@ func trimRelative(s string) string {
// isArchive returns true if the path appears to be an archive // isArchive returns true if the path appears to be an archive
func isArchive(path string) bool { func isArchive(path string) bool {
format, _, err := archives.Identify(context.Background(), path, nil) format, _, err := archives.Identify(context.Background(), file.HandleCompoundArchiveAliases(path), nil)
return err == nil && format != nil return err == nil && format != nil
} }

View File

@ -32,6 +32,13 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
"joda-time", "joda-time",
}, },
}, },
{
fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tgz",
expected: []string{
"example-java-app-maven",
"joda-time",
},
},
} }
for _, test := range tests { for _, test := range tests {
t.Run(path.Base(test.fixture), func(t *testing.T) { t.Run(path.Base(test.fixture), func(t *testing.T) {

View File

@ -16,7 +16,7 @@ fingerprint: $(FINGERPRINT_FILE)
jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar
archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.tgz
native-image: $(PKGSDIR)/example-java-app $(PKGSDIR)/gcc-amd64-darwin-exec-debug native-image: $(PKGSDIR)/example-java-app $(PKGSDIR)/gcc-amd64-darwin-exec-debug
@ -31,6 +31,9 @@ $(PKGSDIR)/example-java-app-maven-0.1.0.tar: $(PKGSDIR)/example-java-app-maven-0
$(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
$(PKGSDIR)/example-java-app-maven-0.1.0.tgz: $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz
tar -czf $(PKGSDIR)/example-java-app-maven-0.1.0.tgz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
# Nested jar... # Nested jar...
$(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar: $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar:

View File

@ -8,7 +8,6 @@ import (
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"strings"
"sync" "sync"
"github.com/mholt/archives" "github.com/mholt/archives"
@ -196,29 +195,6 @@ func deriveIDFromFile(cfg Config) (artifact.ID, string) {
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
} }
// handleCompoundAliases rewrites compound archive extension aliases
// (e.g. ".tgz") to their full forms (e.g. ".tar.gz") so the archives
// library can identify the format; unknown extensions pass through.
// see: https://github.com/anchore/syft/issues/4416
// reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats
func handleCompoundAliases(path string) (pathAlias string) {
	aliases := map[string]string{
		".tgz":  ".tar.gz",
		".tbz2": ".tar.bz2",
		".txz":  ".tar.xz",
		".tlz":  ".tar.lz",
		".tzst": ".tar.zst",
	}
	pathAlias = path
	ext := filepath.Ext(path)
	if full, known := aliases[ext]; known {
		pathAlias = strings.TrimSuffix(path, ext) + full
	}
	return pathAlias
}
// fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive
// contents have been made available. A cleanup function is provided for any temp files created (if any). // contents have been made available. A cleanup function is provided for any temp files created (if any).
// Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where // Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where
@ -234,7 +210,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
// unarchived. // unarchived.
envelopedUnarchiver, _, err := archives.Identify(context.Background(), handleCompoundAliases(path), nil) envelopedUnarchiver, _, err := archives.Identify(context.Background(), intFile.HandleCompoundArchiveAliases(path), nil)
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil { if err != nil {