mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 02:26:42 +01:00
fix: handle compound aliases like `.tgz` when cataloging archives (#4421)
--------- Signed-off-by: Yuntao Hu <victorhu493@gmail.com> Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Co-authored-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
d37ed567a8
commit
afe28a2fc0
46
internal/file/archive_aliases.go
Normal file
46
internal/file/archive_aliases.go
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/mholt/archives"
|
||||||
|
)
|
||||||
|
|
||||||
|
// compoundExtensionAliases maps shorthand archive extensions to their full forms.
// The mholt/archives library doesn't recognize these aliases natively.
//
// Lookups against this map are case-sensitive by design (e.g. ".TGZ" is not
// treated as an alias) — see the corresponding unit tests.
//
// See: https://github.com/anchore/syft/issues/4416
// Reference: https://github.com/mholt/archives?tab=readme-ov-file#supported-compression-formats
var compoundExtensionAliases = map[string]string{
	".tgz":  ".tar.gz",
	".tbz2": ".tar.bz2",
	".txz":  ".tar.xz",
	".tlz":  ".tar.lz",
	".tzst": ".tar.zst",
}
|
||||||
|
|
||||||
|
// IdentifyArchive is a wrapper around archives.Identify that handles compound extension
|
||||||
|
// aliases (like .tgz -> .tar.gz) transparently. It first attempts filename-based detection
|
||||||
|
// using the alias map, and falls back to content-based detection if needed.
|
||||||
|
//
|
||||||
|
// This function is a drop-in replacement for archives.Identify that centralizes
|
||||||
|
// the compound alias handling logic in one place.
|
||||||
|
func IdentifyArchive(ctx context.Context, path string, r io.Reader) (archives.Format, io.Reader, error) {
|
||||||
|
// First, try to identify using the alias-mapped path (filename-based detection)
|
||||||
|
normalizedPath := handleCompoundArchiveAliases(path)
|
||||||
|
return archives.Identify(ctx, normalizedPath, r)
|
||||||
|
}
|
||||||
|
|
||||||
|
// handleCompoundArchiveAliases normalizes archive file paths that use compound extension
|
||||||
|
// aliases (like .tgz) to their full forms (like .tar.gz) for correct identification
|
||||||
|
// by the mholt/archives library.
|
||||||
|
func handleCompoundArchiveAliases(path string) string {
|
||||||
|
ext := filepath.Ext(path)
|
||||||
|
if newExt, ok := compoundExtensionAliases[ext]; ok {
|
||||||
|
return strings.TrimSuffix(path, ext) + newExt
|
||||||
|
}
|
||||||
|
return path
|
||||||
|
}
|
||||||
73
internal/file/archive_aliases_test.go
Normal file
73
internal/file/archive_aliases_test.go
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
package file
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestHandleCompoundArchiveAliases(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "tgz to tar.gz",
|
||||||
|
input: "/path/to/archive.tgz",
|
||||||
|
expected: "/path/to/archive.tar.gz",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tbz2 to tar.bz2",
|
||||||
|
input: "/path/to/archive.tbz2",
|
||||||
|
expected: "/path/to/archive.tar.bz2",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "txz to tar.xz",
|
||||||
|
input: "/path/to/archive.txz",
|
||||||
|
expected: "/path/to/archive.tar.xz",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tlz to tar.lz",
|
||||||
|
input: "/path/to/archive.tlz",
|
||||||
|
expected: "/path/to/archive.tar.lz",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "tzst to tar.zst",
|
||||||
|
input: "/path/to/archive.tzst",
|
||||||
|
expected: "/path/to/archive.tar.zst",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "standard tar.gz unchanged",
|
||||||
|
input: "/path/to/archive.tar.gz",
|
||||||
|
expected: "/path/to/archive.tar.gz",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "zip unchanged",
|
||||||
|
input: "/path/to/archive.zip",
|
||||||
|
expected: "/path/to/archive.zip",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no extension unchanged",
|
||||||
|
input: "/path/to/archive",
|
||||||
|
expected: "/path/to/archive",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "case sensitive - TGZ not matched",
|
||||||
|
input: "/path/to/archive.TGZ",
|
||||||
|
expected: "/path/to/archive.TGZ",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "just filename with tgz",
|
||||||
|
input: "archive.tgz",
|
||||||
|
expected: "archive.tar.gz",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := handleCompoundArchiveAliases(tt.input)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -20,7 +20,7 @@ func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archive
|
|||||||
}
|
}
|
||||||
defer internal.CloseAndLogError(tarReader, archivePath)
|
defer internal.CloseAndLogError(tarReader, archivePath)
|
||||||
|
|
||||||
format, _, err := archives.Identify(ctx, archivePath, nil)
|
format, _, err := IdentifyArchive(ctx, archivePath, tarReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to identify tar compression format: %w", err)
|
return fmt.Errorf("failed to identify tar compression format: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -99,7 +99,8 @@ var jsonTypes = makeJSONTypes(
|
|||||||
jsonNames(pkg.PEBinary{}, "pe-binary"),
|
jsonNames(pkg.PEBinary{}, "pe-binary"),
|
||||||
jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"),
|
jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"),
|
||||||
jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, "php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred
|
jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, "php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred
|
||||||
jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), //nolint:staticcheck
|
//nolint:staticcheck
|
||||||
|
jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"),
|
||||||
jsonNames(pkg.PhpPearEntry{}, "php-pear-entry"),
|
jsonNames(pkg.PhpPearEntry{}, "php-pear-entry"),
|
||||||
jsonNames(pkg.PortageEntry{}, "portage-db-entry", "PortageMetadata"),
|
jsonNames(pkg.PortageEntry{}, "portage-db-entry", "PortageMetadata"),
|
||||||
jsonNames(pkg.PythonPackage{}, "python-package", "PythonPackageMetadata"),
|
jsonNames(pkg.PythonPackage{}, "python-package", "PythonPackageMetadata"),
|
||||||
|
|||||||
@ -4,8 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/mholt/archives"
|
intFile "github.com/anchore/syft/internal/file"
|
||||||
|
|
||||||
"github.com/anchore/syft/internal/log"
|
"github.com/anchore/syft/internal/log"
|
||||||
"github.com/anchore/syft/internal/sbomsync"
|
"github.com/anchore/syft/internal/sbomsync"
|
||||||
"github.com/anchore/syft/syft/cataloging"
|
"github.com/anchore/syft/syft/cataloging"
|
||||||
@ -60,7 +59,7 @@ func (c unknownsLabelerTask) finalize(resolver file.Resolver, s *sbom.SBOM) {
|
|||||||
if c.IncludeUnexpandedArchives {
|
if c.IncludeUnexpandedArchives {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
for coords := range s.Artifacts.FileMetadata {
|
for coords := range s.Artifacts.FileMetadata {
|
||||||
format, _, notArchiveErr := archives.Identify(ctx, coords.RealPath, nil)
|
format, _, notArchiveErr := intFile.IdentifyArchive(ctx, coords.RealPath, nil)
|
||||||
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
if format != nil && notArchiveErr == nil && !hasPackageReference(coords) {
|
||||||
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
|
s.Artifacts.Unknowns[coords] = append(s.Artifacts.Unknowns[coords], "archive not cataloged")
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,9 +6,8 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/mholt/archives"
|
|
||||||
|
|
||||||
"github.com/anchore/packageurl-go"
|
"github.com/anchore/packageurl-go"
|
||||||
|
"github.com/anchore/syft/internal/file"
|
||||||
"github.com/anchore/syft/internal/log"
|
"github.com/anchore/syft/internal/log"
|
||||||
"github.com/anchore/syft/syft/pkg"
|
"github.com/anchore/syft/syft/pkg"
|
||||||
"github.com/anchore/syft/syft/sbom"
|
"github.com/anchore/syft/syft/sbom"
|
||||||
@ -155,7 +154,7 @@ func trimRelative(s string) string {
|
|||||||
|
|
||||||
// isArchive returns true if the path appears to be an archive
|
// isArchive returns true if the path appears to be an archive
|
||||||
func isArchive(path string) bool {
|
func isArchive(path string) bool {
|
||||||
format, _, err := archives.Identify(context.Background(), path, nil)
|
format, _, err := file.IdentifyArchive(context.Background(), path, nil)
|
||||||
return err == nil && format != nil
|
return err == nil && format != nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -32,6 +32,13 @@ func Test_parseTarWrappedJavaArchive(t *testing.T) {
|
|||||||
"joda-time",
|
"joda-time",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
fixture: "test-fixtures/java-builds/packages/example-java-app-maven-0.1.0.tgz",
|
||||||
|
expected: []string{
|
||||||
|
"example-java-app-maven",
|
||||||
|
"joda-time",
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(path.Base(test.fixture), func(t *testing.T) {
|
t.Run(path.Base(test.fixture), func(t *testing.T) {
|
||||||
|
|||||||
@ -16,7 +16,7 @@ fingerprint: $(FINGERPRINT_FILE)
|
|||||||
|
|
||||||
jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar
|
jars: $(PKGSDIR)/example-java-app-maven-0.1.0.jar $(PKGSDIR)/example-java-app-gradle-0.1.0.jar $(PKGSDIR)/example-jenkins-plugin.hpi $(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar
|
||||||
|
|
||||||
archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz
|
archives: $(PKGSDIR)/example-java-app-maven-0.1.0.zip $(PKGSDIR)/example-java-app-maven-0.1.0.tar $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.tgz
|
||||||
|
|
||||||
native-image: $(PKGSDIR)/example-java-app $(PKGSDIR)/gcc-amd64-darwin-exec-debug
|
native-image: $(PKGSDIR)/example-java-app $(PKGSDIR)/gcc-amd64-darwin-exec-debug
|
||||||
|
|
||||||
@ -31,6 +31,9 @@ $(PKGSDIR)/example-java-app-maven-0.1.0.tar: $(PKGSDIR)/example-java-app-maven-0
|
|||||||
$(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
|
$(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz: $(PKGSDIR)/example-java-app-maven-0.1.0.jar
|
||||||
tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
|
tar -czvf $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
|
||||||
|
|
||||||
|
$(PKGSDIR)/example-java-app-maven-0.1.0.tgz: $(PKGSDIR)/example-java-app-maven-0.1.0.tar.gz
|
||||||
|
tar -czf $(PKGSDIR)/example-java-app-maven-0.1.0.tgz $(PKGSDIR)/example-java-app-maven-0.1.0.jar
|
||||||
|
|
||||||
# Nested jar...
|
# Nested jar...
|
||||||
|
|
||||||
$(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar:
|
$(PKGSDIR)/spring-boot-0.0.1-SNAPSHOT.jar:
|
||||||
|
|||||||
@ -207,10 +207,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
|
|||||||
return analysisPath, cleanupFn, nil
|
return analysisPath, cleanupFn, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and
|
envelopedUnarchiver, _, err := intFile.IdentifyArchive(context.Background(), path, nil)
|
||||||
// use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is
|
|
||||||
// unarchived.
|
|
||||||
envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, nil)
|
|
||||||
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
|
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
|
||||||
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
|
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user