From 5b42bfe017c5aa1f38ef71de7e999887a4b6b562 Mon Sep 17 00:00:00 2001 From: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:46:14 -0500 Subject: [PATCH] fix: update identify to stream-based detections update: file_source.go:213 - switch to stream-based (already opens file later anyway) tar_file_traversal.go:23 - opens the file on line 17, so could pass tarReader defer: unknowns_tasks.go:64 only has coords.RealPath, would need to open files (potential perf hit for many files) model.go:159 isArchive() is a helper - opening files could be more expensive here Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> --- internal/file/tar_file_traversal.go | 2 +- internal/packagemetadata/names.go | 2 +- syft/pkg/cataloger/php/parse_pecl_pear.go | 4 ++-- syft/source/filesource/file_source.go | 10 ++++++++-- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/internal/file/tar_file_traversal.go b/internal/file/tar_file_traversal.go index 8046ce00e..3236f0dc9 100644 --- a/internal/file/tar_file_traversal.go +++ b/internal/file/tar_file_traversal.go @@ -20,7 +20,7 @@ func TraverseFilesInTar(ctx context.Context, archivePath string, visitor archive } defer internal.CloseAndLogError(tarReader, archivePath) - format, _, err := archives.Identify(ctx, HandleCompoundArchiveAliases(archivePath), nil) + format, _, err := archives.Identify(ctx, archivePath, tarReader) if err != nil { return fmt.Errorf("failed to identify tar compression format: %w", err) } diff --git a/internal/packagemetadata/names.go b/internal/packagemetadata/names.go index d7a1bfcf9..6686d4e25 100644 --- a/internal/packagemetadata/names.go +++ b/internal/packagemetadata/names.go @@ -99,7 +99,7 @@ var jsonTypes = makeJSONTypes( jsonNames(pkg.PEBinary{}, "pe-binary"), jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"), jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, 
"php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred - jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), //nolint:staticcheck + jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), jsonNames(pkg.PhpPearEntry{}, "php-pear-entry"), jsonNames(pkg.PortageEntry{}, "portage-db-entry", "PortageMetadata"), jsonNames(pkg.PythonPackage{}, "python-package", "PythonPackageMetadata"), diff --git a/syft/pkg/cataloger/php/parse_pecl_pear.go b/syft/pkg/cataloger/php/parse_pecl_pear.go index c4af6cebe..b3580ef57 100644 --- a/syft/pkg/cataloger/php/parse_pecl_pear.go +++ b/syft/pkg/cataloger/php/parse_pecl_pear.go @@ -30,8 +30,8 @@ func (p *peclPearData) ToPear() pkg.PhpPearEntry { } } -func (p *peclPearData) ToPecl() pkg.PhpPeclEntry { //nolint:staticcheck - return pkg.PhpPeclEntry(p.ToPear()) //nolint:staticcheck +func (p *peclPearData) ToPecl() pkg.PhpPeclEntry { + return pkg.PhpPeclEntry(p.ToPear()) } func parsePecl(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index bdc0a3a61..381773bb3 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -207,10 +207,16 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro return analysisPath, cleanupFn, nil } - // if the given file is an archive (as indicated by the file extension and not MIME type) then unarchive it and + // if the given file is an archive (as indicated by magic bytes) then unarchive it and // use the contents as the source. Note: this does NOT recursively unarchive contents, only the given path is // unarchived. 
- envelopedUnarchiver, _, err := archives.Identify(context.Background(), intFile.HandleCompoundArchiveAliases(path), nil) + f, err := os.Open(path) + if err != nil { + return analysisPath, cleanupFn, nil + } + defer f.Close() + + envelopedUnarchiver, _, err := archives.Identify(context.Background(), path, f) if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) if err != nil {