mirror of
https://github.com/anchore/syft.git
synced 2026-03-29 21:23:24 +02:00
* fix(deb and snaps): prevent excess reads Previously, Syft could allocate excess memory or tempfile space if there were highly compressed objects in deb archives, or at paths where the kernel changelog was expected by the snap cataloger. Use io.LimitReaders for extracting parts of deb archives, and refactor the snap cataloger's reading of the kernel changelog to use a streaming parsing, eliminating the possibility of excess allocation. Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> * fix: always cleanup temp file from file source Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> * use streaming strategy for deb archives Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> --------- Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
298 lines
7.9 KiB
Go
298 lines
7.9 KiB
Go
package filesource
|
|
|
|
import (
|
|
"context"
|
|
"crypto"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"github.com/mholt/archives"
|
|
"github.com/opencontainers/go-digest"
|
|
|
|
stereoFile "github.com/anchore/stereoscope/pkg/file"
|
|
intFile "github.com/anchore/syft/internal/file"
|
|
"github.com/anchore/syft/internal/log"
|
|
"github.com/anchore/syft/syft/artifact"
|
|
"github.com/anchore/syft/syft/file"
|
|
"github.com/anchore/syft/syft/internal/fileresolver"
|
|
"github.com/anchore/syft/syft/source"
|
|
"github.com/anchore/syft/syft/source/directorysource"
|
|
"github.com/anchore/syft/syft/source/internal"
|
|
)
|
|
|
|
// compile-time assertion that *fileSource satisfies the source.Source interface
var _ source.Source = (*fileSource)(nil)
|
|
|
|
// Config holds the user-provided options for constructing a file-based source.
type Config struct {
	// Path is the location on disk of the file to analyze.
	Path string
	// Exclude holds path exclusion patterns applied during resolution.
	Exclude source.ExcludeConfig
	// DigestAlgorithms lists the hashes to compute over the file contents at construction time.
	DigestAlgorithms []crypto.Hash
	// Alias optionally overrides the derived name/version/supplier in the source description
	// and is folded into the artifact ID derivation (see deriveIDFromFile).
	Alias source.Alias
	// SkipExtractArchive, when true, leaves archive files unextracted so that
	// individual catalogers may handle them instead (where supported).
	SkipExtractArchive bool
}
|
|
|
|
// fileSource is a source.Source implementation for scanning a single file,
// which may itself be an archive whose contents get extracted for analysis.
type fileSource struct {
	// id is the derived artifact ID (content digest plus optional alias)
	id artifact.ID
	// digestForVersion is the content digest used as the default version string
	digestForVersion string
	// config is the construction-time configuration
	config Config
	// resolver is lazily built and cached by FileResolver, cleared by Close
	resolver file.Resolver
	// mutex guards resolver construction/caching and Close
	mutex *sync.Mutex
	// closer removes any temporary extraction artifacts (may be a no-op)
	closer func() error
	// digests are the content digests computed at construction (per Config.DigestAlgorithms)
	digests []file.Digest
	// mimeType is the MIME type detected from the file contents at construction
	mimeType string
	// analysisPath is the path actually scanned: the file itself, or the
	// temp directory holding extracted archive contents
	analysisPath string
}
|
|
|
|
func NewFromPath(path string) (source.Source, error) {
|
|
return New(Config{Path: path})
|
|
}
|
|
|
|
// New creates a source for the file at cfg.Path. The file is opened once up
// front so that digests and the MIME type can be computed; if the file is a
// recognized archive (and extraction is not disabled), its contents are
// unpacked into a temporary directory that becomes the analysis path.
// Returns an error if the path cannot be opened/stat'd, is a directory,
// digesting fails, or archive extraction fails.
func New(cfg Config) (source.Source, error) {
	f, err := os.Open(cfg.Path)
	if err != nil {
		return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
	}
	// f is only needed within this constructor (digests + MIME type detection)
	defer f.Close()

	fileMeta, err := f.Stat()
	if err != nil {
		return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
	}

	if fileMeta.IsDir() {
		// directories are handled by directorysource, not this source
		return nil, fmt.Errorf("given path is a directory: %q", cfg.Path)
	}

	// compute requested content digests (skipped entirely when none are configured)
	var digests []file.Digest
	if len(cfg.DigestAlgorithms) > 0 {
		digests, err = intFile.NewDigestsFromFile(context.TODO(), f, cfg.DigestAlgorithms)
		if err != nil {
			return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
		}
	}

	// resolve the path to analyze: either the file itself, or a temp directory
	// containing extracted archive contents (cleanupFn removes any temp artifacts)
	analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
	if err != nil {
		// always attempt cleanup on failure so temp files are not leaked
		if cleanupFn != nil {
			if cleanupErr := cleanupFn(); cleanupErr != nil {
				log.Warnf("failed to cleanup temporary directory: %v", cleanupErr)
			}
		}
		return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
	}

	id, versionDigest := deriveIDFromFile(cfg)

	return &fileSource{
		id:               id,
		config:           cfg,
		mutex:            &sync.Mutex{},
		closer:           cleanupFn,
		analysisPath:     analysisPath,
		digestForVersion: versionDigest,
		digests:          digests,
		// NOTE(review): MIMEType reads from f's current offset — this assumes any
		// earlier readers (digesting) leave/restore the offset appropriately; confirm
		mimeType: stereoFile.MIMEType(f),
	}, nil
}
|
|
|
|
// ID returns the artifact ID derived for this source (see deriveIDFromFile).
func (s fileSource) ID() artifact.ID {
	return s.id
}
|
|
|
|
func (s fileSource) Describe() source.Description {
|
|
name := path.Base(s.config.Path)
|
|
version := s.digestForVersion
|
|
supplier := ""
|
|
if !s.config.Alias.IsEmpty() {
|
|
a := s.config.Alias
|
|
if a.Name != "" {
|
|
name = a.Name
|
|
}
|
|
|
|
if a.Version != "" {
|
|
version = a.Version
|
|
}
|
|
|
|
if a.Supplier != "" {
|
|
supplier = a.Supplier
|
|
}
|
|
}
|
|
return source.Description{
|
|
ID: string(s.id),
|
|
Name: name,
|
|
Version: version,
|
|
Supplier: supplier,
|
|
Metadata: source.FileMetadata{
|
|
Path: s.config.Path,
|
|
Digests: s.digests,
|
|
MIMEType: s.mimeType,
|
|
},
|
|
}
|
|
}
|
|
|
|
func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
if s.resolver != nil {
|
|
return s.resolver, nil
|
|
}
|
|
|
|
exclusionFunctions, err := directorysource.GetDirectoryExclusionFunctions(s.analysisPath, s.config.Exclude.Paths)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fi, err := os.Stat(s.analysisPath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err)
|
|
}
|
|
|
|
if isArchiveAnalysis := fi.IsDir(); isArchiveAnalysis {
|
|
// this is an analysis of an archive file... we should scan the directory where the archive contents
|
|
res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
|
|
}
|
|
|
|
s.resolver = res
|
|
return s.resolver, nil
|
|
}
|
|
|
|
// This is analysis of a single file. Use file indexer.
|
|
res, err := fileresolver.NewFromFile(s.analysisPath, exclusionFunctions...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to create file resolver: %w", err)
|
|
}
|
|
|
|
s.resolver = res
|
|
return s.resolver, nil
|
|
}
|
|
|
|
func (s *fileSource) Close() error {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
if s.closer == nil {
|
|
return nil
|
|
}
|
|
|
|
s.resolver = nil
|
|
return s.closer()
|
|
}
|
|
|
|
// deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included
|
|
// in the ID derivation (along with contents). This way if the user scans the same item but is considered to be
|
|
// logically different, then ID will express that.
|
|
func deriveIDFromFile(cfg Config) (artifact.ID, string) {
|
|
d := digestOfFileContents(cfg.Path)
|
|
info := d
|
|
|
|
if !cfg.Alias.IsEmpty() {
|
|
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
|
|
// scans the same item but is considered to be logically different, then ID will express that.
|
|
info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
|
|
}
|
|
|
|
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
|
|
}
|
|
|
|
// fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive
|
|
// contents have been made available. A cleanup function is provided for any temp files created (if any).
|
|
// Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where
|
|
// supported)
|
|
func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() error, error) {
|
|
var cleanupFn = func() error { return nil }
|
|
var analysisPath = path
|
|
|
|
if skipExtractArchive {
|
|
return analysisPath, cleanupFn, nil
|
|
}
|
|
|
|
envelopedUnarchiver, _, err := intFile.IdentifyArchive(context.Background(), path, nil)
|
|
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
|
|
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
|
|
if err != nil {
|
|
return "", cleanupFn, fmt.Errorf("unable to unarchive source file: %w", err)
|
|
}
|
|
|
|
log.Debugf("source path is an archive")
|
|
}
|
|
|
|
return analysisPath, cleanupFn, nil
|
|
}
|
|
|
|
func digestOfFileContents(path string) string {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return digest.SHA256.FromString(path).String()
|
|
}
|
|
defer f.Close()
|
|
|
|
di, err := digest.SHA256.FromReader(f)
|
|
if err != nil {
|
|
return digest.SHA256.FromString(path).String()
|
|
}
|
|
|
|
return di.String()
|
|
}
|
|
|
|
// unarchiveToTmp extracts the archive at path into a freshly created temporary
// directory using the given extractor. It returns the temp directory path, a
// cleanup function that removes it, and any extraction error. Note that the
// temp directory and its remover are returned even when extraction fails, so
// the caller can (and should) clean up partial output.
func unarchiveToTmp(path string, unarchiver archives.Extractor) (string, func() error, error) {
	var cleanupFn = func() error { return nil }
	archive, err := os.Open(path)
	if err != nil {
		return "", cleanupFn, fmt.Errorf("unable to open archive: %v", err)
	}
	defer archive.Close()

	tempDir, err := os.MkdirTemp("", "syft-archive-contents-")
	if err != nil {
		return "", cleanupFn, fmt.Errorf("unable to create tempdir for archive processing: %w", err)
	}

	// visitor is invoked once per archive entry; its defers run per entry
	visitor := func(_ context.Context, file archives.FileInfo) error {
		// Protect against symlink attacks by ensuring path doesn't escape tempDir
		destPath, err := intFile.SafeJoin(tempDir, file.NameInArchive)
		if err != nil {
			return err
		}

		if file.IsDir() {
			return os.MkdirAll(destPath, file.Mode())
		}

		// ensure parent directories exist for entries that precede their directory entries
		if err = os.MkdirAll(filepath.Dir(destPath), os.ModeDir|0755); err != nil {
			return fmt.Errorf("failed to create parent directory: %w", err)
		}

		rc, err := file.Open()
		if err != nil {
			return fmt.Errorf("failed to open file in archive: %w", err)
		}
		defer rc.Close()

		destFile, err := os.Create(destPath)
		if err != nil {
			return fmt.Errorf("failed to create file in destination: %w", err)
		}
		defer destFile.Close()

		// apply the archived file's mode before writing its contents
		if err := destFile.Chmod(file.Mode()); err != nil {
			return fmt.Errorf("failed to change mode of destination file: %w", err)
		}

		// stream the entry so large files are never fully buffered in memory
		if _, err := io.Copy(destFile, rc); err != nil {
			return fmt.Errorf("failed to copy file contents: %w", err)
		}

		return nil
	}

	return tempDir, func() error {
		return os.RemoveAll(tempDir)
	}, unarchiver.Extract(context.Background(), archive, visitor)
}
|