Refactor fileresolver to not require base path (#4298)

* ref: close source in test and examples

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>

* ref: pretty file/directory source resolver (make them more similar)

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>

* ref: move absoluteSymlinkFreePathToParent to file resolver

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>

* revert breaking change

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>

---------

Signed-off-by: Kudryavcev Nikolay <kydry.nikolau@gmail.com>
This commit is contained in:
Kudryavcev Nikolay 2025-10-29 17:41:18 +03:00 committed by GitHub
parent 728feea620
commit f5c765192c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 177 additions and 160 deletions

View File

@ -253,7 +253,6 @@ func generateSBOMForAttestation(ctx context.Context, id clio.Identification, opt
} }
src, err := getSource(ctx, opts, userInput, stereoscope.RegistryTag) src, err := getSource(ctx, opts, userInput, stereoscope.RegistryTag)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -185,7 +185,6 @@ func runScan(ctx context.Context, id clio.Identification, opts *scanOptions, use
} }
src, err := getSource(ctx, &opts.Catalog, userInput, sources...) src, err := getSource(ctx, &opts.Catalog, userInput, sources...)
if err != nil { if err != nil {
return err return err
} }

View File

@ -25,7 +25,6 @@ func BenchmarkImagePackageCatalogers(b *testing.B) {
// get the source object for the image // get the source object for the image
theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive")) theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive"))
require.NoError(b, err) require.NoError(b, err)
b.Cleanup(func() { b.Cleanup(func() {
require.NoError(b, theSource.Close()) require.NoError(b, theSource.Close())
}) })

View File

@ -38,11 +38,11 @@ func catalogFixtureImageWithConfig(t *testing.T, fixtureImageName string, cfg *s
// get the source to build an SBOM against // get the source to build an SBOM against
theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive")) theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive"))
require.NoError(t, err) require.NoError(t, err)
t.Cleanup(func() { t.Cleanup(func() {
require.NoError(t, theSource.Close()) require.NoError(t, theSource.Close())
}) })
// build the SBOM
s, err := syft.CreateSBOM(context.Background(), theSource, cfg) s, err := syft.CreateSBOM(context.Background(), theSource, cfg)
require.NoError(t, err) require.NoError(t, err)
@ -66,7 +66,7 @@ func catalogDirectory(t *testing.T, dir string, catalogerSelection ...string) (s
func catalogDirectoryWithConfig(t *testing.T, dir string, cfg *syft.CreateSBOMConfig) (sbom.SBOM, source.Source) { func catalogDirectoryWithConfig(t *testing.T, dir string, cfg *syft.CreateSBOMConfig) (sbom.SBOM, source.Source) {
cfg.CatalogerSelection = cfg.CatalogerSelection.WithDefaults(pkgcataloging.DirectoryTag) cfg.CatalogerSelection = cfg.CatalogerSelection.WithDefaults(pkgcataloging.DirectoryTag)
// get the source to build an sbom against // get the source to build an SBOM against
theSource, err := syft.GetSource(context.Background(), dir, syft.DefaultGetSourceConfig().WithSources("dir")) theSource, err := syft.GetSource(context.Background(), dir, syft.DefaultGetSourceConfig().WithSources("dir"))
require.NoError(t, err) require.NoError(t, err)
t.Cleanup(func() { t.Cleanup(func() {

View File

@ -23,6 +23,7 @@ const defaultImage = "alpine:3.19"
func main() { func main() {
// automagically get a source.Source for arbitrary string input // automagically get a source.Source for arbitrary string input
src := getSource(imageReference()) src := getSource(imageReference())
defer src.Close()
// will catalog the given source and return an SBOM keeping in mind several configurable options // will catalog the given source and return an SBOM keeping in mind several configurable options
sbom := getSBOM(src) sbom := getSBOM(src)
@ -46,7 +47,6 @@ func getSource(input string) source.Source {
fmt.Println("detecting source type for input:", input, "...") fmt.Println("detecting source type for input:", input, "...")
src, err := syft.GetSource(context.Background(), input, nil) src, err := syft.GetSource(context.Background(), input, nil)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -19,6 +19,7 @@ const defaultImage = "alpine:3.19"
func main() { func main() {
// automagically get a source.Source for arbitrary string input // automagically get a source.Source for arbitrary string input
src := getSource(imageReference()) src := getSource(imageReference())
defer src.Close()
// catalog the given source and return an SBOM // catalog the given source and return an SBOM
sbom := getSBOM(src) sbom := getSBOM(src)
@ -40,7 +41,6 @@ func imageReference() string {
func getSource(input string) source.Source { func getSource(input string) source.Source {
src, err := syft.GetSource(context.Background(), input, nil) src, err := syft.GetSource(context.Background(), input, nil)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -19,6 +19,7 @@ const defaultImage = "alpine:3.19"
func main() { func main() {
// automagically get a source.Source for arbitrary string input // automagically get a source.Source for arbitrary string input
src := getSource(imageReference()) src := getSource(imageReference())
defer src.Close()
// catalog the given source and return an SBOM // catalog the given source and return an SBOM
// let's explicitly use catalogers that are: // let's explicitly use catalogers that are:
@ -44,7 +45,6 @@ func imageReference() string {
func getSource(input string) source.Source { func getSource(input string) source.Source {
src, err := syft.GetSource(context.Background(), input, nil) src, err := syft.GetSource(context.Background(), input, nil)
if err != nil { if err != nil {
panic(err) panic(err)
} }

View File

@ -15,6 +15,7 @@ func main() {
image := "alpine:3.19" image := "alpine:3.19"
src, _ := syft.GetSource(context.Background(), image, syft.DefaultGetSourceConfig().WithSources("registry")) src, _ := syft.GetSource(context.Background(), image, syft.DefaultGetSourceConfig().WithSources("registry"))
defer src.Close()
sbom, _ := syft.CreateSBOM(context.Background(), src, syft.DefaultCreateSBOMConfig()) sbom, _ := syft.CreateSBOM(context.Background(), src, syft.DefaultCreateSBOMConfig())

View File

@ -81,6 +81,10 @@ func Test_EnvironmentTask(t *testing.T) {
// get the source // get the source
theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive")) theSource, err := syft.GetSource(context.Background(), tarPath, syft.DefaultGetSourceConfig().WithSources("docker-archive"))
require.NoError(t, err) require.NoError(t, err)
t.Cleanup(func() {
require.NoError(t, theSource.Close())
})
resolver, err := theSource.FileResolver(source.SquashedScope) resolver, err := theSource.FileResolver(source.SquashedScope)
require.NoError(t, err) require.NoError(t, err)

View File

@ -19,16 +19,16 @@ type Directory struct {
indexer *directoryIndexer indexer *directoryIndexer
} }
func NewFromDirectory(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { func NewFromDirectory(root, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
r, err := newFromDirectoryWithoutIndex(root, base, pathFilters...) resolver, err := newFromDirectoryWithoutIndex(root, base, pathFilters...)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return r, r.buildIndex() return resolver, resolver.buildIndex()
} }
func newFromDirectoryWithoutIndex(root string, base string, pathFilters ...PathIndexVisitor) (*Directory, error) { func newFromDirectoryWithoutIndex(root, base string, pathFilters ...PathIndexVisitor) (*Directory, error) {
chroot, err := NewChrootContextFromCWD(root, base) chroot, err := NewChrootContextFromCWD(root, base)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to interpret chroot context: %w", err) return nil, fmt.Errorf("unable to interpret chroot context: %w", err)
@ -66,6 +66,6 @@ func (r *Directory) buildIndex() error {
} }
// Stringer to represent a directory path data source // Stringer to represent a directory path data source
func (r Directory) String() string { func (r *Directory) String() string {
return fmt.Sprintf("dir:%s", r.path) return fmt.Sprintf("dir:%s", r.path)
} }

View File

@ -17,17 +17,31 @@ type File struct {
indexer *fileIndexer indexer *fileIndexer
} }
// parent should be the symlink free absolute path to the parent directory // NewFromFile single file analyser
// path is the filepath of the file we're creating content access for // path is the filepath of the file we're creating content access for
func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, error) { func NewFromFile(path string, pathFilters ...PathIndexVisitor) (*File, error) {
chroot, err := NewChrootContextFromCWD(parent, parent) resolver, err := newFromFileWithoutIndex(path, pathFilters...)
if err != nil {
return nil, err
}
return resolver, resolver.buildIndex()
}
func newFromFileWithoutIndex(path string, pathFilters ...PathIndexVisitor) (*File, error) {
absParentDir, err := absoluteSymlinkFreePathToParent(path)
if err != nil {
return nil, err
}
chroot, err := NewChrootContextFromCWD(absParentDir, absParentDir)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to interpret chroot context: %w", err) return nil, fmt.Errorf("unable to interpret chroot context: %w", err)
} }
cleanBase := chroot.Base() cleanBase := chroot.Base()
file := &File{ return &File{
path: path, path: path,
FiletreeResolver: FiletreeResolver{ FiletreeResolver: FiletreeResolver{
Chroot: *chroot, Chroot: *chroot,
@ -36,9 +50,7 @@ func NewFromFile(parent, path string, pathFilters ...PathIndexVisitor) (*File, e
Opener: nativeOSFileOpener, Opener: nativeOSFileOpener,
}, },
indexer: newFileIndexer(path, cleanBase, pathFilters...), indexer: newFileIndexer(path, cleanBase, pathFilters...),
} }, nil
return file, file.buildIndex()
} }
func (r *File) buildIndex() error { func (r *File) buildIndex() error {
@ -58,6 +70,6 @@ func (r *File) buildIndex() error {
} }
// Stringer to represent a file path data source // Stringer to represent a file path data source
func (r File) String() string { func (r *File) String() string {
return fmt.Sprintf("file:%s", r.path) return fmt.Sprintf("file:%s", r.path)
} }

View File

@ -1384,9 +1384,10 @@ func TestFileResolver_FilesByPath(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, tt.filePath) resolver, err := NewFromFile(tt.filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, resolver) require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
refs, err := resolver.FilesByPath(tt.fileByPathInput) refs, err := resolver.FilesByPath(tt.fileByPathInput)
require.NoError(t, err) require.NoError(t, err)
@ -1431,8 +1432,11 @@ func TestFileResolver_MultipleFilesByPath(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath) resolver, err := NewFromFile(filePath)
assert.NoError(t, err) assert.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
refs, err := resolver.FilesByPath(tt.input...) refs, err := resolver.FilesByPath(tt.input...)
assert.NoError(t, err) assert.NoError(t, err)
@ -1449,8 +1453,11 @@ func TestFileResolver_FilesByGlob(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath) resolver, err := NewFromFile(filePath)
assert.NoError(t, err) assert.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
refs, err := resolver.FilesByGlob("**/*.txt") refs, err := resolver.FilesByGlob("**/*.txt")
assert.NoError(t, err) assert.NoError(t, err)
@ -1476,8 +1483,11 @@ func Test_fileResolver_FilesByMIMEType(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath) resolver, err := NewFromFile(filePath)
assert.NoError(t, err) assert.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
locations, err := resolver.FilesByMIMEType(test.mimeType) locations, err := resolver.FilesByMIMEType(test.mimeType)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, test.expectedPaths.Size(), len(locations)) assert.Equal(t, test.expectedPaths.Size(), len(locations))
@ -1497,10 +1507,12 @@ func Test_fileResolver_FileContentsByLocation(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
r, err := NewFromFile(parentPath, filePath) resolver, err := NewFromFile(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
exists, existingPath, err := r.Tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt"))) exists, existingPath, err := resolver.Tree.File(stereoscopeFile.Path(filepath.Join(cwd, "test-fixtures/image-simple/file-1.txt")))
require.True(t, exists) require.True(t, exists)
require.NoError(t, err) require.NoError(t, err)
require.True(t, existingPath.HasReference()) require.True(t, existingPath.HasReference())
@ -1525,7 +1537,7 @@ func Test_fileResolver_FileContentsByLocation(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
actual, err := r.FileContentsByLocation(test.location) actual, err := resolver.FileContentsByLocation(test.location)
if test.err { if test.err {
require.Error(t, err) require.Error(t, err)
return return
@ -1546,8 +1558,11 @@ func TestFileResolver_AllLocations_errorOnDirRequest(t *testing.T) {
parentPath, err := absoluteSymlinkFreePathToParent(filePath) parentPath, err := absoluteSymlinkFreePathToParent(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath)
resolver, err := NewFromFile(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
var dirLoc *file.Location var dirLoc *file.Location
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())
@ -1575,8 +1590,11 @@ func TestFileResolver_AllLocations(t *testing.T) {
parentPath, err := absoluteSymlinkFreePathToParent(filePath) parentPath, err := absoluteSymlinkFreePathToParent(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath)
resolver, err := NewFromFile(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
paths := strset.New() paths := strset.New()
for loc := range resolver.AllLocations(context.Background()) { for loc := range resolver.AllLocations(context.Background()) {
@ -1600,8 +1618,11 @@ func Test_FileResolver_AllLocationsDoesNotLeakGoRoutine(t *testing.T) {
parentPath, err := absoluteSymlinkFreePathToParent(filePath) parentPath, err := absoluteSymlinkFreePathToParent(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, parentPath) require.NotNil(t, parentPath)
resolver, err := NewFromFile(parentPath, filePath)
resolver, err := NewFromFile(filePath)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, resolver)
assert.Equal(t, resolver.Chroot.Base(), parentPath)
require.NoError(t, err) require.NoError(t, err)
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())

View File

@ -13,7 +13,7 @@ import (
func Test_noSQLiteDriverError(t *testing.T) { func Test_noSQLiteDriverError(t *testing.T) {
// this test package must not import the sqlite library // this test package must not import the sqlite library
file := "../test-fixtures/Packages" file := "../test-fixtures/Packages"
resolver, err := fileresolver.NewFromFile(file, file) resolver, err := fileresolver.NewFromFile(file)
require.NoError(t, err) require.NoError(t, err)
cataloger := redhat.NewDBCataloger() cataloger := redhat.NewDBCataloger()

View File

@ -30,24 +30,21 @@ type Config struct {
type directorySource struct { type directorySource struct {
id artifact.ID id artifact.ID
config Config config Config
resolver *fileresolver.Directory resolver file.Resolver
mutex *sync.Mutex mutex *sync.Mutex
} }
func NewFromPath(path string) (source.Source, error) { func NewFromPath(path string) (source.Source, error) {
cfg := Config{ return New(Config{Path: path})
Path: path,
}
return New(cfg)
} }
func New(cfg Config) (source.Source, error) { func New(cfg Config) (source.Source, error) {
fi, err := os.Stat(cfg.Path) fileMeta, err := os.Stat(cfg.Path)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err) return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
} }
if !fi.IsDir() { if !fileMeta.IsDir() {
return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path) return nil, fmt.Errorf("given path is not a directory: %q", cfg.Path)
} }
@ -58,53 +55,6 @@ func New(cfg Config) (source.Source, error) {
}, nil }, nil
} }
// deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then
// the artifact ID is derived exclusively from the alias name and version. Otherwise, the artifact ID is derived
// from the path provided with an attempt to prune a prefix if a base is given. Since the contents of the directory
// are not considered, there is no semantic meaning to the artifact ID -- this is why the alias is preferred without
// consideration for the path.
func deriveIDFromDirectory(cfg Config) artifact.ID {
var info string
if !cfg.Alias.IsEmpty() {
// don't use any of the path information -- instead use the alias name and version as the artifact ID.
// why? this allows the user to set a dependable stable value for the artifact ID in case the
// scanning root changes (e.g. a user scans a directory, then moves it to a new location and scans again).
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
} else {
log.Warn("no explicit name and version provided for directory source, deriving artifact ID from the given path (which is not ideal)")
info = cleanDirPath(cfg.Path, cfg.Base)
}
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(filepath.Clean(info)).String())
}
func cleanDirPath(path, base string) string {
if path == base {
return path
}
if base != "" {
cleanRoot, rootErr := fileresolver.NormalizeRootDirectory(path)
cleanBase, baseErr := fileresolver.NormalizeBaseDirectory(base)
if rootErr == nil && baseErr == nil {
// allows for normalizing inputs:
// cleanRoot: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001/some/path
// cleanBase: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001
// normalized: some/path
relPath, err := filepath.Rel(cleanBase, cleanRoot)
if err == nil {
path = relPath
}
// this is odd, but this means we can't use base
}
// if the base is not a valid chroot, then just use the path as-is
}
return path
}
func (s directorySource) ID() artifact.ID { func (s directorySource) ID() artifact.ID {
return s.id return s.id
} }
@ -118,9 +68,11 @@ func (s directorySource) Describe() source.Description {
if a.Name != "" { if a.Name != "" {
name = a.Name name = a.Name
} }
if a.Version != "" { if a.Version != "" {
version = a.Version version = a.Version
} }
if a.Supplier != "" { if a.Supplier != "" {
supplier = a.Supplier supplier = a.Supplier
} }
@ -141,29 +93,31 @@ func (s *directorySource) FileResolver(_ source.Scope) (file.Resolver, error) {
s.mutex.Lock() s.mutex.Lock()
defer s.mutex.Unlock() defer s.mutex.Unlock()
if s.resolver == nil { if s.resolver != nil {
exclusionFunctions, err := GetDirectoryExclusionFunctions(s.config.Path, s.config.Exclude.Paths) return s.resolver, nil
if err != nil {
return nil, err
}
// this should be the only file resolver that might have overlap with where files are cached
exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)
res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
}
s.resolver = res
} }
exclusionFunctions, err := GetDirectoryExclusionFunctions(s.config.Path, s.config.Exclude.Paths)
if err != nil {
return nil, err
}
// this should be the only file resolver that might have overlap with where files are cached
exclusionFunctions = append(exclusionFunctions, excludeCachePathVisitors()...)
res, err := fileresolver.NewFromDirectory(s.config.Path, s.config.Base, exclusionFunctions...)
if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err)
}
s.resolver = res
return s.resolver, nil return s.resolver, nil
} }
func (s *directorySource) Close() error { func (s *directorySource) Close() error {
s.mutex.Lock() s.mutex.Lock()
defer s.mutex.Unlock() defer s.mutex.Unlock()
s.resolver = nil s.resolver = nil
return nil return nil
} }
@ -221,3 +175,50 @@ func GetDirectoryExclusionFunctions(root string, exclusions []string) ([]fileres
}, },
}, nil }, nil
} }
// deriveIDFromDirectory generates an artifact ID from the given directory config. If an alias is provided, then
// the artifact ID is derived exclusively from the alias name and version. Otherwise, the artifact ID is derived
// from the path provided with an attempt to prune a prefix if a base is given. Since the contents of the directory
// are not considered, there is no semantic meaning to the artifact ID -- this is why the alias is preferred without
// consideration for the path.
func deriveIDFromDirectory(cfg Config) artifact.ID {
var info string
if !cfg.Alias.IsEmpty() {
// don't use any of the path information -- instead use the alias name and version as the artifact ID.
// why? this allows the user to set a dependable stable value for the artifact ID in case the
// scanning root changes (e.g. a user scans a directory, then moves it to a new location and scans again).
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
} else {
log.Warn("no explicit name and version provided for directory source, deriving artifact ID from the given path (which is not ideal)")
info = cleanDirPath(cfg.Path, cfg.Base)
}
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(filepath.Clean(info)).String())
}
func cleanDirPath(path, base string) string {
if path == base {
return path
}
if base != "" {
cleanRoot, rootErr := fileresolver.NormalizeRootDirectory(path)
cleanBase, baseErr := fileresolver.NormalizeBaseDirectory(base)
if rootErr == nil && baseErr == nil {
// allows for normalizing inputs:
// cleanRoot: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001/some/path
// cleanBase: /var/folders/8x/gw98pp6535s4r8drc374tb1r0000gn/T/TestDirectoryEncoder1121632790/001
// normalized: some/path
relPath, err := filepath.Rel(cleanBase, cleanRoot)
if err == nil {
path = relPath
}
// this is odd, but this means we can't use base
}
// if the base is not a valid chroot, then just use the path as-is
}
return path
}

View File

@ -6,7 +6,6 @@ import (
"fmt" "fmt"
"os" "os"
"path" "path"
"path/filepath"
"sync" "sync"
"github.com/opencontainers/go-digest" "github.com/opencontainers/go-digest"
@ -50,7 +49,13 @@ func NewFromPath(path string) (source.Source, error) {
} }
func New(cfg Config) (source.Source, error) { func New(cfg Config) (source.Source, error) {
fileMeta, err := os.Stat(cfg.Path) f, err := os.Open(cfg.Path)
if err != nil {
return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
}
defer f.Close()
fileMeta, err := f.Stat()
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err) return nil, fmt.Errorf("unable to stat path=%q: %w", cfg.Path, err)
} }
@ -59,33 +64,19 @@ func New(cfg Config) (source.Source, error) {
return nil, fmt.Errorf("given path is a directory: %q", cfg.Path) return nil, fmt.Errorf("given path is a directory: %q", cfg.Path)
} }
analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
if err != nil {
return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
}
var digests []file.Digest var digests []file.Digest
if len(cfg.DigestAlgorithms) > 0 { if len(cfg.DigestAlgorithms) > 0 {
fh, err := os.Open(cfg.Path) digests, err = intFile.NewDigestsFromFile(context.TODO(), f, cfg.DigestAlgorithms)
if err != nil {
return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err)
}
defer fh.Close()
digests, err = intFile.NewDigestsFromFile(context.TODO(), fh, cfg.DigestAlgorithms)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err) return nil, fmt.Errorf("unable to calculate digests for file=%q: %w", cfg.Path, err)
} }
} }
fh, err := os.Open(cfg.Path) analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to open file=%q: %w", cfg.Path, err) return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
} }
defer fh.Close()
id, versionDigest := deriveIDFromFile(cfg) id, versionDigest := deriveIDFromFile(cfg)
return &fileSource{ return &fileSource{
@ -96,26 +87,10 @@ func New(cfg Config) (source.Source, error) {
analysisPath: analysisPath, analysisPath: analysisPath,
digestForVersion: versionDigest, digestForVersion: versionDigest,
digests: digests, digests: digests,
mimeType: stereoFile.MIMEType(fh), mimeType: stereoFile.MIMEType(f),
}, nil }, nil
} }
// deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included
// in the ID derivation (along with contents). This way if the user scans the same item but is considered to be
// logically different, then ID will express that.
func deriveIDFromFile(cfg Config) (artifact.ID, string) {
d := digestOfFileContents(cfg.Path)
info := d
if !cfg.Alias.IsEmpty() {
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
// scans the same item but is considered to be logically different, then ID will express that.
info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
}
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
}
func (s fileSource) ID() artifact.ID { func (s fileSource) ID() artifact.ID {
return s.id return s.id
} }
@ -168,52 +143,56 @@ func (s fileSource) FileResolver(_ source.Scope) (file.Resolver, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err) return nil, fmt.Errorf("unable to stat path=%q: %w", s.analysisPath, err)
} }
isArchiveAnalysis := fi.IsDir()
absParentDir, err := absoluteSymlinkFreePathToParent(s.analysisPath) if isArchiveAnalysis := fi.IsDir(); isArchiveAnalysis {
if err != nil {
return nil, err
}
if isArchiveAnalysis {
// this is an analysis of an archive file... we should scan the directory where the archive contents // this is an analysis of an archive file... we should scan the directory where the archive contents
res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...) res, err := fileresolver.NewFromDirectory(s.analysisPath, "", exclusionFunctions...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create directory resolver: %w", err) return nil, fmt.Errorf("unable to create directory resolver: %w", err)
} }
s.resolver = res s.resolver = res
return s.resolver, nil return s.resolver, nil
} }
// This is analysis of a single file. Use file indexer. // This is analysis of a single file. Use file indexer.
res, err := fileresolver.NewFromFile(absParentDir, s.analysisPath, exclusionFunctions...) res, err := fileresolver.NewFromFile(s.analysisPath, exclusionFunctions...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to create file resolver: %w", err) return nil, fmt.Errorf("unable to create file resolver: %w", err)
} }
s.resolver = res s.resolver = res
return s.resolver, nil return s.resolver, nil
} }
func absoluteSymlinkFreePathToParent(path string) (string, error) {
absAnalysisPath, err := filepath.Abs(path)
if err != nil {
return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
}
dereferencedAbsAnalysisPath, err := filepath.EvalSymlinks(absAnalysisPath)
if err != nil {
return "", fmt.Errorf("unable to get absolute path for analysis path=%q: %w", path, err)
}
return filepath.Dir(dereferencedAbsAnalysisPath), nil
}
func (s *fileSource) Close() error { func (s *fileSource) Close() error {
s.mutex.Lock()
defer s.mutex.Unlock()
if s.closer == nil { if s.closer == nil {
return nil return nil
} }
s.resolver = nil s.resolver = nil
return s.closer() return s.closer()
} }
// deriveIDFromFile derives an artifact ID from the contents of a file. If an alias is provided, it will be included
// in the ID derivation (along with contents). This way if the user scans the same item but is considered to be
// logically different, then ID will express that.
func deriveIDFromFile(cfg Config) (artifact.ID, string) {
d := digestOfFileContents(cfg.Path)
info := d
if !cfg.Alias.IsEmpty() {
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
// scans the same item but is considered to be logically different, then ID will express that.
info += fmt.Sprintf(":%s@%s", cfg.Alias.Name, cfg.Alias.Version)
}
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String()), d
}
// fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive // fileAnalysisPath returns the path given, or in the case the path is an archive, the location where the archive
// contents have been made available. A cleanup function is provided for any temp files created (if any). // contents have been made available. A cleanup function is provided for any temp files created (if any).
// Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where // Users can disable unpacking archives, allowing individual cataloguers to extract them instead (where
@ -253,15 +232,17 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
} }
func digestOfFileContents(path string) string { func digestOfFileContents(path string) string {
file, err := os.Open(path) f, err := os.Open(path)
if err != nil { if err != nil {
return digest.SHA256.FromString(path).String() return digest.SHA256.FromString(path).String()
} }
defer file.Close() defer f.Close()
di, err := digest.SHA256.FromReader(file)
di, err := digest.SHA256.FromReader(f)
if err != nil { if err != nil {
return digest.SHA256.FromString(path).String() return digest.SHA256.FromString(path).String()
} }
return di.String() return di.String()
} }