syft/syft/file/digest_cataloger.go
Alex Goodman 3323ce2b6b
[wip] api refactor
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
2022-03-11 21:26:33 -05:00

104 lines
2.8 KiB
Go

package file
import (
"crypto"
"errors"
"fmt"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/event/monitor"
"hash"
"io"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/source"
)
// errUndigestableFile is a sentinel error signaling that a file cannot be
// digested because it is not a regular file (e.g. a symlink or directory);
// callers detect it with errors.Is and skip the file rather than failing.
var errUndigestableFile = errors.New("undigestable file")
type DigestsCataloger struct {
hashes []crypto.Hash
}
func NewDigestsCataloger(hashes []crypto.Hash) (*DigestsCataloger, error) {
return &DigestsCataloger{
hashes: hashes,
}, nil
}
// Catalog computes the configured digests for every regular file reachable
// through the given resolver, returning results keyed by file coordinates.
// Non-regular files (errUndigestableFile) and permission errors are skipped;
// any other error aborts cataloging.
func (i *DigestsCataloger) Catalog(resolver source.FileResolver) (map[source.Coordinates][]Digest, error) {
	digests := make(map[source.Coordinates][]Digest)
	locations := allRegularFiles(resolver)
	stage, prog := monitor.FileDigesterMonitor(int64(len(locations)))
	for _, location := range locations {
		stage.Current = location.RealPath
		entry, err := i.catalogLocation(resolver, location)
		switch {
		case errors.Is(err, errUndigestableFile):
			// not a regular file: nothing to digest, move on quietly
			continue
		case internal.IsErrPathPermission(err):
			// best-effort: unreadable files are logged and skipped, not fatal
			log.Debugf("file digests cataloger skipping %q: %+v", location.RealPath, err)
			continue
		case err != nil:
			return nil, err
		}
		prog.N++
		digests[location.Coordinates] = entry
	}
	log.Debugf("file digests cataloger processed %d files", prog.N)
	prog.SetCompleted()
	return digests, nil
}
// catalogLocation computes all configured digests for a single file in one
// streaming pass over its contents. It returns errUndigestableFile for
// non-regular files and an empty (non-nil) result for zero-length files.
func (i *DigestsCataloger) catalogLocation(resolver source.FileResolver, location source.Location) ([]Digest, error) {
	meta, err := resolver.FileMetadataByLocation(location)
	if err != nil {
		return nil, err
	}

	// we should only attempt to report digests for files that are regular files (don't attempt to resolve links)
	if meta.Type != source.RegularFile {
		return nil, errUndigestableFile
	}

	contentReader, err := resolver.FileContentsByLocation(location)
	if err != nil {
		return nil, err
	}
	defer internal.CloseAndLogError(contentReader, location.VirtualPath)

	// fan the content out to one hasher per configured algorithm so the file
	// is read exactly once regardless of how many digests are requested
	hashers := make([]hash.Hash, len(i.hashes))
	writers := make([]io.Writer, len(i.hashes))
	for idx := range i.hashes {
		h := i.hashes[idx].New()
		hashers[idx] = h
		writers[idx] = h
	}

	size, err := io.Copy(io.MultiWriter(writers...), contentReader)
	if err != nil {
		return nil, internal.ErrPath{Context: "digests-cataloger", Path: location.RealPath, Err: err}
	}

	// only capture digests when there is content. It is important to do this based on SIZE and not
	// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only
	// file type but a body is still allowed.
	if size == 0 {
		return make([]Digest, 0), nil
	}

	result := make([]Digest, len(i.hashes))
	for idx, hasher := range hashers {
		result[idx] = Digest{
			Algorithm: DigestAlgorithmName(i.hashes[idx]),
			Value:     fmt.Sprintf("%+x", hasher.Sum(nil)),
		}
	}
	return result, nil
}