mirror of https://github.com/anchore/syft.git
migrate syft/cataloger to syft/pkg/cataloger
Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
parent cb5e7d0e08
commit 4666ca8469
syft/cataloger/common/generic_cataloger_test.go (deleted file)
@@ -1,118 +0,0 @@
package common

import (
	"fmt"
	"io"
	"io/ioutil"
	"strings"
	"testing"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/source"
)

type testResolverMock struct {
	contents map[source.Location]io.ReadCloser
}

func newTestResolver() *testResolverMock {
	return &testResolverMock{
		contents: make(map[source.Location]io.ReadCloser),
	}
}

func (r testResolverMock) HasPath(path string) bool {
	panic("not implemented")
}

func (r *testResolverMock) FileContentsByLocation(_ source.Location) (io.ReadCloser, error) {
	return nil, fmt.Errorf("not implemented")
}

func (r *testResolverMock) MultipleFileContentsByLocation([]source.Location) (map[source.Location]io.ReadCloser, error) {
	return r.contents, nil
}

func (r *testResolverMock) FilesByPath(paths ...string) ([]source.Location, error) {
	results := make([]source.Location, len(paths))

	for idx, p := range paths {
		results[idx] = source.NewLocation(p)
		r.contents[results[idx]] = ioutil.NopCloser(strings.NewReader(fmt.Sprintf("%s file contents!", p)))
	}

	return results, nil
}

func (r *testResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
	path := "/a-path.txt"
	location := source.NewLocation(path)
	r.contents[location] = ioutil.NopCloser(strings.NewReader(fmt.Sprintf("%s file contents!", path)))
	return []source.Location{location}, nil
}

func (r *testResolverMock) RelativeFileByPath(_ source.Location, _ string) *source.Location {
	panic(fmt.Errorf("not implemented"))
	return nil
}

func parser(_ string, reader io.Reader) ([]pkg.Package, error) {
	contents, err := ioutil.ReadAll(reader)
	if err != nil {
		panic(err)
	}
	return []pkg.Package{
		{
			Name: string(contents),
		},
	}, nil
}

func TestGenericCataloger(t *testing.T) {
	globParsers := map[string]ParserFn{
		"**a-path.txt": parser,
	}
	pathParsers := map[string]ParserFn{
		"/another-path.txt": parser,
		"/last/path.txt":    parser,
	}
	upstream := "some-other-cataloger"
	resolver := newTestResolver()
	cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)

	expectedSelection := []string{"/last/path.txt", "/another-path.txt", "/a-path.txt"}
	expectedPkgs := make(map[string]pkg.Package)
	for _, path := range expectedSelection {
		expectedPkgs[path] = pkg.Package{
			FoundBy: upstream,
			Name:    fmt.Sprintf("%s file contents!", path),
		}
	}

	actualPkgs, err := cataloger.Catalog(resolver)
	if err != nil {
		t.Fatalf("cataloger catalog action failed: %+v", err)
	}

	if len(actualPkgs) != len(expectedPkgs) {
		t.Fatalf("unexpected packages len: %d", len(actualPkgs))
	}

	for _, p := range actualPkgs {
		ref := p.Locations[0]
		exP, ok := expectedPkgs[ref.RealPath]
		if !ok {
			t.Errorf("missing expected pkg: ref=%+v", ref)
			continue
		}

		if p.FoundBy != exP.FoundBy {
			t.Errorf("bad upstream: %s", p.FoundBy)
		}

		if exP.Name != p.Name {
			t.Errorf("bad contents mapping: %+v", p.Locations)
		}
	}
}
syft/cataloger/deb/cataloger.go (deleted file)
@@ -1,190 +0,0 @@
/*
Package dpkg provides a concrete Cataloger implementation for Debian package DB status files.
*/
package deb

import (
	"fmt"
	"io"
	"path"
	"path/filepath"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/source"
)

const (
	md5sumsExt = ".md5sums"
	docsPath   = "/usr/share/doc"
)

type Cataloger struct{}

// NewDpkgdbCataloger returns a new Deb package cataloger object.
func NewDpkgdbCataloger() *Cataloger {
	return &Cataloger{}
}

// Name returns a string that uniquely describes a cataloger
func (c *Cataloger) Name() string {
	return "dpkgdb-cataloger"
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files.
// nolint:funlen
func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
	dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDbGlob)
	if err != nil {
		return nil, fmt.Errorf("failed to find dpkg status files's by glob: %w", err)
	}

	var results []pkg.Package
	var pkgs []pkg.Package
	for _, dbLocation := range dbFileMatches {
		dbContents, err := resolver.FileContentsByLocation(dbLocation)
		if err != nil {
			return nil, err
		}

		pkgs, err = parseDpkgStatus(dbContents)
		if err != nil {
			return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
		}

		md5ContentsByName, md5RefsByName, err := fetchMd5Contents(resolver, dbLocation, pkgs)
		if err != nil {
			return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err)
		}

		copyrightContentsByName, copyrightLocationByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs)
		if err != nil {
			return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err)
		}

		for i := range pkgs {
			p := &pkgs[i]
			p.FoundBy = c.Name()
			p.Locations = []source.Location{dbLocation}

			metadata := p.Metadata.(pkg.DpkgMetadata)

			if md5Reader, ok := md5ContentsByName[md5Key(*p)]; ok {
				// attach the file list
				metadata.Files = parseDpkgMD5Info(md5Reader)

				// keep a record of the file where this was discovered
				if ref, ok := md5RefsByName[md5Key(*p)]; ok {
					p.Locations = append(p.Locations, ref)
				}
			} else {
				// ensure the file list is an empty collection (not nil)
				metadata.Files = make([]pkg.DpkgFileRecord, 0)
			}

			// persist alterations
			p.Metadata = metadata

			copyrightReader, ok := copyrightContentsByName[p.Name]
			if ok {
				// attach the licenses
				p.Licenses = parseLicensesFromCopyright(copyrightReader)

				// keep a record of the file where this was discovered
				if ref, ok := copyrightLocationByName[p.Name]; ok {
					p.Locations = append(p.Locations, ref)
				}
			}
		}

		results = append(results, pkgs...)
	}
	return results, nil
}

func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) {
	// fetch all MD5 file contents. This approach is more efficient than fetching each MD5 file one at a time

	var md5FileMatches []source.Location
	var nameByRef = make(map[source.Location]string)
	parentPath := filepath.Dir(dbLocation.RealPath)

	for _, p := range pkgs {
		// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
		name := md5Key(p)
		md5SumLocation := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+md5sumsExt))

		if md5SumLocation == nil {
			// the most specific key did not work, fallback to just the name
			// look for /var/lib/dpkg/info/NAME.md5sums
			md5SumLocation = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", p.Name+md5sumsExt))
		}
		// we should have at least one reference
		if md5SumLocation != nil {
			md5FileMatches = append(md5FileMatches, *md5SumLocation)
			nameByRef[*md5SumLocation] = name
		}
	}

	// fetch the md5 contents
	md5ContentsByLocation, err := resolver.MultipleFileContentsByLocation(md5FileMatches)
	if err != nil {
		return nil, nil, err
	}

	// organize content results and refs by a combination of name and architecture
	var contentsByName = make(map[string]io.Reader)
	var locationByName = make(map[string]source.Location)
	for location, contents := range md5ContentsByLocation {
		name := nameByRef[location]
		contentsByName[name] = contents
		locationByName[name] = location
	}

	return contentsByName, locationByName, nil
}

func fetchCopyrightContents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) {
	// fetch all copyright file contents. This approach is more efficient than fetching each copyright file one at a time

	var copyrightFileMatches []source.Location
	var nameByLocation = make(map[source.Location]string)
	for _, p := range pkgs {
		// look for /usr/share/docs/NAME/copyright files
		name := p.Name
		copyrightPath := path.Join(docsPath, name, "copyright")
		copyrightLocation := resolver.RelativeFileByPath(dbLocation, copyrightPath)

		// we may not have a copyright file for each package, ignore missing files
		if copyrightLocation != nil {
			copyrightFileMatches = append(copyrightFileMatches, *copyrightLocation)
			nameByLocation[*copyrightLocation] = name
		}
	}

	// fetch the copyright contents
	copyrightContentsByLocation, err := resolver.MultipleFileContentsByLocation(copyrightFileMatches)
	if err != nil {
		return nil, nil, err
	}

	// organize content results and refs by package name
	var contentsByName = make(map[string]io.Reader)
	var refsByName = make(map[string]source.Location)
	for location, contents := range copyrightContentsByLocation {
		name := nameByLocation[location]
		contentsByName[name] = contents
		refsByName[name] = location
	}

	return contentsByName, refsByName, nil
}

func md5Key(p pkg.Package) string {
	metadata := p.Metadata.(pkg.DpkgMetadata)

	contentKey := p.Name
	if metadata.Architecture != "" && metadata.Architecture != "all" {
		contentKey = contentKey + ":" + metadata.Architecture
	}
	return contentKey
}
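For reference, md5Key's name-plus-architecture scheme resolves like this. A minimal standalone sketch (plain strings stand in for pkg.Package and pkg.DpkgMetadata):

package main

import "fmt"

// md5sumsKey mirrors md5Key above: use NAME:ARCH unless the architecture
// is empty or "all", in which case the bare package name is the key.
func md5sumsKey(name, arch string) string {
	if arch != "" && arch != "all" {
		return name + ":" + arch
	}
	return name
}

func main() {
	fmt.Println(md5sumsKey("libc6", "amd64")) // libc6:amd64 -> info/libc6:amd64.md5sums
	fmt.Println(md5sumsKey("tzdata", "all"))  // tzdata -> info/tzdata.md5sums
}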
@@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId:org.anchore
artifactId: example-java=app-maven
version: 0.1.0=something

@@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId:org.anchore
artifactId: example-java-app-maven
version: 0.1.0

@@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId=org.anchore
artifactId= example-java:app-maven
version= 0.1.0:something
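The three deleted pom.properties fixtures above exercise delimiter edge cases: Java properties files allow either '=' or ':' as the key/value separator, and a value may legitimately contain the other character. A parser consistent with these fixtures splits on the first separator only — a minimal sketch, not the cataloger's actual implementation:

package main

import (
	"fmt"
	"strings"
)

// splitProperty splits a properties line on the first '=' or ':' so that
// any later delimiter characters remain part of the value.
func splitProperty(line string) (key, value string, ok bool) {
	idx := strings.IndexAny(line, "=:")
	if idx < 0 {
		return "", "", false
	}
	return strings.TrimSpace(line[:idx]), strings.TrimSpace(line[idx+1:]), true
}

func main() {
	for _, line := range []string{
		"artifactId: example-java=app-maven", // ':' separator, '=' in the value
		"version= 0.1.0:something",           // '=' separator, ':' in the value
	} {
		k, v, _ := splitProperty(line)
		fmt.Printf("%s -> %q\n", k, v)
	}
}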
@@ -1,49 +0,0 @@
package python

import (
	"path/filepath"

	"github.com/anchore/syft/syft/source"
)

type packageEntry struct {
	Metadata   source.FileData
	FileRecord *source.FileData
	TopPackage *source.FileData
}

// newPackageEntry returns a new packageEntry to be processed relative to what information is available in the given FileResolver.
func newPackageEntry(resolver source.FileResolver, metadataLocation source.Location) *packageEntry {
	// we've been given a file reference to a specific wheel METADATA file. note: this may be for a directory
	// or for an image... for an image the METADATA file may be present within multiple layers, so it is important
	// to reconcile the RECORD path to the same layer (or a lower layer). The same is true with the top_level.txt file.

	// lets find the RECORD file relative to the directory where the METADATA file resides (in path AND layer structure)
	recordPath := filepath.Join(filepath.Dir(metadataLocation.RealPath), "RECORD")
	recordLocation := resolver.RelativeFileByPath(metadataLocation, recordPath)

	// a top_level.txt file specifies the python top-level packages (provided by this python package) installed into site-packages
	parentDir := filepath.Dir(metadataLocation.RealPath)
	topLevelPath := filepath.Join(parentDir, "top_level.txt")
	topLevelLocation := resolver.RelativeFileByPath(metadataLocation, topLevelPath)

	// build an entry that will later be populated with contents when the request is executed
	entry := &packageEntry{
		Metadata: source.FileData{
			Location: metadataLocation,
		},
	}

	if recordLocation != nil {
		entry.FileRecord = &source.FileData{
			Location: *recordLocation,
		}
	}

	if topLevelLocation != nil {
		entry.TopPackage = &source.FileData{
			Location: *topLevelLocation,
		}
	}
	return entry
}
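The sibling-file lookup above is plain path math handed to a layer-aware resolver. A minimal sketch of just the path derivation, using a hypothetical wheel install location:

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// hypothetical METADATA location inside site-packages
	metadata := "/usr/lib/python3.8/site-packages/requests-2.24.0.dist-info/METADATA"

	// RECORD and top_level.txt are looked up as siblings of METADATA
	distInfo := filepath.Dir(metadata)
	fmt.Println(filepath.Join(distInfo, "RECORD"))
	fmt.Println(filepath.Join(distInfo, "top_level.txt"))
}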
@@ -4,8 +4,8 @@ Package apkdb provides a concrete Cataloger implementation for Alpine DB files.
package apkdb

import (
-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// NewApkdbCataloger returns a new Alpine DB cataloger object.
@@ -9,8 +9,8 @@ import (
	"strings"

	"github.com/anchore/syft/internal/log"
-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
	"github.com/mitchellh/mapstructure"
)

@@ -18,13 +18,13 @@ type Monitor struct {
	PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers
}

-// newMonitor creates a new Monitor object and publishes the object on the bus as a CatalogerStarted event.
+// newMonitor creates a new Monitor object and publishes the object on the bus as a PackageCatalogerStarted event.
func newMonitor() (*progress.Manual, *progress.Manual) {
	filesProcessed := progress.Manual{}
	packagesDiscovered := progress.Manual{}

	bus.Publish(partybus.Event{
-		Type: event.CatalogerStarted,
+		Type: event.PackageCatalogerStarted,
		Value: Monitor{
			FilesProcessed:     progress.Monitorable(&filesProcessed),
			PackagesDiscovered: progress.Monitorable(&packagesDiscovered),
@@ -37,7 +37,7 @@ func newMonitor() (*progress.Manual, *progress.Manual) {
// In order to efficiently retrieve contents from a underlying container image the content fetch requests are
// done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single
// request.
-func Catalog(resolver source.Resolver, theDistro *distro.Distro, catalogers ...Cataloger) (*pkg.Catalog, error) {
+func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers ...Cataloger) (*pkg.Catalog, error) {
	catalog := pkg.NewCatalog()

	filesProcessed, packagesDiscovered := newMonitor()
@@ -6,16 +6,16 @@ catalogers defined in child packages as well as the interface definition to impl
package cataloger

import (
-	"github.com/anchore/syft/syft/cataloger/apkdb"
-	"github.com/anchore/syft/syft/cataloger/deb"
-	"github.com/anchore/syft/syft/cataloger/golang"
-	"github.com/anchore/syft/syft/cataloger/java"
-	"github.com/anchore/syft/syft/cataloger/javascript"
-	"github.com/anchore/syft/syft/cataloger/python"
-	"github.com/anchore/syft/syft/cataloger/rpmdb"
-	"github.com/anchore/syft/syft/cataloger/ruby"
-	"github.com/anchore/syft/syft/cataloger/rust"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
+	"github.com/anchore/syft/syft/pkg/cataloger/deb"
+	"github.com/anchore/syft/syft/pkg/cataloger/golang"
+	"github.com/anchore/syft/syft/pkg/cataloger/java"
+	"github.com/anchore/syft/syft/pkg/cataloger/javascript"
+	"github.com/anchore/syft/syft/pkg/cataloger/python"
+	"github.com/anchore/syft/syft/pkg/cataloger/rpmdb"
+	"github.com/anchore/syft/syft/pkg/cataloger/ruby"
+	"github.com/anchore/syft/syft/pkg/cataloger/rust"
	"github.com/anchore/syft/syft/source"
)

@@ -26,7 +26,7 @@ type Cataloger interface {
	// Name returns a string that uniquely describes a cataloger
	Name() string
	// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
-	Catalog(resolver source.Resolver) ([]pkg.Package, error)
+	Catalog(resolver source.FileResolver) ([]pkg.Package, error)
}

// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages.
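Any type with these two methods can be passed to Catalog(). A minimal sketch of a custom implementation (the hello-cataloger name and its fixed output are invented for illustration):

package mycataloger

import (
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/source"
)

// helloCataloger is a hypothetical cataloger that ignores the resolver and
// reports one hard-coded package.
type helloCataloger struct{}

// Name returns a string that uniquely describes this cataloger.
func (c helloCataloger) Name() string {
	return "hello-cataloger"
}

// Catalog satisfies the Cataloger interface shown above.
func (c helloCataloger) Catalog(_ source.FileResolver) ([]pkg.Package, error) {
	return []pkg.Package{
		{
			Name:    "hello",
			FoundBy: "hello-cataloger",
		},
	}, nil
}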
@@ -4,7 +4,7 @@ Package common provides generic utilities used by multiple catalogers.
package common

import (
-	"io"
+	"fmt"

	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/pkg"
@@ -16,8 +16,6 @@ import (
type GenericCataloger struct {
	globParsers       map[string]ParserFn
	pathParsers       map[string]ParserFn
-	selectedFiles     []source.Location
-	parsers           map[source.Location]ParserFn
	upstreamCataloger string
}

@@ -26,8 +24,6 @@ func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string
	return &GenericCataloger{
		globParsers:       globParsers,
		pathParsers:       pathParsers,
-		selectedFiles:     make([]source.Location, 0),
-		parsers:           make(map[source.Location]ParserFn),
		upstreamCataloger: upstreamCataloger,
	}
}
@@ -37,74 +33,22 @@ func (c *GenericCataloger) Name() string {
	return c.upstreamCataloger
}

-// register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved)
-func (c *GenericCataloger) register(files []source.Location, parser ParserFn) {
-	c.selectedFiles = append(c.selectedFiles, files...)
-	for _, f := range files {
-		c.parsers[f] = parser
-	}
-}
-
-// clear deletes all registered file-reference-to-parser-function pairings from former SelectFiles() and register() calls
-func (c *GenericCataloger) clear() {
-	c.selectedFiles = make([]source.Location, 0)
-	c.parsers = make(map[source.Location]ParserFn)
-}
-
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
-func (c *GenericCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
-	fileSelection := c.selectFiles(resolver)
-	contents, err := resolver.MultipleFileContentsByLocation(fileSelection)
+func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, error) {
+	var packages []pkg.Package
+	parserByLocation := c.selectFiles(resolver)
+
+	for location, parser := range parserByLocation {
+		content, err := resolver.FileContentsByLocation(location)
		if err != nil {
-		return nil, err
-	}
-	return c.catalog(contents)
-}
-
-// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
-func (c *GenericCataloger) selectFiles(resolver source.FileResolver) []source.Location {
-	// select by exact path
-	for path, parser := range c.pathParsers {
-		files, err := resolver.FilesByPath(path)
-		if err != nil {
-			log.Warnf("cataloger failed to select files by path: %+v", err)
-		}
-		if files != nil {
-			c.register(files, parser)
-		}
-	}
-
-	// select by glob pattern
-	for globPattern, parser := range c.globParsers {
-		fileMatches, err := resolver.FilesByGlob(globPattern)
-		if err != nil {
-			log.Warnf("failed to find files by glob: %s", globPattern)
-		}
-		if fileMatches != nil {
-			c.register(fileMatches, parser)
-		}
-	}
-
-	return c.selectedFiles
-}
-
-// catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages
-func (c *GenericCataloger) catalog(contents map[source.Location]io.ReadCloser) ([]pkg.Package, error) {
-	defer c.clear()
-
-	packages := make([]pkg.Package, 0)
-
-	for location, parser := range c.parsers {
-		content, ok := contents[location]
-		if !ok {
-			log.Warnf("cataloger '%s' missing file content: %+v", c.upstreamCataloger, location)
-			continue
+			// TODO: fail or log?
+			return nil, fmt.Errorf("unable to fetch contents for location=%v : %w", location, err)
		}

		entries, err := parser(location.RealPath, content)
		if err != nil {
			// TODO: should we fail? or only log?
-			log.Warnf("cataloger '%s' failed to parse entries (%+v): %+v", c.upstreamCataloger, location, err)
+			log.Warnf("cataloger '%s' failed to parse entries (location=%+v): %+v", c.upstreamCataloger, location, err)
			continue
		}

@@ -115,6 +59,34 @@ func (c *GenericCataloger) catalog(contents map[source.Location]io.ReadCloser) (
			packages = append(packages, entry)
		}
	}

	return packages, nil
}
+
+// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
+func (c *GenericCataloger) selectFiles(resolver source.FilePathResolver) map[source.Location]ParserFn {
+	var parserByLocation = make(map[source.Location]ParserFn)
+
+	// select by exact path
+	for path, parser := range c.pathParsers {
+		files, err := resolver.FilesByPath(path)
+		if err != nil {
+			log.Warnf("cataloger failed to select files by path: %+v", err)
+		}
+		for _, f := range files {
+			parserByLocation[f] = parser
+		}
+	}
+
+	// select by glob pattern
+	for globPattern, parser := range c.globParsers {
+		fileMatches, err := resolver.FilesByGlob(globPattern)
+		if err != nil {
+			log.Warnf("failed to find files by glob: %s", globPattern)
+		}
+		for _, f := range fileMatches {
+			parserByLocation[f] = parser
+		}
+	}
+
+	return parserByLocation
+}
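Taken together, a concrete cataloger wires ParserFn values into the generic machinery by path and by glob. A minimal sketch using only the signatures shown in this diff (the file names and line-per-package format are invented for illustration):

package example

import (
	"bufio"
	"io"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// parseNameList is a hypothetical ParserFn: it treats each line of the
// file as a package name.
func parseNameList(_ string, reader io.Reader) ([]pkg.Package, error) {
	var packages []pkg.Package
	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		packages = append(packages, pkg.Package{Name: scanner.Text()})
	}
	return packages, scanner.Err()
}

// newExampleCataloger selects files by exact path and by glob; the generic
// cataloger fills in FoundBy and Locations on every returned package.
func newExampleCataloger() *common.GenericCataloger {
	pathParsers := map[string]common.ParserFn{
		"/etc/example/packages.list": parseNameList,
	}
	globParsers := map[string]common.ParserFn{
		"**/*.pkglist": parseNameList,
	}
	return common.NewGenericCataloger(pathParsers, globParsers, "example-cataloger")
}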
syft/pkg/cataloger/common/generic_cataloger_test.go (new file, 73 lines)
@@ -0,0 +1,73 @@
package common

import (
	"fmt"
	"io"
	"io/ioutil"
	"testing"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/source"
)

func parser(_ string, reader io.Reader) ([]pkg.Package, error) {
	contents, err := ioutil.ReadAll(reader)
	if err != nil {
		panic(err)
	}
	return []pkg.Package{
		{
			Name: string(contents),
		},
	}, nil
}

func TestGenericCataloger(t *testing.T) {
	globParsers := map[string]ParserFn{
		"**a-path.txt": parser,
	}
	pathParsers := map[string]ParserFn{
		"test-fixtures/another-path.txt": parser,
		"test-fixtures/last/path.txt":    parser,
	}
	upstream := "some-other-cataloger"

	expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}
	resolver := source.NewMockResolverForPaths(expectedSelection...)
	cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)

	expectedPkgs := make(map[string]pkg.Package)
	for _, path := range expectedSelection {
		expectedPkgs[path] = pkg.Package{
			FoundBy: upstream,
			Name:    fmt.Sprintf("%s file contents!", path),
		}
	}

	actualPkgs, err := cataloger.Catalog(resolver)
	if err != nil {
		t.Fatalf("cataloger catalog action failed: %+v", err)
	}

	if len(actualPkgs) != len(expectedPkgs) {
		t.Fatalf("unexpected packages len: %d", len(actualPkgs))
	}

	for _, p := range actualPkgs {
		ref := p.Locations[0]
		exP, ok := expectedPkgs[ref.RealPath]
		if !ok {
			t.Errorf("missing expected pkg: ref=%+v", ref)
			continue
		}

		if p.FoundBy != exP.FoundBy {
			t.Errorf("bad upstream: %s", p.FoundBy)
		}

		if exP.Name != p.Name {
			t.Errorf("bad contents mapping: %+v", p.Locations)
		}
	}
}
syft/pkg/cataloger/common/test-fixtures/a-path.txt (new file, 1 line)
@@ -0,0 +1 @@
test-fixtures/a-path.txt file contents!

syft/pkg/cataloger/common/test-fixtures/another-path.txt (new file, 1 line)
@@ -0,0 +1 @@
test-fixtures/another-path.txt file contents!

syft/pkg/cataloger/common/test-fixtures/last/path.txt (new file, 1 line)
@@ -0,0 +1 @@
test-fixtures/last/path.txt file contents!
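Each fixture's body is exactly its own path followed by " file contents!", which is what parser() above turns into the expected package name (note there is no trailing newline, or the name comparison would fail). This implies source.NewMockResolverForPaths serves real files from disk; a small helper sketch (hypothetical, not part of this commit) that verifies the fixture convention:

package common

import (
	"fmt"
	"io/ioutil"
	"testing"
)

// checkFixture asserts that a fixture file follows the
// "<path> file contents!" convention relied on by TestGenericCataloger.
func checkFixture(t *testing.T, path string) {
	contents, err := ioutil.ReadFile(path)
	if err != nil {
		t.Fatalf("could not read fixture %s: %+v", path, err)
	}
	expected := fmt.Sprintf("%s file contents!", path)
	if string(contents) != expected {
		t.Errorf("fixture mismatch: got %q, want %q", string(contents), expected)
	}
}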
syft/pkg/cataloger/deb/cataloger.go (new file, 156 lines)
@@ -0,0 +1,156 @@
/*
Package dpkg provides a concrete Cataloger implementation for Debian package DB status files.
*/
package deb

import (
	"fmt"
	"io"
	"path"
	"path/filepath"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/source"
)

const (
	md5sumsExt = ".md5sums"
	docsPath   = "/usr/share/doc"
)

type Cataloger struct{}

// NewDpkgdbCataloger returns a new Deb package cataloger object.
func NewDpkgdbCataloger() *Cataloger {
	return &Cataloger{}
}

// Name returns a string that uniquely describes a cataloger
func (c *Cataloger) Name() string {
	return "dpkgdb-cataloger"
}

// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files.
// nolint:funlen
func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, error) {
	dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDbGlob)
	if err != nil {
		return nil, fmt.Errorf("failed to find dpkg status files's by glob: %w", err)
	}

	var results []pkg.Package
	var pkgs []pkg.Package
	for _, dbLocation := range dbFileMatches {
		dbContents, err := resolver.FileContentsByLocation(dbLocation)
		if err != nil {
			return nil, err
		}

		pkgs, err = parseDpkgStatus(dbContents)
		if err != nil {
			return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
		}

		for i := range pkgs {
			p := &pkgs[i]
			p.FoundBy = c.Name()
			p.Locations = []source.Location{dbLocation}

			metadata := p.Metadata.(pkg.DpkgMetadata)

			md5Reader, md5Location, err := fetchMd5Contents(resolver, dbLocation, p)
			if err != nil {
				return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err)
			}

			if md5Reader != nil {
				// attach the file list
				metadata.Files = parseDpkgMD5Info(md5Reader)

				// keep a record of the file where this was discovered
				if md5Location != nil {
					p.Locations = append(p.Locations, *md5Location)
				}
			} else {
				// ensure the file list is an empty collection (not nil)
				metadata.Files = make([]pkg.DpkgFileRecord, 0)
			}

			// persist alterations
			p.Metadata = metadata

			// get license information from the copyright file
			copyrightReader, copyrightLocation, err := fetchCopyrightContents(resolver, dbLocation, p)
			if err != nil {
				return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err)
			}

			if copyrightReader != nil {
				// attach the licenses
				p.Licenses = parseLicensesFromCopyright(copyrightReader)

				// keep a record of the file where this was discovered
				if copyrightLocation != nil {
					p.Locations = append(p.Locations, *copyrightLocation)
				}
			}
		}

		results = append(results, pkgs...)
	}
	return results, nil
}

func fetchMd5Contents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.Reader, *source.Location, error) {
	parentPath := filepath.Dir(dbLocation.RealPath)

	// look for /var/lib/dpkg/info/NAME:ARCH.md5sums
	name := md5Key(p)
	md5SumLocation := resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", name+md5sumsExt))

	if md5SumLocation == nil {
		// the most specific key did not work, fallback to just the name
		// look for /var/lib/dpkg/info/NAME.md5sums
		md5SumLocation = resolver.RelativeFileByPath(dbLocation, path.Join(parentPath, "info", p.Name+md5sumsExt))
	}

	// this is unexpected, but not a show-stopper
	if md5SumLocation == nil {
		return nil, nil, nil
	}

	reader, err := resolver.FileContentsByLocation(*md5SumLocation)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch deb md5 contents (%+v): %w", p, err)
	}
	return reader, md5SumLocation, nil
}

func fetchCopyrightContents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.Reader, *source.Location, error) {
	// look for /usr/share/docs/NAME/copyright files
	name := p.Name
	copyrightPath := path.Join(docsPath, name, "copyright")
	copyrightLocation := resolver.RelativeFileByPath(dbLocation, copyrightPath)

	// we may not have a copyright file for each package, ignore missing files
	if copyrightLocation == nil {
		return nil, nil, nil
	}

	reader, err := resolver.FileContentsByLocation(*copyrightLocation)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch deb copyright contents (%+v): %w", p, err)
	}

	return reader, copyrightLocation, nil
}

func md5Key(p *pkg.Package) string {
	metadata := p.Metadata.(pkg.DpkgMetadata)

	contentKey := p.Name
	if metadata.Architecture != "" && metadata.Architecture != "all" {
		contentKey = contentKey + ":" + metadata.Architecture
	}
	return contentKey
}
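The md5sums lookup above tries the most specific file name first, then falls back to the bare package name. A minimal standalone sketch of that candidate ordering (slightly simplified; plain strings in place of syft's resolver and location types):

package main

import (
	"fmt"
	"path"
)

// md5sumsCandidates lists the dpkg info-file paths to try, most specific
// first, mirroring the fallback in fetchMd5Contents above. key is the
// md5Key value (NAME or NAME:ARCH).
func md5sumsCandidates(dbDir, key, name string) []string {
	candidates := []string{path.Join(dbDir, "info", key+".md5sums")}
	if key != name {
		candidates = append(candidates, path.Join(dbDir, "info", name+".md5sums"))
	}
	return candidates
}

func main() {
	fmt.Println(md5sumsCandidates("/var/lib/dpkg", "libc6:amd64", "libc6"))
	// [/var/lib/dpkg/info/libc6:amd64.md5sums /var/lib/dpkg/info/libc6.md5sums]
}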
@@ -51,17 +51,21 @@ func TestDpkgCataloger(t *testing.T) {
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {

-			img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-dpkg")
-			defer cleanup()
+			img := imagetest.GetFixtureImage(t, "docker-archive", "image-dpkg")

-			s, err := source.NewFromImage(img, source.SquashedScope, "")
+			s, err := source.NewFromImage(img, "")
			if err != nil {
				t.Fatal(err)
			}

			c := NewDpkgdbCataloger()

-			actual, err := c.Catalog(s.Resolver)
+			resolver, err := s.FileResolver(source.SquashedScope)
+			if err != nil {
+				t.Errorf("could not get resolver error: %+v", err)
+			}
+
+			actual, err := c.Catalog(resolver)
			if err != nil {
				t.Fatalf("failed to catalog: %+v", err)
			}
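The test changes reflect the reworked source API: scope selection moved out of NewFromImage and into a FileResolver(scope) call on the built source. A minimal sketch of the same flow outside a test, using only calls shown in this hunk (the stereoscope image import path is an assumption):

package example

import (
	"github.com/anchore/stereoscope/pkg/image"

	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/deb"
	"github.com/anchore/syft/syft/source"
)

// catalogDpkg builds a source from an image, requests a squashed-scope
// resolver, and hands it to the dpkg cataloger.
func catalogDpkg(img *image.Image) ([]pkg.Package, error) {
	s, err := source.NewFromImage(img, "")
	if err != nil {
		return nil, err
	}

	resolver, err := s.FileResolver(source.SquashedScope)
	if err != nil {
		return nil, err
	}

	return deb.NewDpkgdbCataloger().Catalog(resolver)
}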
@@ -4,7 +4,7 @@ Package golang provides a concrete Cataloger implementation for go.mod files.
package golang

import (
-	"github.com/anchore/syft/syft/cataloger/common"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// NewGoModCataloger returns a new Go module cataloger object.
@@ -9,8 +9,8 @@ import (

	"github.com/anchore/syft/internal"
	"github.com/anchore/syft/internal/file"
-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// integrity check
@@ -4,7 +4,7 @@ Package java provides a concrete Cataloger implementation for Java archives (jar
package java

import (
-	"github.com/anchore/syft/syft/cataloger/common"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// NewJavaCataloger returns a new Java archive cataloger object.
@@ -1,4 +1,4 @@
-/packages/*
+/packages/sb
*.fingerprint
# maven when running in a volume may spit out directories like this
**/\?/
@@ -4,7 +4,7 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e
package javascript

import (
-	"github.com/anchore/syft/syft/cataloger/common"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// NewJavascriptPackageCataloger returns a new JavaScript cataloger object based on detection of npm based packages.
@@ -13,8 +13,8 @@ import (

	"github.com/mitchellh/mapstructure"

-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// integrity check
@@ -161,7 +161,7 @@ func licensesFromJSON(p PackageJSON) ([]string, error) {
	return nil, fmt.Errorf("unable to parse license field: %w", err)
}

-// parsePackageJson parses a package.json and returns the discovered JavaScript packages.
+// parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
	packages := make([]pkg.Package, 0)
	dec := json.NewDecoder(reader)
@@ -5,8 +5,8 @@ import (
	"fmt"
	"io"

-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// integrity check
@@ -8,8 +8,8 @@ import (
	"strings"

	"github.com/anchore/syft/internal/log"
-	"github.com/anchore/syft/syft/cataloger/common"
	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/common"
)

// integrity check
Some files were not shown because too many files have changed in this diff.