migrate syft/cataloger to syft/pkg/cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-03-18 08:48:20 -04:00
parent cb5e7d0e08
commit 4666ca8469
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
157 changed files with 394 additions and 563 deletions

View File

@ -1,118 +0,0 @@
package common
import (
"fmt"
"io"
"io/ioutil"
"strings"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// testResolverMock is a source.Resolver test double: it fabricates file
// contents on demand and remembers them by location so that a later bulk
// content request can serve them back.
type testResolverMock struct {
	// contents maps each location handed out by this resolver to the reader
	// that content-fetch methods should serve for it.
	contents map[source.Location]io.ReadCloser
}

// newTestResolver returns an empty testResolverMock ready for use.
func newTestResolver() *testResolverMock {
	return &testResolverMock{
		contents: make(map[source.Location]io.ReadCloser),
	}
}

// HasPath is not exercised by these tests.
// note: pointer receiver for consistency with the other methods of this type.
func (r *testResolverMock) HasPath(path string) bool {
	panic("not implemented")
}

// FileContentsByLocation is not exercised by these tests.
func (r *testResolverMock) FileContentsByLocation(_ source.Location) (io.ReadCloser, error) {
	return nil, fmt.Errorf("not implemented")
}

// MultipleFileContentsByLocation returns every reader registered so far,
// regardless of the requested locations.
func (r *testResolverMock) MultipleFileContentsByLocation([]source.Location) (map[source.Location]io.ReadCloser, error) {
	return r.contents, nil
}

// FilesByPath fabricates a location (with canned "<path> file contents!"
// contents) for each given path.
func (r *testResolverMock) FilesByPath(paths ...string) ([]source.Location, error) {
	results := make([]source.Location, len(paths))
	for idx, p := range paths {
		results[idx] = source.NewLocation(p)
		r.contents[results[idx]] = ioutil.NopCloser(strings.NewReader(fmt.Sprintf("%s file contents!", p)))
	}
	return results, nil
}

// FilesByGlob fabricates a single match at /a-path.txt regardless of the
// glob patterns given.
func (r *testResolverMock) FilesByGlob(_ ...string) ([]source.Location, error) {
	path := "/a-path.txt"
	location := source.NewLocation(path)
	r.contents[location] = ioutil.NopCloser(strings.NewReader(fmt.Sprintf("%s file contents!", path)))
	return []source.Location{location}, nil
}

// RelativeFileByPath is not exercised by these tests.
// (the unreachable "return nil" that followed the panic has been removed)
func (r *testResolverMock) RelativeFileByPath(_ source.Location, _ string) *source.Location {
	panic(fmt.Errorf("not implemented"))
}
// parser is a trivial ParserFn: it reads the entire file and emits a single
// package whose name is the raw file contents.
func parser(_ string, reader io.Reader) ([]pkg.Package, error) {
	data, err := ioutil.ReadAll(reader)
	if err != nil {
		// test helper: fail hard rather than plumbing errors through the fixture
		panic(err)
	}
	return []pkg.Package{{Name: string(data)}}, nil
}
// TestGenericCataloger wires a mock resolver into a GenericCataloger and
// verifies that every configured path and glob parser produces a package
// attributed to the upstream cataloger.
func TestGenericCataloger(t *testing.T) {
	globParsers := map[string]ParserFn{
		"**a-path.txt": parser,
	}
	pathParsers := map[string]ParserFn{
		"/another-path.txt": parser,
		"/last/path.txt":    parser,
	}
	upstream := "some-other-cataloger"
	resolver := newTestResolver()
	cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)

	// the mock resolver fabricates "<path> file contents!" for every file, so
	// the expected package name is derived from the path alone.
	expectedSelection := []string{"/last/path.txt", "/another-path.txt", "/a-path.txt"}
	expectedPkgs := make(map[string]pkg.Package)
	for _, p := range expectedSelection {
		expectedPkgs[p] = pkg.Package{
			FoundBy: upstream,
			Name:    fmt.Sprintf("%s file contents!", p),
		}
	}

	actualPkgs, err := cataloger.Catalog(resolver)
	if err != nil {
		t.Fatalf("cataloger catalog action failed: %+v", err)
	}
	if len(actualPkgs) != len(expectedPkgs) {
		t.Fatalf("unexpected packages len: %d", len(actualPkgs))
	}

	for _, actual := range actualPkgs {
		ref := actual.Locations[0]
		expected, ok := expectedPkgs[ref.RealPath]
		if !ok {
			t.Errorf("missing expected pkg: ref=%+v", ref)
			continue
		}
		if actual.FoundBy != expected.FoundBy {
			t.Errorf("bad upstream: %s", actual.FoundBy)
		}
		if expected.Name != actual.Name {
			t.Errorf("bad contents mapping: %+v", actual.Locations)
		}
	}
}

View File

@ -1,190 +0,0 @@
/*
Package dpkg provides a concrete Cataloger implementation for Debian package DB status files.
*/
package deb
import (
"fmt"
"io"
"path"
"path/filepath"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
const (
	// md5sumsExt is the filename suffix of the per-package file-list files
	// found next to the dpkg status DB (e.g. /var/lib/dpkg/info/NAME.md5sums).
	md5sumsExt = ".md5sums"
	// docsPath is the root under which per-package copyright files live
	// (docsPath/NAME/copyright).
	docsPath = "/usr/share/doc"
)

// Cataloger catalogs pkg.Package entries from the Debian package database
// ("dpkg status") file and its md5sums/copyright support files. It is
// stateless; all inputs arrive via the resolver passed to Catalog.
type Cataloger struct{}

// NewDpkgdbCataloger returns a new Deb package cataloger object.
func NewDpkgdbCataloger() *Cataloger {
	return &Cataloger{}
}

// Name returns a string that uniquely describes a cataloger
func (c *Cataloger) Name() string {
	return "dpkgdb-cataloger"
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files.
// nolint:funlen
func (c *Cataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) {
	dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDbGlob)
	if err != nil {
		return nil, fmt.Errorf("failed to find dpkg status files by glob: %w", err)
	}
	var results []pkg.Package
	for _, dbLocation := range dbFileMatches {
		dbContents, err := resolver.FileContentsByLocation(dbLocation)
		if err != nil {
			return nil, err
		}

		pkgs, err := parseDpkgStatus(dbContents)
		// the status DB reader is fully consumed by the parser; close it now
		// rather than leaking one open reader per status DB found.
		if closeErr := dbContents.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
		if err != nil {
			return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
		}

		// bulk-fetch the md5sums and copyright support files for all packages
		// up front (one request each instead of one per package).
		// NOTE(review): the readers in these maps are backed by ReadClosers
		// that are never closed here — confirm whether the resolver requires
		// closing them.
		md5ContentsByName, md5RefsByName, err := fetchMd5Contents(resolver, dbLocation, pkgs)
		if err != nil {
			return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err)
		}
		copyrightContentsByName, copyrightLocationByName, err := fetchCopyrightContents(resolver, dbLocation, pkgs)
		if err != nil {
			return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err)
		}

		for i := range pkgs {
			p := &pkgs[i]
			p.FoundBy = c.Name()
			p.Locations = []source.Location{dbLocation}

			metadata := p.Metadata.(pkg.DpkgMetadata)
			if md5Reader, ok := md5ContentsByName[md5Key(*p)]; ok {
				// attach the file list
				metadata.Files = parseDpkgMD5Info(md5Reader)
				// keep a record of the file where this was discovered
				if ref, ok := md5RefsByName[md5Key(*p)]; ok {
					p.Locations = append(p.Locations, ref)
				}
			} else {
				// ensure the file list is an empty collection (not nil)
				metadata.Files = make([]pkg.DpkgFileRecord, 0)
			}

			// persist alterations
			p.Metadata = metadata

			if copyrightReader, ok := copyrightContentsByName[p.Name]; ok {
				// attach the licenses
				p.Licenses = parseLicensesFromCopyright(copyrightReader)
				// keep a record of the file where this was discovered
				if ref, ok := copyrightLocationByName[p.Name]; ok {
					p.Locations = append(p.Locations, ref)
				}
			}
		}
		results = append(results, pkgs...)
	}
	return results, nil
}
// fetchMd5Contents resolves and fetches the md5sums file for every given
// package in a single bulk content request. Both returned maps are keyed by
// md5Key(pkg) (name, optionally suffixed with ":ARCH").
func fetchMd5Contents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) {
	// fetch all MD5 file contents in bulk; this is more efficient than
	// fetching each md5 file one at a time.
	infoDir := path.Join(filepath.Dir(dbLocation.RealPath), "info")
	var locations []source.Location
	nameByLocation := make(map[source.Location]string)
	for _, p := range pkgs {
		key := md5Key(p)
		// prefer /var/lib/dpkg/info/NAME:ARCH.md5sums ...
		found := resolver.RelativeFileByPath(dbLocation, path.Join(infoDir, key+md5sumsExt))
		if found == nil {
			// ... the most specific key did not work; fall back to
			// /var/lib/dpkg/info/NAME.md5sums
			found = resolver.RelativeFileByPath(dbLocation, path.Join(infoDir, p.Name+md5sumsExt))
		}
		// we should have at least one reference; skip packages without one
		if found != nil {
			locations = append(locations, *found)
			nameByLocation[*found] = key
		}
	}

	contentsByLocation, err := resolver.MultipleFileContentsByLocation(locations)
	if err != nil {
		return nil, nil, err
	}

	// re-key the results by the name(+architecture) key
	contentsByName := make(map[string]io.Reader, len(contentsByLocation))
	locationByName := make(map[string]source.Location, len(contentsByLocation))
	for location, contents := range contentsByLocation {
		key := nameByLocation[location]
		contentsByName[key] = contents
		locationByName[key] = location
	}
	return contentsByName, locationByName, nil
}
// fetchCopyrightContents resolves and fetches the /usr/share/doc/NAME/copyright
// file for every given package in a single bulk content request. Both returned
// maps are keyed by package name.
func fetchCopyrightContents(resolver source.Resolver, dbLocation source.Location, pkgs []pkg.Package) (map[string]io.Reader, map[string]source.Location, error) {
	// fetch all copyright files in bulk; this is more efficient than fetching
	// each copyright file one at a time.
	var locations []source.Location
	nameByLocation := make(map[source.Location]string)
	for _, p := range pkgs {
		// look for /usr/share/docs/NAME/copyright files
		found := resolver.RelativeFileByPath(dbLocation, path.Join(docsPath, p.Name, "copyright"))
		// we may not have a copyright file for each package; ignore missing files
		if found != nil {
			locations = append(locations, *found)
			nameByLocation[*found] = p.Name
		}
	}

	contentsByLocation, err := resolver.MultipleFileContentsByLocation(locations)
	if err != nil {
		return nil, nil, err
	}

	// re-key the results by package name
	contentsByName := make(map[string]io.Reader, len(contentsByLocation))
	refsByName := make(map[string]source.Location, len(contentsByLocation))
	for location, contents := range contentsByLocation {
		name := nameByLocation[location]
		contentsByName[name] = contents
		refsByName[name] = location
	}
	return contentsByName, refsByName, nil
}
// md5Key derives the base filename of a package's md5sums file: the package
// name, suffixed with ":ARCH" for any concrete (non-"all", non-empty)
// architecture.
func md5Key(p pkg.Package) string {
	metadata := p.Metadata.(pkg.DpkgMetadata)
	key := p.Name
	if arch := metadata.Architecture; arch != "" && arch != "all" {
		key += ":" + arch
	}
	return key
}

View File

@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId:org.anchore
artifactId: example-java=app-maven
version: 0.1.0=something

View File

@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId:org.anchore
artifactId: example-java-app-maven
version: 0.1.0

View File

@ -1,5 +0,0 @@
#Generated by Maven
#Tue Jul 07 18:59:56 GMT 2020
groupId=org.anchore
artifactId= example-java:app-maven
version= 0.1.0:something

View File

@ -1,49 +0,0 @@
package python
import (
"path/filepath"
"github.com/anchore/syft/syft/source"
)
// packageEntry groups a wheel METADATA file reference with its optional
// companion RECORD and top_level.txt references.
type packageEntry struct {
	Metadata   source.FileData
	FileRecord *source.FileData
	TopPackage *source.FileData
}

// newPackageEntry returns a new packageEntry to be processed relative to what information is available in the given FileResolver.
func newPackageEntry(resolver source.FileResolver, metadataLocation source.Location) *packageEntry {
	// we've been given a file reference to a specific wheel METADATA file.
	// note: this may be for a directory or for an image... for an image the
	// METADATA file may be present within multiple layers, so it is important
	// to reconcile the companion files to the same layer (or a lower layer).
	parentDir := filepath.Dir(metadataLocation.RealPath)

	// build an entry that will later be populated with contents when the
	// request is executed
	entry := &packageEntry{
		Metadata: source.FileData{
			Location: metadataLocation,
		},
	}

	// the RECORD file lives next to METADATA (resolved in path AND layer structure)
	if recordLocation := resolver.RelativeFileByPath(metadataLocation, filepath.Join(parentDir, "RECORD")); recordLocation != nil {
		entry.FileRecord = &source.FileData{
			Location: *recordLocation,
		}
	}

	// top_level.txt specifies the python top-level packages (provided by this
	// python package) installed into site-packages
	if topLevelLocation := resolver.RelativeFileByPath(metadataLocation, filepath.Join(parentDir, "top_level.txt")); topLevelLocation != nil {
		entry.TopPackage = &source.FileData{
			Location: *topLevelLocation,
		}
	}
	return entry
}

View File

@ -4,8 +4,8 @@ Package apkdb provides a concrete Cataloger implementation for Alpine DB files.
package apkdb package apkdb
import ( import (
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// NewApkdbCataloger returns a new Alpine DB cataloger object. // NewApkdbCataloger returns a new Alpine DB cataloger object.

View File

@ -9,8 +9,8 @@ import (
"strings" "strings"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
) )

View File

@ -18,13 +18,13 @@ type Monitor struct {
PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers PackagesDiscovered progress.Monitorable // the number of packages discovered from all registered catalogers
} }
// newMonitor creates a new Monitor object and publishes the object on the bus as a CatalogerStarted event. // newMonitor creates a new Monitor object and publishes the object on the bus as a PackageCatalogerStarted event.
func newMonitor() (*progress.Manual, *progress.Manual) { func newMonitor() (*progress.Manual, *progress.Manual) {
filesProcessed := progress.Manual{} filesProcessed := progress.Manual{}
packagesDiscovered := progress.Manual{} packagesDiscovered := progress.Manual{}
bus.Publish(partybus.Event{ bus.Publish(partybus.Event{
Type: event.CatalogerStarted, Type: event.PackageCatalogerStarted,
Value: Monitor{ Value: Monitor{
FilesProcessed: progress.Monitorable(&filesProcessed), FilesProcessed: progress.Monitorable(&filesProcessed),
PackagesDiscovered: progress.Monitorable(&packagesDiscovered), PackagesDiscovered: progress.Monitorable(&packagesDiscovered),
@ -37,7 +37,7 @@ func newMonitor() (*progress.Manual, *progress.Manual) {
// In order to efficiently retrieve contents from a underlying container image the content fetch requests are // In order to efficiently retrieve contents from a underlying container image the content fetch requests are
// done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single // done in bulk. Specifically, all files of interest are collected from each catalogers and accumulated into a single
// request. // request.
func Catalog(resolver source.Resolver, theDistro *distro.Distro, catalogers ...Cataloger) (*pkg.Catalog, error) { func Catalog(resolver source.FileResolver, theDistro *distro.Distro, catalogers ...Cataloger) (*pkg.Catalog, error) {
catalog := pkg.NewCatalog() catalog := pkg.NewCatalog()
filesProcessed, packagesDiscovered := newMonitor() filesProcessed, packagesDiscovered := newMonitor()

View File

@ -6,16 +6,16 @@ catalogers defined in child packages as well as the interface definition to impl
package cataloger package cataloger
import ( import (
"github.com/anchore/syft/syft/cataloger/apkdb"
"github.com/anchore/syft/syft/cataloger/deb"
"github.com/anchore/syft/syft/cataloger/golang"
"github.com/anchore/syft/syft/cataloger/java"
"github.com/anchore/syft/syft/cataloger/javascript"
"github.com/anchore/syft/syft/cataloger/python"
"github.com/anchore/syft/syft/cataloger/rpmdb"
"github.com/anchore/syft/syft/cataloger/ruby"
"github.com/anchore/syft/syft/cataloger/rust"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/apkdb"
"github.com/anchore/syft/syft/pkg/cataloger/deb"
"github.com/anchore/syft/syft/pkg/cataloger/golang"
"github.com/anchore/syft/syft/pkg/cataloger/java"
"github.com/anchore/syft/syft/pkg/cataloger/javascript"
"github.com/anchore/syft/syft/pkg/cataloger/python"
"github.com/anchore/syft/syft/pkg/cataloger/rpmdb"
"github.com/anchore/syft/syft/pkg/cataloger/ruby"
"github.com/anchore/syft/syft/pkg/cataloger/rust"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
) )
@ -26,7 +26,7 @@ type Cataloger interface {
// Name returns a string that uniquely describes a cataloger // Name returns a string that uniquely describes a cataloger
Name() string Name() string
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
Catalog(resolver source.Resolver) ([]pkg.Package, error) Catalog(resolver source.FileResolver) ([]pkg.Package, error)
} }
// ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages. // ImageCatalogers returns a slice of locally implemented catalogers that are fit for detecting installations of packages.

View File

@ -4,7 +4,7 @@ Package common provides generic utilities used by multiple catalogers.
package common package common
import ( import (
"io" "fmt"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -16,8 +16,6 @@ import (
type GenericCataloger struct { type GenericCataloger struct {
globParsers map[string]ParserFn globParsers map[string]ParserFn
pathParsers map[string]ParserFn pathParsers map[string]ParserFn
selectedFiles []source.Location
parsers map[source.Location]ParserFn
upstreamCataloger string upstreamCataloger string
} }
@ -26,8 +24,6 @@ func NewGenericCataloger(pathParsers map[string]ParserFn, globParsers map[string
return &GenericCataloger{ return &GenericCataloger{
globParsers: globParsers, globParsers: globParsers,
pathParsers: pathParsers, pathParsers: pathParsers,
selectedFiles: make([]source.Location, 0),
parsers: make(map[source.Location]ParserFn),
upstreamCataloger: upstreamCataloger, upstreamCataloger: upstreamCataloger,
} }
} }
@ -37,74 +33,22 @@ func (c *GenericCataloger) Name() string {
return c.upstreamCataloger return c.upstreamCataloger
} }
// register pairs a set of file references with a parser function for future cataloging (when the file contents are resolved)
func (c *GenericCataloger) register(files []source.Location, parser ParserFn) {
c.selectedFiles = append(c.selectedFiles, files...)
for _, f := range files {
c.parsers[f] = parser
}
}
// clear deletes all registered file-reference-to-parser-function pairings from former SelectFiles() and register() calls
func (c *GenericCataloger) clear() {
c.selectedFiles = make([]source.Location, 0)
c.parsers = make(map[source.Location]ParserFn)
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source. // Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing the catalog source.
func (c *GenericCataloger) Catalog(resolver source.Resolver) ([]pkg.Package, error) { func (c *GenericCataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, error) {
fileSelection := c.selectFiles(resolver) var packages []pkg.Package
contents, err := resolver.MultipleFileContentsByLocation(fileSelection) parserByLocation := c.selectFiles(resolver)
for location, parser := range parserByLocation {
content, err := resolver.FileContentsByLocation(location)
if err != nil { if err != nil {
return nil, err // TODO: fail or log?
} return nil, fmt.Errorf("unable to fetch contents for location=%v : %w", location, err)
return c.catalog(contents)
}
// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
func (c *GenericCataloger) selectFiles(resolver source.FileResolver) []source.Location {
// select by exact path
for path, parser := range c.pathParsers {
files, err := resolver.FilesByPath(path)
if err != nil {
log.Warnf("cataloger failed to select files by path: %+v", err)
}
if files != nil {
c.register(files, parser)
}
}
// select by glob pattern
for globPattern, parser := range c.globParsers {
fileMatches, err := resolver.FilesByGlob(globPattern)
if err != nil {
log.Warnf("failed to find files by glob: %s", globPattern)
}
if fileMatches != nil {
c.register(fileMatches, parser)
}
}
return c.selectedFiles
}
// catalog takes a set of file contents and uses any configured parser functions to resolve and return discovered packages
func (c *GenericCataloger) catalog(contents map[source.Location]io.ReadCloser) ([]pkg.Package, error) {
defer c.clear()
packages := make([]pkg.Package, 0)
for location, parser := range c.parsers {
content, ok := contents[location]
if !ok {
log.Warnf("cataloger '%s' missing file content: %+v", c.upstreamCataloger, location)
continue
} }
entries, err := parser(location.RealPath, content) entries, err := parser(location.RealPath, content)
if err != nil { if err != nil {
// TODO: should we fail? or only log? // TODO: should we fail? or only log?
log.Warnf("cataloger '%s' failed to parse entries (%+v): %+v", c.upstreamCataloger, location, err) log.Warnf("cataloger '%s' failed to parse entries (location=%+v): %+v", c.upstreamCataloger, location, err)
continue continue
} }
@ -115,6 +59,34 @@ func (c *GenericCataloger) catalog(contents map[source.Location]io.ReadCloser) (
packages = append(packages, entry) packages = append(packages, entry)
} }
} }
return packages, nil return packages, nil
} }
// SelectFiles takes a set of file trees and resolves and file references of interest for future cataloging
func (c *GenericCataloger) selectFiles(resolver source.FilePathResolver) map[source.Location]ParserFn {
var parserByLocation = make(map[source.Location]ParserFn)
// select by exact path
for path, parser := range c.pathParsers {
files, err := resolver.FilesByPath(path)
if err != nil {
log.Warnf("cataloger failed to select files by path: %+v", err)
}
for _, f := range files {
parserByLocation[f] = parser
}
}
// select by glob pattern
for globPattern, parser := range c.globParsers {
fileMatches, err := resolver.FilesByGlob(globPattern)
if err != nil {
log.Warnf("failed to find files by glob: %s", globPattern)
}
for _, f := range fileMatches {
parserByLocation[f] = parser
}
}
return parserByLocation
}

View File

@ -0,0 +1,73 @@
package common
import (
"fmt"
"io"
"io/ioutil"
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
// parser is a trivial ParserFn: it reads the entire file and emits a single
// package whose name is the raw file contents.
func parser(_ string, reader io.Reader) ([]pkg.Package, error) {
	data, err := ioutil.ReadAll(reader)
	if err != nil {
		// test helper: fail hard rather than plumbing errors through the fixture
		panic(err)
	}
	return []pkg.Package{{Name: string(data)}}, nil
}
// TestGenericCataloger verifies that a GenericCataloger invokes its path and
// glob parsers against a mock resolver and attributes every resulting package
// to the upstream cataloger.
func TestGenericCataloger(t *testing.T) {
	globParsers := map[string]ParserFn{
		"**a-path.txt": parser,
	}
	pathParsers := map[string]ParserFn{
		"test-fixtures/another-path.txt": parser,
		"test-fixtures/last/path.txt":    parser,
	}
	upstream := "some-other-cataloger"

	expectedSelection := []string{"test-fixtures/last/path.txt", "test-fixtures/another-path.txt", "test-fixtures/a-path.txt"}
	resolver := source.NewMockResolverForPaths(expectedSelection...)
	cataloger := NewGenericCataloger(pathParsers, globParsers, upstream)

	// each fixture file contains "<path> file contents!", which the trivial
	// parser turns directly into the package name.
	expectedPkgs := make(map[string]pkg.Package)
	for _, p := range expectedSelection {
		expectedPkgs[p] = pkg.Package{
			FoundBy: upstream,
			Name:    fmt.Sprintf("%s file contents!", p),
		}
	}

	actualPkgs, err := cataloger.Catalog(resolver)
	if err != nil {
		t.Fatalf("cataloger catalog action failed: %+v", err)
	}
	if len(actualPkgs) != len(expectedPkgs) {
		t.Fatalf("unexpected packages len: %d", len(actualPkgs))
	}

	for _, actual := range actualPkgs {
		ref := actual.Locations[0]
		expected, ok := expectedPkgs[ref.RealPath]
		if !ok {
			t.Errorf("missing expected pkg: ref=%+v", ref)
			continue
		}
		if actual.FoundBy != expected.FoundBy {
			t.Errorf("bad upstream: %s", actual.FoundBy)
		}
		if expected.Name != actual.Name {
			t.Errorf("bad contents mapping: %+v", actual.Locations)
		}
	}
}

View File

@ -0,0 +1 @@
test-fixtures/a-path.txt file contents!

View File

@ -0,0 +1 @@
test-fixtures/another-path.txt file contents!

View File

@ -0,0 +1 @@
test-fixtures/last/path.txt file contents!

View File

@ -0,0 +1,156 @@
/*
Package dpkg provides a concrete Cataloger implementation for Debian package DB status files.
*/
package deb
import (
"fmt"
"io"
"path"
"path/filepath"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
)
const (
	// md5sumsExt is the filename suffix of the per-package file-list files
	// found next to the dpkg status DB (e.g. /var/lib/dpkg/info/NAME.md5sums).
	md5sumsExt = ".md5sums"
	// docsPath is the root under which per-package copyright files live
	// (docsPath/NAME/copyright).
	docsPath = "/usr/share/doc"
)

// Cataloger catalogs pkg.Package entries from the Debian package database
// ("dpkg status") file and its md5sums/copyright support files. It is
// stateless; all inputs arrive via the resolver passed to Catalog.
type Cataloger struct{}

// NewDpkgdbCataloger returns a new Deb package cataloger object.
func NewDpkgdbCataloger() *Cataloger {
	return &Cataloger{}
}

// Name returns a string that uniquely describes a cataloger
func (c *Cataloger) Name() string {
	return "dpkgdb-cataloger"
}
// Catalog is given an object to resolve file references and content, this function returns any discovered Packages after analyzing dpkg support files.
// nolint:funlen
func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, error) {
	dbFileMatches, err := resolver.FilesByGlob(pkg.DpkgDbGlob)
	if err != nil {
		return nil, fmt.Errorf("failed to find dpkg status files by glob: %w", err)
	}
	var results []pkg.Package
	for _, dbLocation := range dbFileMatches {
		dbContents, err := resolver.FileContentsByLocation(dbLocation)
		if err != nil {
			return nil, err
		}

		pkgs, err := parseDpkgStatus(dbContents)
		// the status DB reader is fully consumed by the parser; close it now
		// rather than leaking one open reader per status DB found.
		if closeErr := dbContents.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
		if err != nil {
			return nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err)
		}

		for i := range pkgs {
			p := &pkgs[i]
			p.FoundBy = c.Name()
			p.Locations = []source.Location{dbLocation}

			metadata := p.Metadata.(pkg.DpkgMetadata)

			// NOTE(review): the readers returned by fetchMd5Contents and
			// fetchCopyrightContents are never closed here — confirm whether
			// the underlying resolver requires closing them.
			md5Reader, md5Location, err := fetchMd5Contents(resolver, dbLocation, p)
			if err != nil {
				return nil, fmt.Errorf("unable to find dpkg md5 contents: %w", err)
			}
			if md5Reader != nil {
				// attach the file list
				metadata.Files = parseDpkgMD5Info(md5Reader)
				// keep a record of the file where this was discovered
				if md5Location != nil {
					p.Locations = append(p.Locations, *md5Location)
				}
			} else {
				// ensure the file list is an empty collection (not nil)
				metadata.Files = make([]pkg.DpkgFileRecord, 0)
			}

			// persist alterations
			p.Metadata = metadata

			// get license information from the copyright file
			copyrightReader, copyrightLocation, err := fetchCopyrightContents(resolver, dbLocation, p)
			if err != nil {
				return nil, fmt.Errorf("unable to find dpkg copyright contents: %w", err)
			}
			if copyrightReader != nil {
				// attach the licenses
				p.Licenses = parseLicensesFromCopyright(copyrightReader)
				// keep a record of the file where this was discovered
				if copyrightLocation != nil {
					p.Locations = append(p.Locations, *copyrightLocation)
				}
			}
		}
		results = append(results, pkgs...)
	}
	return results, nil
}
// fetchMd5Contents locates and opens the md5sums file for the given package,
// preferring the NAME:ARCH form and falling back to the bare package name.
// A nil reader (with nil error) means no md5sums file exists for the package,
// which is unexpected but not a show-stopper.
func fetchMd5Contents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.Reader, *source.Location, error) {
	infoDir := path.Join(filepath.Dir(dbLocation.RealPath), "info")

	// prefer /var/lib/dpkg/info/NAME:ARCH.md5sums ...
	location := resolver.RelativeFileByPath(dbLocation, path.Join(infoDir, md5Key(p)+md5sumsExt))
	if location == nil {
		// ... the most specific key did not work; fall back to
		// /var/lib/dpkg/info/NAME.md5sums
		location = resolver.RelativeFileByPath(dbLocation, path.Join(infoDir, p.Name+md5sumsExt))
	}
	if location == nil {
		return nil, nil, nil
	}

	reader, err := resolver.FileContentsByLocation(*location)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch deb md5 contents (%+v): %w", p, err)
	}
	return reader, location, nil
}
// fetchCopyrightContents locates and opens /usr/share/doc/NAME/copyright for
// the given package. A nil reader (with nil error) means the package ships no
// copyright file, which is not an error.
func fetchCopyrightContents(resolver source.FileResolver, dbLocation source.Location, p *pkg.Package) (io.Reader, *source.Location, error) {
	location := resolver.RelativeFileByPath(dbLocation, path.Join(docsPath, p.Name, "copyright"))
	if location == nil {
		// we may not have a copyright file for each package; ignore missing files
		return nil, nil, nil
	}

	reader, err := resolver.FileContentsByLocation(*location)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to fetch deb copyright contents (%+v): %w", p, err)
	}
	return reader, location, nil
}
// md5Key derives the base filename of a package's md5sums file: the package
// name, suffixed with ":ARCH" for any concrete (non-"all", non-empty)
// architecture.
func md5Key(p *pkg.Package) string {
	metadata := p.Metadata.(pkg.DpkgMetadata)
	key := p.Name
	if arch := metadata.Architecture; arch != "" && arch != "all" {
		key += ":" + arch
	}
	return key
}

View File

@ -51,17 +51,21 @@ func TestDpkgCataloger(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
img, cleanup := imagetest.GetFixtureImage(t, "docker-archive", "image-dpkg") img := imagetest.GetFixtureImage(t, "docker-archive", "image-dpkg")
defer cleanup()
s, err := source.NewFromImage(img, source.SquashedScope, "") s, err := source.NewFromImage(img, "")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
c := NewDpkgdbCataloger() c := NewDpkgdbCataloger()
actual, err := c.Catalog(s.Resolver) resolver, err := s.FileResolver(source.SquashedScope)
if err != nil {
t.Errorf("could not get resolver error: %+v", err)
}
actual, err := c.Catalog(resolver)
if err != nil { if err != nil {
t.Fatalf("failed to catalog: %+v", err) t.Fatalf("failed to catalog: %+v", err)
} }

View File

@ -4,7 +4,7 @@ Package golang provides a concrete Cataloger implementation for go.mod files.
package golang package golang
import ( import (
"github.com/anchore/syft/syft/cataloger/common" "github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// NewGoModCataloger returns a new Go module cataloger object. // NewGoModCataloger returns a new Go module cataloger object.

View File

@ -9,8 +9,8 @@ import (
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/file" "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// integrity check // integrity check

View File

@ -4,7 +4,7 @@ Package java provides a concrete Cataloger implementation for Java archives (jar
package java package java
import ( import (
"github.com/anchore/syft/syft/cataloger/common" "github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// NewJavaCataloger returns a new Java archive cataloger object. // NewJavaCataloger returns a new Java archive cataloger object.

View File

@ -1,4 +1,4 @@
/packages/* /packages/sb
*.fingerprint *.fingerprint
# maven when running in a volume may spit out directories like this # maven when running in a volume may spit out directories like this
**/\?/ **/\?/

View File

@ -4,7 +4,7 @@ Package javascript provides a concrete Cataloger implementation for JavaScript e
package javascript package javascript
import ( import (
"github.com/anchore/syft/syft/cataloger/common" "github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// NewJavascriptPackageCataloger returns a new JavaScript cataloger object based on detection of npm based packages. // NewJavascriptPackageCataloger returns a new JavaScript cataloger object based on detection of npm based packages.

View File

@ -13,8 +13,8 @@ import (
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// integrity check // integrity check
@ -161,7 +161,7 @@ func licensesFromJSON(p PackageJSON) ([]string, error) {
return nil, fmt.Errorf("unable to parse license field: %w", err) return nil, fmt.Errorf("unable to parse license field: %w", err)
} }
// parsePackageJson parses a package.json and returns the discovered JavaScript packages. // parsePackageJSON parses a package.json and returns the discovered JavaScript packages.
func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) { func parsePackageJSON(_ string, reader io.Reader) ([]pkg.Package, error) {
packages := make([]pkg.Package, 0) packages := make([]pkg.Package, 0)
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)

View File

@ -5,8 +5,8 @@ import (
"fmt" "fmt"
"io" "io"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// integrity check // integrity check

View File

@ -8,8 +8,8 @@ import (
"strings" "strings"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/cataloger/common"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/common"
) )
// integrity check // integrity check

Some files were not shown because too many files have changed in this diff Show More