syft/syft/pkg/cataloger/arch/parse_alpm_db.go
William Murphy · 3713d97b7b · chore: use ruleguard to test for missing defer statements (#2837) · 2024-05-07

package arch

import (
	"bufio"
	"bytes"
	"compress/gzip"
	"context"
	"fmt"
	"io"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"github.com/mitchellh/mapstructure"
	"github.com/vbatts/go-mtree"

	"github.com/anchore/syft/internal"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/pkg"
	"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
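
// compile-time check that parseAlpmDB implements generic.Parser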
var _ generic.Parser = parseAlpmDB
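
// ignoredFiles are database and mtree entries that describe package metadata
// rather than files installed by the package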
var (
	ignoredFiles = map[string]bool{
		"/set":       true,
		".BUILDINFO": true,
		".PKGINFO":   true,
		"":           true,
	}
)

type parsedData struct {
	Licenses        string `mapstructure:"license"`
	pkg.AlpmDBEntry `mapstructure:",squash"`
}

// parseAlpmDB parses the arch linux pacman database flat-files and returns the packages and relationships found within.
func parseAlpmDB(_ context.Context, resolver file.Resolver, env *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	data, err := parseAlpmDBEntry(reader)
	if err != nil {
		return nil, nil, err
	}

	base := filepath.Dir(reader.RealPath)
	r, err := getFileReader(filepath.Join(base, "mtree"), resolver)
	if err != nil {
		return nil, nil, err
	}

	pkgFiles, err := parseMtree(r)
	if err != nil {
		return nil, nil, err
	}

	// replace the files found in the pacman database with the files from the mtree, since these contain more
	// metadata and are thus more useful.
	// TODO: probably want to use MTREE and PKGINFO here
	data.Files = pkgFiles

	// we only really do this to get any backup database entries from the files database
	files := filepath.Join(base, "files")
	filesReader, err := getFileReader(files, resolver)
	if err != nil {
		return nil, nil, err
	}

	filesMetadata, err := parseAlpmDBEntry(filesReader)
	if err != nil {
		return nil, nil, err
	} else if filesMetadata != nil {
		data.Backup = filesMetadata.Backup
	}

	if data.Package == "" {
		return nil, nil, nil
	}

	return []pkg.Package{
		newPackage(
			data,
			env.LinuxRelease,
			reader.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
		),
	}, nil, nil
}
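
// parseAlpmDBEntry parses a single ALPM database file (such as desc or files) into package metadata.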
func parseAlpmDBEntry(reader io.Reader) (*parsedData, error) {
	scanner := newScanner(reader)
	metadata, err := parseDatabase(scanner)
	if err != nil {
		return nil, err
	}
	return metadata, nil
}
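
// newScanner returns a bufio.Scanner that splits the input on blank lines
// (two consecutive newlines), yielding one database record per token.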
func newScanner(reader io.Reader) *bufio.Scanner {
	// This is taken from the apk parser
	// https://github.com/anchore/syft/blob/v0.47.0/syft/pkg/cataloger/apkdb/parse_apk_db.go#L37
	const maxScannerCapacity = 1024 * 1024
	bufScan := make([]byte, maxScannerCapacity)
	scanner := bufio.NewScanner(reader)
	scanner.Buffer(bufScan, maxScannerCapacity)
	onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		for i := 0; i < len(data); i++ {
			if i > 0 && data[i-1] == '\n' && data[i] == '\n' {
				return i + 1, data[:i-1], nil
			}
		}
		if !atEOF {
			return 0, nil, nil
		}
		// deliver the last token (which could be an empty string)
		return 0, data, bufio.ErrFinalToken
	}

	scanner.Split(onDoubleLF)
	return scanner
}
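
// getFileReader resolves the given path and returns an in-memory reader over the
// file contents, so the underlying content reader can be closed before returning.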
func getFileReader(path string, resolver file.Resolver) (io.Reader, error) {
	locs, err := resolver.FilesByPath(path)
	if err != nil {
		return nil, err
	}
	if len(locs) == 0 {
		return nil, fmt.Errorf("could not find file: %s", path)
	}
	// TODO: Should we maybe check if we found the file
	dbContentReader, err := resolver.FileContentsByLocation(locs[0])
	if err != nil {
		return nil, err
	}
	defer internal.CloseAndLogError(dbContentReader, locs[0].RealPath)
	// buffer the contents into memory before the deferred close runs, otherwise
	// the returned reader would already be closed by the time the caller uses it
	buf, err := io.ReadAll(dbContentReader)
	if err != nil {
		return nil, fmt.Errorf("unable to read file %s: %w", path, err)
	}
	return bytes.NewReader(buf), nil
}
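
// parseDatabase consumes one blank-line-separated record at a time from the scanner,
// stripping the % markers from each key and collecting the values into a field map for decoding.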
func parseDatabase(b *bufio.Scanner) (*parsedData, error) {
	var err error
	pkgFields := make(map[string]interface{})
	for b.Scan() {
		fields := strings.SplitN(b.Text(), "\n", 2)

		// End of File
		if len(fields) == 1 {
			break
		}

		// The alpm database surrounds the keys with %.
		key := strings.ReplaceAll(fields[0], "%", "")
		key = strings.ToLower(key)
		value := strings.TrimSpace(fields[1])

		switch key {
		case "files":
			var files []map[string]string
			for _, f := range strings.Split(value, "\n") {
				path := fmt.Sprintf("/%s", f)
				if ok := ignoredFiles[path]; !ok {
					files = append(files, map[string]string{"path": path})
				}
			}
			pkgFields[key] = files
		case "backup":
			var backup []map[string]interface{}
			for _, f := range strings.Split(value, "\n") {
				fields := strings.SplitN(f, "\t", 2)
				path := fmt.Sprintf("/%s", fields[0])
				if ok := ignoredFiles[path]; !ok {
					backup = append(backup, map[string]interface{}{
						"path": path,
						"digests": []file.Digest{{
							Algorithm: "md5",
							Value:     fields[1],
						}},
					})
				}
			}
			pkgFields[key] = backup
		case "reason", "size":
			pkgFields[key], err = strconv.ParseInt(value, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("failed to parse %s to integer", value)
			}
		default:
			pkgFields[key] = value
		}
	}
	return parsePkgFiles(pkgFields)
}
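
// parsePkgFiles decodes the collected field map into a parsedData value, ensuring the backup
// list is non-nil and discarding entries that have no package name, files, or backups.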
func parsePkgFiles(pkgFields map[string]interface{}) (*parsedData, error) {
	var entry parsedData
	if err := mapstructure.Decode(pkgFields, &entry); err != nil {
		return nil, fmt.Errorf("unable to parse ALPM metadata: %w", err)
	}

	if entry.Backup == nil {
		entry.Backup = make([]pkg.AlpmFileRecord, 0)
	}

	if entry.Package == "" && len(entry.Files) == 0 && len(entry.Backup) == 0 {
		return nil, nil
	}
	return &entry, nil
}
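
// parseMtree parses the gzip-compressed mtree spec that pacman stores alongside each
// package's database entry, returning one file record per entry with any digests attached.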
func parseMtree(r io.Reader) ([]pkg.AlpmFileRecord, error) {
	var err error
	var entries []pkg.AlpmFileRecord

	r, err = gzip.NewReader(r)
	if err != nil {
		return nil, err
	}

	specDh, err := mtree.ParseSpec(r)
	if err != nil {
		return nil, err
	}

	for _, f := range specDh.Entries {
		var entry pkg.AlpmFileRecord
		entry.Digests = make([]file.Digest, 0)
		fileFields := make(map[string]interface{})
		if ok := ignoredFiles[f.Name]; ok {
			continue
		}
		path := fmt.Sprintf("/%s", f.Name)
		fileFields["path"] = path
		for _, kv := range f.Keywords {
			kw := string(kv.Keyword())
			switch kw {
			case "time":
				// All unix timestamps have a .0 suffix.
				v := strings.Split(kv.Value(), ".")
				i, _ := strconv.ParseInt(v[0], 10, 64)
				tm := time.Unix(i, 0)
				fileFields[kw] = tm
			case "sha256digest":
				entry.Digests = append(entry.Digests, file.Digest{
					Algorithm: "sha256",
					Value:     kv.Value(),
				})
			case "md5digest":
				entry.Digests = append(entry.Digests, file.Digest{
					Algorithm: "md5",
					Value:     kv.Value(),
				})
			default:
				fileFields[kw] = kv.Value()
			}
		}
		if err := mapstructure.Decode(fileFields, &entry); err != nil {
			return nil, fmt.Errorf("unable to parse ALPM mtree data: %w", err)
		}
		entries = append(entries, entry)
	}
	return entries, nil
}