Fix parsing of apk databases with large entries (#1365)

Closes https://github.com/anchore/syft/issues/1354
This commit is contained in:
Dan Luhring 2022-11-29 10:16:36 -05:00 committed by GitHub
parent bd523bdb5d
commit f6996f7b9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 172084 additions and 151 deletions

View File

@ -3,13 +3,10 @@ package apkdb
import ( import (
"bufio" "bufio"
"fmt" "fmt"
"io"
"path" "path"
"strconv" "strconv"
"strings" "strings"
"github.com/mitchellh/mapstructure"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
@ -22,28 +19,73 @@ import (
// integrity check // integrity check
var _ generic.Parser = parseApkDB var _ generic.Parser = parseApkDB
// parseApkDb parses individual packages from a given Alpine DB file. For more information on specific fields // parseApkDB parses packages from a given APK installed DB file. For more
// see https://wiki.alpinelinux.org/wiki/Apk_spec . // information on specific fields, see https://wiki.alpinelinux.org/wiki/Apk_spec.
//
//nolint:funlen
func parseApkDB(_ source.FileResolver, env *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func parseApkDB(_ source.FileResolver, env *generic.Environment, reader source.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// larger capacity for the scanner.
const maxScannerCapacity = 1024 * 1024
// a new larger buffer for the scanner
bufScan := make([]byte, maxScannerCapacity)
pkgs := make([]pkg.Package, 0)
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
scanner.Buffer(bufScan, maxScannerCapacity)
onDoubleLF := func(data []byte, atEOF bool) (advance int, token []byte, err error) { var apks []pkg.ApkMetadata
for i := 0; i < len(data); i++ { var currentEntry pkg.ApkMetadata
if i > 0 && data[i-1] == '\n' && data[i] == '\n' { entryParsingInProgress := false
return i + 1, data[:i-1], nil fileParsingCtx := newApkFileParsingContext()
// creating a dedicated append-like function here instead of using `append(...)`
// below since there is nontrivial logic to be performed for each finalized apk
// entry.
appendApk := func(p pkg.ApkMetadata) {
if files := fileParsingCtx.files; len(files) >= 1 {
// attach accumulated files to the current package
p.Files = files
// reset file parsing for next use
fileParsingCtx = newApkFileParsingContext()
} }
nilFieldsToEmptySlice(&p)
apks = append(apks, p)
} }
if !atEOF {
return 0, nil, nil for scanner.Scan() {
line := scanner.Text()
if line == "" {
// i.e. apk entry separator
if entryParsingInProgress {
// current entry is complete
appendApk(currentEntry)
} }
// deliver the last token (which could be an empty string)
return 0, data, bufio.ErrFinalToken entryParsingInProgress = false
// zero-out currentEntry for use by any future entry
currentEntry = pkg.ApkMetadata{}
continue
}
field := parseApkField(line)
if field == nil {
log.Warnf("unable to parse field data from line %q", line)
continue
}
entryParsingInProgress = true
field.apply(&currentEntry, fileParsingCtx)
}
if entryParsingInProgress {
// There was no final empty line, so currentEntry hasn't been added to the
// collection yet; but we've now reached the end of scanning, so let's be sure to
// add currentEntry to the collection.
appendApk(currentEntry)
}
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("failed to parse APK installed DB file: %w", err)
} }
var r *linux.Release var r *linux.Release
@ -51,128 +93,192 @@ func parseApkDB(_ source.FileResolver, env *generic.Environment, reader source.L
r = env.LinuxRelease r = env.LinuxRelease
} }
scanner.Split(onDoubleLF) pkgs := make([]pkg.Package, 0, len(apks))
for scanner.Scan() { for _, apk := range apks {
metadata, err := parseApkDBEntry(strings.NewReader(scanner.Text())) pkgs = append(pkgs, newPackage(apk, r, reader.Location))
if err != nil {
return nil, nil, err
}
if metadata != nil {
pkgs = append(pkgs, newPackage(*metadata, r, reader.Location))
}
}
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("failed to parse APK DB file: %w", err)
} }
return pkgs, discoverPackageDependencies(pkgs), nil return pkgs, discoverPackageDependencies(pkgs), nil
} }
// parseApkDBEntry reads and parses a single pkg.ApkMetadata element from the stream, returning nil if their are no more entries. func parseApkField(line string) *apkField {
// parts := strings.SplitN(line, ":", 2)
if len(parts) != 2 {
return nil
}
f := apkField{
name: parts[0],
value: parts[1],
}
return &f
}
// apkField is one "name:value" line of an APK installed DB entry: name is the
// text before the first colon, value is everything after it.
type apkField struct {
	name, value string
}
//nolint:funlen //nolint:funlen
func parseApkDBEntry(reader io.Reader) (*pkg.ApkMetadata, error) { func (f apkField) apply(p *pkg.ApkMetadata, ctx *apkFileParsingContext) {
var entry pkg.ApkMetadata switch f.name {
pkgFields := make(map[string]interface{}) // APKINDEX field parsing
// We want sane defaults for collections, i.e. an empty array instead of null. case "P":
pkgFields["D"] = []string{} p.Package = f.value
pkgFields["p"] = []string{} case "o":
files := make([]pkg.ApkFileRecord, 0) p.OriginPackage = f.value
case "m":
var fileRecord *pkg.ApkFileRecord p.Maintainer = f.value
lastFile := "/" case "V":
p.Version = f.value
scanner := bufio.NewScanner(reader) case "L":
for scanner.Scan() { p.License = f.value
line := scanner.Text() case "A":
fields := strings.SplitN(line, ":", 2) p.Architecture = f.value
if len(fields) != 2 { case "U":
continue p.URL = f.value
case "T":
p.Description = f.value
case "S":
i, err := strconv.Atoi(f.value)
if err != nil {
log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
return
} }
key := fields[0] p.Size = i
value := strings.TrimSpace(fields[1]) case "I":
i, err := strconv.Atoi(f.value)
if err != nil {
log.Warnf("unable to parse value %q for field %q: %w", f.value, f.name, err)
return
}
p.InstalledSize = i
case "D":
deps := parseListValue(f.value)
p.Dependencies = deps
case "p":
provides := parseListValue(f.value)
p.Provides = provides
case "C":
p.Checksum = f.value
case "c":
p.GitCommit = f.value
// File/directory field parsing:
switch key {
case "D", "p":
entries := strings.Split(value, " ")
pkgFields[key] = entries
case "F": case "F":
currentFile := "/" + value directory := path.Join("/", f.value)
newFileRecord := pkg.ApkFileRecord{ ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: directory})
Path: currentFile, ctx.indexOfLatestDirectory = len(ctx.files) - 1
case "M":
i := ctx.indexOfLatestDirectory
latest := ctx.files[i]
var ok bool
latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
if !ok {
log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
return
} }
files = append(files, newFileRecord)
fileRecord = &files[len(files)-1]
// future aux references are relative to previous "F" records // save updated directory
lastFile = currentFile ctx.files[i] = latest
continue
case "R": case "R":
newFileRecord := pkg.ApkFileRecord{ var regularFile string
Path: path.Join(lastFile, value),
dirIndex := ctx.indexOfLatestDirectory
if dirIndex < 0 {
regularFile = path.Join("/", f.value)
} else {
latestDirPath := ctx.files[dirIndex].Path
regularFile = path.Join(latestDirPath, f.value)
} }
files = append(files, newFileRecord)
fileRecord = &files[len(files)-1] ctx.files = append(ctx.files, pkg.ApkFileRecord{Path: regularFile})
case "a", "M": ctx.indexOfLatestRegularFile = len(ctx.files) - 1
ownershipFields := strings.Split(value, ":") case "a":
if len(ownershipFields) < 3 { i := ctx.indexOfLatestRegularFile
log.Warnf("unexpected APK ownership field: %q", value) latest := ctx.files[i]
continue
var ok bool
latest.OwnerUID, latest.OwnerGID, latest.Permissions, ok = processFileInfo(f.value)
if !ok {
log.Warnf("unexpected value for APK ACL field %q: %q", f.name, f.value)
return
} }
if fileRecord == nil {
log.Warnf("ownership field with no parent record: %q", value) // save updated file
continue ctx.files[i] = latest
}
fileRecord.OwnerUID = ownershipFields[0]
fileRecord.OwnerGID = ownershipFields[1]
fileRecord.Permissions = ownershipFields[2]
// note: there are more optional fields available that we are not capturing, e.g.:
// "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
case "Z": case "Z":
if fileRecord == nil { i := ctx.indexOfLatestRegularFile
log.Warnf("checksum field with no parent record: %q", value) latest := ctx.files[i]
continue latest.Digest = processChecksum(f.value)
}
fileRecord.Digest = processChecksum(value) // save updated file
case "I", "S": ctx.files[i] = latest
// coerce to integer
iVal, err := strconv.Atoi(value)
if err != nil {
return nil, fmt.Errorf("failed to parse APK int: '%+v'", value)
}
pkgFields[key] = iVal
default:
pkgFields[key] = value
} }
} }
decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ func processFileInfo(v string) (uid, gid, perms string, ok bool) {
// By default, mapstructure compares field names in a *case-insensitive* manner. ok = false
// That would be the wrong approach here, since these apk files use case
// *sensitive* field names (e.g. 'P' vs. 'p'). fileInfo := strings.Split(v, ":")
MatchName: func(mapKey, fieldName string) bool { if len(fileInfo) < 3 {
return mapKey == fieldName return
},
Result: &entry,
})
if err != nil {
return nil, err
} }
if err := decoder.Decode(pkgFields); err != nil { uid = fileInfo[0]
return nil, fmt.Errorf("unable to parse APK metadata: %w", err) gid = fileInfo[1]
} perms = fileInfo[2]
if entry.Package == "" {
return nil, nil // note: there are more optional fields available that we are not capturing,
// e.g.: "0:0:755:Q1JaDEHQHBbizhEzoWK1YxuraNU/4="
ok = true
return
} }
entry.Files = files // apkFileParsingContext helps keep track of what file data has been captured so far for the APK currently being parsed.
type apkFileParsingContext struct {
	// files holds the directory and regular-file records accumulated so far.
	files []pkg.ApkFileRecord

	// Positions within files of the most recently appended directory ("F")
	// and regular-file ("R") records; -1 means none has been seen yet.
	indexOfLatestDirectory, indexOfLatestRegularFile int
}
return &entry, nil func newApkFileParsingContext() *apkFileParsingContext {
return &apkFileParsingContext{
indexOfLatestDirectory: -1, // no directories yet
indexOfLatestRegularFile: -1, // no regular files yet
}
}
// parseListValue parses a whitespace-separated list from an apk entry field
// value (used for the "D" and "p" fields). It returns nil when value contains
// no items, so an empty field never yields a list holding an empty string.
func parseListValue(value string) []string {
	// strings.Fields discards the empty items that leading, trailing, or
	// repeated separators would otherwise produce; strings.Split returns
	// [""] for an empty value, which made the nil branch unreachable.
	items := strings.Fields(value)
	if len(items) == 0 {
		return nil
	}
	return items
}
func nilFieldsToEmptySlice(p *pkg.ApkMetadata) {
if p.Dependencies == nil {
p.Dependencies = []string{}
}
if p.Provides == nil {
p.Provides = []string{}
}
if p.Files == nil {
p.Files = []pkg.ApkFileRecord{}
}
} }
func processChecksum(value string) *file.Digest { func processChecksum(value string) *file.Digest {

View File

@ -1,8 +1,8 @@
package apkdb package apkdb
import ( import (
"bufio"
"os" "os"
"path/filepath"
"testing" "testing"
"github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp"
@ -60,16 +60,15 @@ func TestExtraFileAttributes(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
f, err := os.Open("test-fixtures/extra-file-attributes") fixturePath := "test-fixtures/extra-file-attributes"
require.NoError(t, err) lrc := newLocationReadCloser(t, fixturePath)
t.Cleanup(func() { require.NoError(t, f.Close()) })
reader := bufio.NewReader(f) pkgs, _, err := parseApkDB(nil, new(generic.Environment), lrc)
assert.NoError(t, err)
require.Len(t, pkgs, 1)
metadata := pkgs[0].Metadata.(pkg.ApkMetadata)
entry, err := parseApkDBEntry(reader) if diff := cmp.Diff(test.expected.Files, metadata.Files); diff != "" {
require.NoError(t, err)
if diff := cmp.Diff(entry.Files, test.expected.Files); diff != "" {
t.Errorf("Files mismatch (-want +got):\n%s", diff) t.Errorf("Files mismatch (-want +got):\n%s", diff)
} }
}) })
@ -615,17 +614,14 @@ func TestSinglePackageDetails(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.fixture, func(t *testing.T) { t.Run(test.fixture, func(t *testing.T) {
f, err := os.Open(test.fixture) lrc := newLocationReadCloser(t, test.fixture)
pkgs, _, err := parseApkDB(nil, new(generic.Environment), lrc)
require.NoError(t, err) require.NoError(t, err)
t.Cleanup(func() { require.NoError(t, f.Close()) }) require.Len(t, pkgs, 1)
metadata := pkgs[0].Metadata.(pkg.ApkMetadata)
reader := bufio.NewReader(f) if diff := cmp.Diff(test.expected, metadata); diff != "" {
entry, err := parseApkDBEntry(reader)
require.NoError(t, err)
require.NotNil(t, entry)
if diff := cmp.Diff(*entry, test.expected); diff != "" {
t.Errorf("Entry mismatch (-want +got):\n%s", diff) t.Errorf("Entry mismatch (-want +got):\n%s", diff)
} }
}) })
@ -765,7 +761,6 @@ func TestMultiplePackages(t *testing.T) {
}} }}
pkgtest.TestFileParserWithEnv(t, fixture, parseApkDB, &env, expectedPkgs, expectedRelationships) pkgtest.TestFileParserWithEnv(t, fixture, parseApkDB, &env, expectedPkgs, expectedRelationships)
} }
func Test_processChecksum(t *testing.T) { func Test_processChecksum(t *testing.T) {
@ -791,15 +786,15 @@ func Test_processChecksum(t *testing.T) {
}, },
}, },
} }
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { for _, test := range tests {
assert.Equal(t, &tt.want, processChecksum(tt.value)) t.Run(test.name, func(t *testing.T) {
assert.Equal(t, &test.want, processChecksum(test.value))
}) })
} }
} }
func Test_discoverPackageDependencies(t *testing.T) { func Test_discoverPackageDependencies(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
genFn func() ([]pkg.Package, []artifact.Relationship) genFn func() ([]pkg.Package, []artifact.Relationship)
@ -934,9 +929,10 @@ func Test_discoverPackageDependencies(t *testing.T) {
}, },
}, },
} }
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { for _, test := range tests {
pkgs, wantRelationships := tt.genFn() t.Run(test.name, func(t *testing.T) {
pkgs, wantRelationships := test.genFn()
gotRelationships := discoverPackageDependencies(pkgs) gotRelationships := discoverPackageDependencies(pkgs)
d := cmp.Diff(wantRelationships, gotRelationships, cmpopts.IgnoreUnexported(pkg.Package{}, source.LocationSet{})) d := cmp.Diff(wantRelationships, gotRelationships, cmpopts.IgnoreUnexported(pkg.Package{}, source.LocationSet{}))
if d != "" { if d != "" {
@ -1007,3 +1003,89 @@ func TestPackageDbDependenciesByParse(t *testing.T) {
}) })
} }
} }
// Test_parseApkDB_expectedPkgNames runs parseApkDB over each fixture under
// test-fixtures and checks that the resulting package names match the expected
// list exactly, including order.
func Test_parseApkDB_expectedPkgNames(t *testing.T) {
	type testCase struct {
		fixture      string
		wantPkgNames []string
		wantErr      assert.ErrorAssertionFunc
	}

	cases := []testCase{
		{
			fixture: "very-large-entries",
			wantErr: assert.NoError,
			wantPkgNames: []string{
				"ca-certificates-bundle",
				"glibc-locale-posix",
				"wolfi-baselayout",
				"glibc",
				"libcrypto3",
				"libssl3",
				"zlib",
				"apk-tools",
				"ncurses-terminfo-base",
				"ncurses",
				"bash",
				"libcap",
				"bubblewrap",
				"busybox",
				"libbrotlicommon1",
				"libbrotlidec1",
				"libnghttp2-14",
				"libcurl4",
				"curl",
				"expat",
				"libpcre2-8-0",
				"git",
				"binutils",
				"libstdc++-dev",
				"libgcc",
				"libstdc++",
				"gmp",
				"isl",
				"mpfr",
				"mpc",
				"gcc",
				"linux-headers",
				"glibc-dev",
				"make",
				"pkgconf",
				"build-base",
				"go",
				"tree",
				"sdk",
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.fixture, func(t *testing.T) {
			lrc := newLocationReadCloser(t, filepath.Join("test-fixtures", tc.fixture))

			pkgs, _, err := parseApkDB(nil, new(generic.Environment), lrc)
			tc.wantErr(t, err)

			// compare names only; the full package contents are covered elsewhere
			if diff := cmp.Diff(tc.wantPkgNames, toPackageNames(pkgs)); diff != "" {
				t.Errorf("Packages mismatch (-want +got):\n%s", diff)
			}
		})
	}
}
// toPackageNames returns the Name of every package in pkgs, in the same order.
// The result is an allocated (non-nil) slice even when pkgs is empty.
func toPackageNames(pkgs []pkg.Package) []string {
	names := make([]string, len(pkgs))
	for i := range pkgs {
		names[i] = pkgs[i].Name
	}
	return names
}
// newLocationReadCloser opens the file at path and wraps it in a
// source.LocationReadCloser whose location is built from that same path.
// The calling test fails immediately if the file cannot be opened; the file
// is closed when the test (and its subtests) finish.
func newLocationReadCloser(t *testing.T, path string) source.LocationReadCloser {
	// mark as a helper so failures are reported at the caller's line
	t.Helper()

	f, err := os.Open(path)
	require.NoError(t, err)

	// Best-effort close: the file is only read, so a close error carries no
	// information the test needs; the discard is made explicit.
	t.Cleanup(func() { _ = f.Close() })

	return source.NewLocationReadCloser(source.NewLocation(path), f)
}

File diff suppressed because it is too large Load Diff