ignore minor parsing error when reading dpkg status files (#786)

* ignore minor parsing error when reading dpkg status files

helps with https://github.com/anchore/syft/issues/733

Question: should we add a smarter parser to guess approximate installed-size
value?

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* add datasize lib to help dpkg parsing

added unit tests to expand coverage of dpkg parsing

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* drop parse error

added unit tests to handleNewKeyValue

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* don't return parsing errors from dpkg

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* go mod tidy

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* test higher level functions

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* return parsing err to let cataloger handle it

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* feedback changes

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* ignore key parsing error

log warning with relevant context

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* go mod tidy

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* add context info to log lines

simpler error assertion

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* use error.As to assert error in chain

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>
This commit is contained in:
Jonas Xavier 2022-02-17 14:05:11 -08:00 committed by GitHub
parent e6b5872bc8
commit 4b16737b2f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 197 additions and 16 deletions

2
go.mod
View File

@ -25,6 +25,7 @@ require (
github.com/gookit/color v1.2.7 github.com/gookit/color v1.2.7
github.com/hashicorp/go-multierror v1.1.0 github.com/hashicorp/go-multierror v1.1.0
github.com/jinzhu/copier v0.3.2 github.com/jinzhu/copier v0.3.2
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
github.com/mholt/archiver/v3 v3.5.1 github.com/mholt/archiver/v3 v3.5.1
github.com/mitchellh/go-homedir v1.1.0 github.com/mitchellh/go-homedir v1.1.0
github.com/mitchellh/hashstructure/v2 v2.0.2 github.com/mitchellh/hashstructure/v2 v2.0.2
@ -85,7 +86,6 @@ require (
github.com/mattn/go-colorable v0.0.9 // indirect github.com/mattn/go-colorable v0.0.9 // indirect
github.com/mattn/go-isatty v0.0.6 // indirect github.com/mattn/go-isatty v0.0.6 // indirect
github.com/mattn/go-runewidth v0.0.7 // indirect github.com/mattn/go-runewidth v0.0.7 // indirect
github.com/mgutz/ansi v0.0.0-20200706080929-d51e80ef957d // indirect
github.com/nwaples/rardecode v1.1.0 // indirect github.com/nwaples/rardecode v1.1.0 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.0.2 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect

View File

@ -53,7 +53,8 @@ func (c *Cataloger) Catalog(resolver source.FileResolver) ([]pkg.Package, []arti
pkgs, err := parseDpkgStatus(dbContents) pkgs, err := parseDpkgStatus(dbContents)
internal.CloseAndLogError(dbContents, dbLocation.VirtualPath) internal.CloseAndLogError(dbContents, dbLocation.VirtualPath)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to catalog dpkg package=%+v: %w", dbLocation.RealPath, err) log.Warnf("dpkg cataloger: unable to catalog package=%+v: %w", dbLocation.RealPath, err)
continue
} }
for i := range pkgs { for i := range pkgs {

View File

@ -12,7 +12,6 @@ import (
) )
func TestDpkgCataloger(t *testing.T) { func TestDpkgCataloger(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
sources map[string][]string sources map[string][]string

View File

@ -6,12 +6,12 @@ import (
"fmt" "fmt"
"io" "io"
"regexp" "regexp"
"strconv"
"strings" "strings"
"github.com/anchore/syft/internal" "github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/dustin/go-humanize"
"github.com/mitchellh/mapstructure" "github.com/mitchellh/mapstructure"
) )
@ -133,7 +133,8 @@ func extractAllFields(reader *bufio.Reader) (map[string]interface{}, error) {
var val interface{} var val interface{}
key, val, err = handleNewKeyValue(line) key, val, err = handleNewKeyValue(line)
if err != nil { if err != nil {
return nil, err log.Warnf("parsing dpkg status: extracting key-value from line: %s err: %v", line, err)
continue
} }
if _, ok := dpkgFields[key]; ok { if _, ok := dpkgFields[key]; ok {
@ -154,9 +155,9 @@ func extractSourceVersion(source string) (string, string) {
} }
// handleNewKeyValue parse a new key-value pair from the given unprocessed line // handleNewKeyValue parse a new key-value pair from the given unprocessed line
func handleNewKeyValue(line string) (string, interface{}, error) { func handleNewKeyValue(line string) (key string, val interface{}, err error) {
if i := strings.Index(line, ":"); i > 0 { if i := strings.Index(line, ":"); i > 0 {
var key = strings.TrimSpace(line[0:i]) key = strings.TrimSpace(line[0:i])
// mapstruct cant handle "-" // mapstruct cant handle "-"
key = strings.ReplaceAll(key, "-", "") key = strings.ReplaceAll(key, "-", "")
val := strings.TrimSpace(line[i+1:]) val := strings.TrimSpace(line[i+1:])
@ -164,11 +165,11 @@ func handleNewKeyValue(line string) (string, interface{}, error) {
// further processing of values based on the key that was discovered // further processing of values based on the key that was discovered
switch key { switch key {
case "InstalledSize": case "InstalledSize":
numVal, err := strconv.Atoi(val) s, err := humanize.ParseBytes(val)
if err != nil { if err != nil {
return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err) return "", nil, fmt.Errorf("bad installed-size value=%q: %w", val, err)
} }
return key, numVal, nil return key, int(s), nil
default: default:
return key, val, nil return key, val, nil
} }

View File

@ -2,10 +2,15 @@ package deb
import ( import (
"bufio" "bufio"
"errors"
"fmt"
"os" "os"
"path/filepath"
"strings"
"testing" "testing"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/go-test/deep" "github.com/go-test/deep"
@ -22,9 +27,11 @@ func TestSinglePackage(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
expected pkg.DpkgMetadata expected pkg.DpkgMetadata
fixturePath string
}{ }{
{ {
name: "Test Single Package", name: "Test Single Package",
fixturePath: filepath.Join("test-fixtures", "status", "single"),
expected: pkg.DpkgMetadata{ expected: pkg.DpkgMetadata{
Package: "apt", Package: "apt",
Source: "apt-dev", Source: "apt-dev",
@ -68,11 +75,22 @@ func TestSinglePackage(t *testing.T) {
}, },
}, },
}, },
} {
name: "parse storage notation",
fixturePath: filepath.Join("test-fixtures", "status", "installed-size-4KB"),
expected: pkg.DpkgMetadata{
Package: "apt",
Source: "apt-dev",
Version: "1.8.2",
Architecture: "amd64",
InstalledSize: 4000,
Maintainer: "APT Development Team <deity@lists.debian.org>",
},
}}
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
file, err := os.Open("test-fixtures/status/single") file, err := os.Open(test.fixturePath)
if err != nil { if err != nil {
t.Fatal("Unable to read test_fixtures/single: ", err) t.Fatal("Unable to read test_fixtures/single: ", err)
} }
@ -204,7 +222,6 @@ func TestMultiplePackages(t *testing.T) {
} }
func TestSourceVersionExtract(t *testing.T) { func TestSourceVersionExtract(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
input string input string
@ -242,3 +259,135 @@ func TestSourceVersionExtract(t *testing.T) {
}) })
} }
} }
func assertAs(expected error) assert.ErrorAssertionFunc {
return func(t assert.TestingT, err error, i ...interface{}) bool {
return assert.ErrorAs(t, err, &expected)
}
}
func Test_parseDpkgStatus(t *testing.T) {
tests := []struct {
name string
input string
want []pkg.Package
wantErr assert.ErrorAssertionFunc
}{
{
name: "no more packages",
input: `Package: apt`,
wantErr: assert.NoError,
},
{
name: "duplicated key",
input: `Package: apt
Package: apt-get
`,
wantErr: assertAs(errors.New("duplicate key discovered: Package")),
},
{
name: "no match for continuation",
input: ` Package: apt
`,
wantErr: assertAs(errors.New("no match for continuation: line: ' Package: apt'")),
},
{
name: "find keys",
input: `Package: apt
Status: install ok installed
Installed-Size: 10kib
`,
want: []pkg.Package{
{
Name: "apt",
Type: "deb",
MetadataType: "DpkgMetadata",
Metadata: pkg.DpkgMetadata{
Package: "apt",
InstalledSize: 10240,
Files: []pkg.DpkgFileRecord{},
},
},
},
wantErr: assert.NoError,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
r := bufio.NewReader(strings.NewReader(tt.input))
got, err := parseDpkgStatus(r)
tt.wantErr(t, err, fmt.Sprintf("parseDpkgStatus"))
assert.Equal(t, tt.want, got)
})
}
}
func Test_handleNewKeyValue(t *testing.T) {
tests := []struct {
name string
line string
wantKey string
wantVal interface{}
wantErr assert.ErrorAssertionFunc
}{
{
name: "cannot parse field",
line: "blabla",
wantErr: assertAs(errors.New("cannot parse field from line: 'blabla'")),
},
{
name: "parse field",
line: "key: val",
wantKey: "key",
wantVal: "val",
wantErr: assert.NoError,
},
{
name: "parse installed size",
line: "InstalledSize: 128",
wantKey: "InstalledSize",
wantVal: 128,
wantErr: assert.NoError,
},
{
name: "parse installed kib size",
line: "InstalledSize: 1kib",
wantKey: "InstalledSize",
wantVal: 1024,
wantErr: assert.NoError,
},
{
name: "parse installed kb size",
line: "InstalledSize: 1kb",
wantKey: "InstalledSize",
wantVal: 1000,
wantErr: assert.NoError,
},
{
name: "parse installed-size mb",
line: "Installed-Size: 1 mb",
wantKey: "InstalledSize",
wantVal: 1000000,
wantErr: assert.NoError,
},
{
name: "fail parsing installed-size",
line: "Installed-Size: 1bla",
wantKey: "",
wantErr: assertAs(fmt.Errorf("unhandled size name: %s", "bla")),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotKey, gotVal, err := handleNewKeyValue(tt.line)
tt.wantErr(t, err, fmt.Sprintf("handleNewKeyValue(%v)", tt.line))
assert.Equalf(t, tt.wantKey, gotKey, "handleNewKeyValue(%v)", tt.line)
assert.Equalf(t, tt.wantVal, gotVal, "handleNewKeyValue(%v)", tt.line)
})
}
}

View File

@ -0,0 +1,31 @@
Package: apt
Status: install ok installed
Priority: required
Section: admin
Installed-Size: 4KB
Maintainer: APT Development Team <deity@lists.debian.org>
Architecture: amd64
Version: 1.8.2
Source: apt-dev
Replaces: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~)
Provides: apt-transport-https (= 1.8.2)
Depends: adduser, gpgv | gpgv2 | gpgv1, debian-archive-keyring, libapt-pkg5.0 (>= 1.7.0~alpha3~), libc6 (>= 2.15), libgcc1 (>= 1:3.0), libgnutls30 (>= 3.6.6), libseccomp2 (>= 1.0.1), libstdc++6 (>= 5.2)
Recommends: ca-certificates
Suggests: apt-doc, aptitude | synaptic | wajig, dpkg-dev (>= 1.17.2), gnupg | gnupg2 | gnupg1, powermgmt-base
Breaks: apt-transport-https (<< 1.5~alpha4~), apt-utils (<< 1.3~exp2~), aptitude (<< 0.8.10)
Conffiles:
Description: commandline package manager
This package provides commandline tools for searching and
managing as well as querying information about packages
as a low-level access to all features of the libapt-pkg library.
.
These include:
* apt-get for retrieval of packages and information about them
from authenticated sources and for installation, upgrade and
removal of packages together with their dependencies
* apt-cache for querying available information about installed
as well as installable packages
* apt-cdrom to use removable media as a source for packages
* apt-config as an interface to the configuration settings
* apt-key as an interface to manage authentication keys