syft/syft/formats/formats.go
Alex Goodman 988041ba6d
Speed up cataloging by replacing globs searching with index lookups (#1510)
* replace raw globs with index equivelent operations

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add cataloger test for alpm cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix import sorting for binary cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix linting for mock resolver

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* separate portage cataloger parser impl from cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* enhance cataloger pkgtest utils to account for resolver responses

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for alpm cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for apkdb cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for dpkg cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for cpp cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for dart cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for dotnet cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for elixir cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for erlang cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for golang cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for haskell cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for java cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for javascript cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for php cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for portage cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for python cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for rpm cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for rust cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for sbom cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for swift cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* allow generic catloger to run all mimetype searches at once

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* remove stutter from php and javascript cataloger constructors

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* bump stereoscope

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add tests for generic.Search

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add exceptions for java archive git ignore entries

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* enhance basename and extension resolver methods to be variadic

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* dont allow * prefix on extension searches

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* add glob-based cataloger tests for ruby cataloger

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* remove unnecessary string casting

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* incorporate surfacing of leaf link resolitions from stereoscope results

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* [wip] switch to stereoscope file metadata

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* [wip + failing] revert to old globs but keep new resolvers

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* index files, links, and dirs within the directory resolver

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix several resolver bugs and inconsistencies

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* move format testutils to internal package

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update syft json to account for file type string normalization

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* split up directory resolver from indexing

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* update docs to include details about searching

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* [wip] bump stereoscope to development version

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix linting

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* adjust symlinks fixture to be fixed to digest

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix all-locations resolver tests

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix test fixture reference

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* rename file.Type

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* bump stereoscope

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* fix PR comment to exclude extra *

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* bump to dev version of stereoscope

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* bump to final version of stereoscope

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* move observing resolver to pkgtest

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

---------

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
2023-02-09 16:19:47 +00:00

144 lines
3.4 KiB
Go

package formats
import (
"bytes"
"errors"
"fmt"
"io"
"regexp"
"strings"
"golang.org/x/exp/slices"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/formats/cyclonedxjson"
"github.com/anchore/syft/syft/formats/cyclonedxxml"
"github.com/anchore/syft/syft/formats/github"
"github.com/anchore/syft/syft/formats/spdxjson"
"github.com/anchore/syft/syft/formats/spdxtagvalue"
"github.com/anchore/syft/syft/formats/syftjson"
"github.com/anchore/syft/syft/formats/table"
"github.com/anchore/syft/syft/formats/template"
"github.com/anchore/syft/syft/formats/text"
"github.com/anchore/syft/syft/sbom"
)
func Formats() []sbom.Format {
return []sbom.Format{
syftjson.Format(),
cyclonedxxml.Format(),
cyclonedxjson.Format(),
github.Format(),
spdxtagvalue.Format2_1(),
spdxtagvalue.Format2_2(),
spdxtagvalue.Format2_3(),
spdxjson.Format2_2(),
spdxjson.Format2_3(),
table.Format(),
text.Format(),
template.Format(),
}
}
func Identify(by []byte) sbom.Format {
for _, f := range Formats() {
if err := f.Validate(bytes.NewReader(by)); err != nil {
if !errors.Is(err, sbom.ErrValidationNotSupported) {
log.WithFields("error", err).Tracef("format validation for %s failed", f.ID())
}
continue
}
return f
}
return nil
}
// ByName accepts a name@version string, such as:
//
// spdx-json@2.1 or cyclonedx@2
func ByName(name string) sbom.Format {
parts := strings.SplitN(name, "@", 2)
version := sbom.AnyVersion
if len(parts) > 1 {
version = parts[1]
}
return ByNameAndVersion(parts[0], version)
}
func ByNameAndVersion(name string, version string) sbom.Format {
name = cleanFormatName(name)
var mostRecentFormat sbom.Format
for _, f := range Formats() {
for _, n := range f.IDs() {
if cleanFormatName(string(n)) == name && versionMatches(f.Version(), version) {
if mostRecentFormat == nil || f.Version() > mostRecentFormat.Version() {
mostRecentFormat = f
}
}
}
}
return mostRecentFormat
}
func versionMatches(version string, match string) bool {
if version == sbom.AnyVersion || match == sbom.AnyVersion {
return true
}
dots := strings.Count(match, ".")
if dots == 0 {
match += ".*"
}
match = strings.ReplaceAll(match, ".", "\\.")
match = strings.ReplaceAll(match, "*", ".*")
match = strings.ReplaceAll(match, "\\..*", "(\\..*)*")
match = fmt.Sprintf("^%s$", match)
matcher, err := regexp.Compile(match)
if err != nil {
return false
}
return matcher.MatchString(version)
}
func cleanFormatName(name string) string {
r := strings.NewReplacer("-", "", "_", "")
return strings.ToLower(r.Replace(name))
}
// Encode takes all SBOM elements and a format option and encodes an SBOM document.
func Encode(s sbom.SBOM, f sbom.Format) ([]byte, error) {
buff := bytes.Buffer{}
if err := f.Encode(&buff, s); err != nil {
return nil, fmt.Errorf("unable to encode sbom: %w", err)
}
return buff.Bytes(), nil
}
// Decode takes a reader for an SBOM and generates all internal SBOM elements.
func Decode(reader io.Reader) (*sbom.SBOM, sbom.Format, error) {
by, err := io.ReadAll(reader)
if err != nil {
return nil, nil, fmt.Errorf("unable to read sbom: %w", err)
}
f := Identify(by)
if f == nil {
return nil, nil, fmt.Errorf("unable to identify format")
}
s, err := f.Decode(bytes.NewReader(by))
return s, f, err
}
func AllIDs() (ids []sbom.FormatID) {
for _, f := range Formats() {
if slices.Contains(ids, f.ID()) {
continue
}
ids = append(ids, f.ID())
}
return ids
}