Convert between SBOM formats (#964)

* add convert command

Signed-off-by: Jonas Galvão Xavier <jonas.agx@gmail.com>

* mvp

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* fix hanging bug

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* validate SBOM formats for conversion

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* move convert cmd to new structure

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* remove bin

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* drop event loop from convert cmd

extract SBOM type from document namespace

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* validate SPDX in tests

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* documenting convert cmd

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* support output format=file.json notation

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* test convertible formats

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* fix typo

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* clean up

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* more clean up and docs

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* feedback changes

Signed-off-by: Jonas Galvão Xavier <jonasx@anchore.com>

* nit

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* feedback changes

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* re-use more code

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* undo encode-decode cycle test

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* remove unnecessary test constraint

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* fix readme

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* try verbose

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* cleaner README and no table conversion

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* simpler conversion

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* feedback changes and cleanup

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* nit space fix

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* use defer

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

* feedback changes

Signed-off-by: Jonas Xavier <jonasx@anchore.com>

Co-authored-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Jonas Xavier 2022-05-09 17:28:33 -07:00 committed by GitHub
parent a83506628c
commit 24f08e7738
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
18 changed files with 377 additions and 92 deletions

View File

@ -25,6 +25,7 @@ A CLI tool and Go library for generating a Software Bill of Materials (SBOM) fro
- Linux distribution identification
- Works seamlessly with [Grype](https://github.com/anchore/grype) (a fast, modern vulnerability scanner)
- Able to create signed SBOM attestations using the [in-toto specification](https://github.com/in-toto/attestation/blob/main/spec/README.md)
- Convert between SBOM formats, such as CycloneDX, SPDX, and Syft's own format.
### Supported Ecosystems
@ -93,6 +94,29 @@ The above output includes only software that is visible in the container (i.e.,
syft <image> --scope all-layers
```
#### Format conversion (experimental)
The ability to convert existing SBOMs means you can create SBOMs in different formats quickly, without the need to regenerate the SBOM from scratch, which may take significantly more time.
```
syft convert <ORIGINAL-SBOM-FILE> -o <NEW-SBOM-FORMAT>[=<NEW-SBOM-FILE>]
```
This feature is experimental and data might be lost when converting formats. Packages are the main SBOM component easily transferable across formats, whereas files and relationships, as well as other information Syft doesn't support, are more likely to be lost.
We support formats with wide community usage AND good encode/decode support by Syft. The supported formats are:
- Syft JSON
- SPDX 2.2 JSON
- SPDX 2.2 tag-value
- CycloneDX 1.4 JSON
- CycloneDX 1.4 XML
Conversion example:
```sh
syft alpine:latest -o syft-json=sbom.syft.json # generate a syft SBOM
syft convert sbom.syft.json -o cyclonedx-json=sbom.cdx.json # convert it to CycloneDX
```
#### SBOM attestation
### Keyless support

View File

@ -46,6 +46,7 @@ func New() (*cobra.Command, error) {
// root options are also passed to the attestCmd so that a user provided config location can be discovered
attestCmd := Attest(v, app, ro)
poweruserCmd := PowerUser(v, app, ro)
convertCmd := Convert(v, app, ro)
// rootCmd is currently an alias for the packages command
rootCmd := &cobra.Command{
@ -84,6 +85,7 @@ func New() (*cobra.Command, error) {
// Add sub-commands.
rootCmd.AddCommand(packagesCmd)
rootCmd.AddCommand(attestCmd)
rootCmd.AddCommand(convertCmd)
rootCmd.AddCommand(poweruserCmd)
rootCmd.AddCommand(Completion())
rootCmd.AddCommand(Version(v, app))

47
cmd/syft/cli/convert.go Normal file
View File

@ -0,0 +1,47 @@
package cli
import (
"fmt"
"github.com/anchore/syft/cmd/syft/cli/convert"
"github.com/anchore/syft/cmd/syft/cli/options"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/config"
"github.com/spf13/cobra"
"github.com/spf13/viper"
)
const (
	// convertExample is the usage-example template shown in the convert command's
	// help output; the {{.appName}} and {{.command}} placeholders are filled in
	// when the command is built. The first example requests spdx-json explicitly,
	// so its description must not claim the output is rendered as a table.
	convertExample = `  {{.appName}} {{.command}} img.syft.json -o spdx-json                      convert a syft SBOM to spdx-json, output goes to stdout
  {{.appName}} {{.command}} img.syft.json -o cyclonedx-json=img.cdx.json    convert a syft SBOM to CycloneDX, output goes to a file named img.cdx.json
`
)
// Convert assembles the "convert" cobra sub-command. The command's argument
// hook loads the application configuration from v and ro.Config, then calls
// newLogWrapper and logApplicationConfig before delegating to validateArgs;
// its run hook optionally checks for an application update and hands the
// positional arguments to convert.Run.
func Convert(v *viper.Viper, app *config.Application, ro *options.RootOptions) *cobra.Command {
	// render the help example up front so the command literal below stays compact
	example := internal.Tprintf(convertExample, map[string]interface{}{
		"appName": internal.ApplicationName,
		"command": "convert",
	})

	// checkArgs runs before RunE: configuration must be loaded before the
	// positional arguments are validated.
	checkArgs := func(cmd *cobra.Command, args []string) error {
		loadErr := app.LoadAllValues(v, ro.Config)
		if loadErr != nil {
			return fmt.Errorf("invalid application config: %w", loadErr)
		}
		newLogWrapper(app)
		logApplicationConfig(app)
		return validateArgs(cmd, args)
	}

	// execute performs the conversion itself.
	execute := func(cmd *cobra.Command, args []string) error {
		if app.CheckForAppUpdate {
			checkForApplicationUpdate()
		}
		return convert.Run(cmd.Context(), app, args)
	}

	return &cobra.Command{
		Use:           "convert [SOURCE-SBOM] -o [FORMAT]",
		Short:         "Convert between SBOM formats",
		Long:          "[Experimental] Convert SBOM files to, and from, SPDX, CycloneDX and Syft's format. For more info about data loss between formats see https://github.com/anchore/syft#format-conversion-experimental",
		Example:       example,
		Args:          checkArgs,
		SilenceUsage:  true,
		SilenceErrors: true,
		RunE:          execute,
	}
}

View File

@ -0,0 +1,41 @@
package convert
import (
"context"
"fmt"
"os"
"github.com/anchore/syft/cmd/syft/cli/options"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft"
)
// Run decodes the SBOM file named by args[0] and re-encodes it through the
// writer built from app.Outputs (with app.File as the default destination).
//
// It returns an error when no SBOM path is given, when the writer cannot be
// created, when the file cannot be opened or decoded, or when writing the
// converted document fails. A failure to close the writer is only logged.
func Run(ctx context.Context, app *config.Application, args []string) error {
	log.Warn("convert is an experimental feature, run `syft convert -h` for help")

	// guard the args[0] access below so a caller that skipped argument
	// validation gets an error instead of an index-out-of-range panic
	if len(args) == 0 {
		return fmt.Errorf("an SBOM file argument is required")
	}

	writer, err := options.MakeWriter(app.Outputs, app.File)
	if err != nil {
		return err
	}

	defer func() {
		if err := writer.Close(); err != nil {
			// %w is only understood by fmt.Errorf; use %v so the error text
			// is rendered properly in the log message
			log.Warnf("unable to write to report destination: %v", err)
		}
	}()

	// this can only be a SBOM file
	userInput := args[0]
	f, err := os.Open(userInput)
	if err != nil {
		return fmt.Errorf("failed to open SBOM file: %w", err)
	}
	defer f.Close()

	sbom, _, err := syft.Decode(f)
	if err != nil {
		return fmt.Errorf("failed to decode SBOM: %w", err)
	}

	return writer.Write(*sbom)
}

View File

@ -0,0 +1,62 @@
package options
import (
"fmt"
"strings"
"github.com/anchore/syft/internal/formats/table"
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/sbom"
"github.com/hashicorp/go-multierror"
)
// MakeWriter parses the given output option strings (using defaultFile where
// an option names no file) and builds an sbom.Writer from them. Exactly one
// of the two results is meaningful: either a usable writer with a nil error,
// or a nil writer with the error. On success the caller should call Close()
// on the returned writer.
func MakeWriter(outputs []string, defaultFile string) (sbom.Writer, error) {
	opts, parseErr := parseOutputs(outputs, defaultFile)
	if parseErr != nil {
		return nil, parseErr
	}

	w, buildErr := sbom.NewWriter(opts...)
	if buildErr != nil {
		return nil, buildErr
	}

	return w, nil
}
// parseOutputs converts command-line output strings of the form
// <format>[=<file>] into writer options, keeping the existing behavior of a
// default format and a default file. Unknown format names are accumulated
// into errs while the remaining entries are still processed.
func parseOutputs(outputs []string, defaultFile string) (out []sbom.WriterOption, errs error) {
	// an empty list is not expected (the default is normally "table"), but fall back to it regardless
	if len(outputs) == 0 {
		outputs = append(outputs, string(table.ID))
	}

	for _, raw := range outputs {
		// only the first "=" separates the format name from the file
		fields := strings.SplitN(strings.TrimSpace(raw), "=", 2)
		formatName := fields[0]

		// an explicit =<file> wins over the default (the --file value, or an empty string)
		target := defaultFile
		if len(fields) == 2 {
			target = fields[1]
		}

		if f := syft.FormatByName(formatName); f != nil {
			out = append(out, sbom.NewWriterOption(f, target))
		} else {
			errs = multierror.Append(errs, fmt.Errorf("bad output format: '%s'", formatName))
		}
	}

	return out, errs
}

View File

@ -0,0 +1,34 @@
package options
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestIsSupportedFormat checks that MakeWriter succeeds for the "json" and
// "table" output names and reports a "bad output format" error for an
// unrecognized one.
func TestIsSupportedFormat(t *testing.T) {
	// expectBadFormat asserts that the error mentions the rejected format name
	expectBadFormat := func(t assert.TestingT, err error, _ ...interface{}) bool {
		return assert.ErrorContains(t, err, "bad output format: 'unknown'")
	}

	cases := []struct {
		outputs []string
		wantErr assert.ErrorAssertionFunc
	}{
		{outputs: []string{"json"}, wantErr: assert.NoError},
		{outputs: []string{"table", "json"}, wantErr: assert.NoError},
		{outputs: []string{"unknown"}, wantErr: expectBadFormat},
	}

	for i := range cases {
		_, err := MakeWriter(cases[i].outputs, "")
		cases[i].wantErr(t, err)
	}
}

View File

@ -5,7 +5,6 @@ import (
"fmt"
"io/ioutil"
"os"
"strings"
"github.com/anchore/stereoscope"
"github.com/anchore/syft/cmd/syft/cli/eventloop"
@ -14,7 +13,6 @@ import (
"github.com/anchore/syft/internal/anchore"
"github.com/anchore/syft/internal/bus"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/formats/table"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/ui"
"github.com/anchore/syft/internal/version"
@ -23,12 +21,11 @@ import (
"github.com/anchore/syft/syft/event"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
"github.com/hashicorp/go-multierror"
"github.com/wagoodman/go-partybus"
)
func Run(ctx context.Context, app *config.Application, args []string) error {
writer, err := makeWriter(app.Outputs, app.File)
writer, err := options.MakeWriter(app.Outputs, app.File)
if err != nil {
return err
}
@ -187,54 +184,3 @@ func runPackageSbomUpload(src *source.Source, s sbom.SBOM, app *config.Applicati
return nil
}
// makeWriter creates a sbom.Writer for output or returns an error. this will either return a valid writer
// or an error but neither both and if there is no error, sbom.Writer.Close() should be called
func makeWriter(outputs []string, defaultFile string) (sbom.Writer, error) {
outputOptions, err := parseOptions(outputs, defaultFile)
if err != nil {
return nil, err
}
writer, err := sbom.NewWriter(outputOptions...)
if err != nil {
return nil, err
}
return writer, nil
}
// parseOptions utility to parse command-line option strings and retain the existing behavior of default format and file
func parseOptions(outputs []string, defaultFile string) (out []sbom.WriterOption, errs error) {
// always should have one option -- we generally get the default of "table", but just make sure
if len(outputs) == 0 {
outputs = append(outputs, string(table.ID))
}
for _, name := range outputs {
name = strings.TrimSpace(name)
// split to at most two parts for <format>=<file>
parts := strings.SplitN(name, "=", 2)
// the format name is the first part
name = parts[0]
// default to the --file or empty string if not specified
file := defaultFile
// If a file is specified as part of the output formatName, use that
if len(parts) > 1 {
file = parts[1]
}
format := syft.FormatByName(name)
if format == nil {
errs = multierror.Append(errs, fmt.Errorf("bad output format: '%s'", name))
continue
}
out = append(out, sbom.NewWriterOption(format, file))
}
return out, errs
}

View File

@ -22,7 +22,7 @@ func GetValidator(format cyclonedx.BOMFileFormat) sbom.Validator {
return err
}
// random JSON does not necessarily cause an error (e.g. SPDX)
if (cyclonedx.BOM{} == *bom) {
if (cyclonedx.BOM{} == *bom || bom.Components == nil) {
return fmt.Errorf("not a valid CycloneDX document")
}
return nil

View File

@ -1,22 +1,21 @@
package spdxhelpers
import (
"fmt"
"path"
"strings"
"github.com/anchore/syft/syft/source"
)
func DocumentName(srcMetadata source.Metadata) (string, error) {
func DocumentName(srcMetadata source.Metadata) string {
switch srcMetadata.Scheme {
case source.ImageScheme:
return cleanName(srcMetadata.ImageMetadata.UserInput), nil
return cleanName(srcMetadata.ImageMetadata.UserInput)
case source.DirectoryScheme, source.FileScheme:
return cleanName(srcMetadata.Path), nil
return cleanName(srcMetadata.Path)
default:
return "unknown"
}
return "", fmt.Errorf("unable to determine document name from scheme=%q", srcMetadata.Scheme)
}
func cleanName(name string) string {

View File

@ -5,8 +5,6 @@ import (
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/source"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
@ -59,8 +57,7 @@ func Test_DocumentName(t *testing.T) {
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
actual, err := DocumentName(test.srcMetadata)
require.NoError(t, err)
actual := DocumentName(test.srcMetadata)
assert.True(t, strings.HasPrefix(actual, test.expected), fmt.Sprintf("actual name %q", actual))
// track each scheme tested (passed or not)

View File

@ -10,12 +10,9 @@ import (
"github.com/google/uuid"
)
func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string, error) {
name, err := DocumentName(srcMetadata)
if err != nil {
return "", "", err
}
return name, DocumentNamespace(name, srcMetadata), nil
func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string) {
name := DocumentName(srcMetadata)
return name, DocumentNamespace(name, srcMetadata)
}
func DocumentNamespace(name string, srcMetadata source.Metadata) string {

View File

@ -2,6 +2,7 @@ package spdxhelpers
import (
"errors"
"net/url"
"strconv"
"strings"
@ -24,7 +25,13 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) {
spdxIDMap := make(map[string]interface{})
src := source.Metadata{Scheme: source.UnknownScheme}
if doc.CreationInfo != nil {
src.Scheme = extractSchemeFromNamespace(doc.CreationInfo.DocumentNamespace)
}
s := &sbom.SBOM{
Source: src,
Artifacts: sbom.Artifacts{
PackageCatalog: pkg.NewCatalog(),
FileMetadata: map[source.Coordinates]source.FileMetadata{},
@ -42,6 +49,30 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) {
return s, nil
}
// extractSchemeFromNamespace makes a best-effort guess at the source scheme
// of an SPDX document. SPDX itself does not state what an SBOM describes
// (an image, a directory, ...), but Syft-generated documents encode a type
// in the namespace field. The namespace is parsed as a URL and the first path
// segment equal to "file", "image" or "dir" decides the scheme; anything
// else, including an unparsable namespace, yields source.UnknownScheme.
func extractSchemeFromNamespace(ns string) source.Scheme {
	parsed, err := url.Parse(ns)
	if err != nil {
		return source.UnknownScheme
	}

	// path segment -> scheme it stands for
	known := map[string]source.Scheme{
		"file":  source.FileScheme,
		"image": source.ImageScheme,
		"dir":   source.DirectoryScheme,
	}

	for _, segment := range strings.Split(parsed.Path, "/") {
		if scheme, ok := known[segment]; ok {
			return scheme
		}
	}

	return source.UnknownScheme
}
func findLinuxReleaseByPURL(doc *spdx.Document2_2) *linux.Release {
for _, p := range doc.Packages {
purlValue := findPURLValue(p)

View File

@ -4,8 +4,10 @@ import (
"testing"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/source"
"github.com/spdx/tools-golang/spdx"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestToSyftModel(t *testing.T) {
@ -195,3 +197,39 @@ func Test_extractMetadata(t *testing.T) {
})
}
}
// TestExtractSourceFromNamespaces verifies the scheme that
// extractSchemeFromNamespace derives from a range of document namespaces,
// including foreign, malformed and empty ones.
func TestExtractSourceFromNamespaces(t *testing.T) {
	type testCase struct {
		namespace string
		expected  source.Scheme
	}

	cases := []testCase{
		{"https://anchore.com/syft/file/d42b01d0-7325-409b-b03f-74082935c4d3", source.FileScheme},
		{"https://anchore.com/syft/image/d42b01d0-7325-409b-b03f-74082935c4d3", source.ImageScheme},
		{"https://anchore.com/syft/dir/d42b01d0-7325-409b-b03f-74082935c4d3", source.DirectoryScheme},
		{"https://another-host/blob/123", source.UnknownScheme},
		{"bla bla", source.UnknownScheme},
		{"", source.UnknownScheme},
	}

	for _, tc := range cases {
		actual := extractSchemeFromNamespace(tc.namespace)
		require.Equal(t, tc.expected, actual)
	}
}

View File

@ -8,10 +8,7 @@ import (
)
func encoder(output io.Writer, s sbom.SBOM) error {
doc, err := toFormatModel(s)
if err != nil {
return err
}
doc := toFormatModel(s)
enc := json.NewEncoder(output)
// prevent > and < from being escaped in the payload

View File

@ -20,11 +20,8 @@ import (
)
// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results.
func toFormatModel(s sbom.SBOM) (*model.Document, error) {
name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source)
if err != nil {
return nil, err
}
func toFormatModel(s sbom.SBOM) *model.Document {
name, namespace := spdxhelpers.DocumentNameAndNamespace(s.Source)
return &model.Document{
Element: model.Element{
@ -46,7 +43,7 @@ func toFormatModel(s sbom.SBOM) (*model.Document, error) {
Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships),
Files: toFiles(s),
Relationships: toRelationships(s.Relationships),
}, nil
}
}
func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package {

View File

@ -8,9 +8,6 @@ import (
)
func encoder(output io.Writer, s sbom.SBOM) error {
model, err := toFormatModel(s)
if err != nil {
return err
}
model := toFormatModel(s)
return tvsaver.Save2_2(model, output)
}

View File

@ -16,11 +16,9 @@ import (
// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results.
// nolint:funlen
func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) {
name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source)
if err != nil {
return nil, err
}
func toFormatModel(s sbom.SBOM) *spdx.Document2_2 {
name, namespace := spdxhelpers.DocumentNameAndNamespace(s.Source)
return &spdx.Document2_2{
CreationInfo: &spdx.CreationInfo2_2{
// 2.1: SPDX Version; should be in the format "SPDX-2.2"
@ -86,7 +84,7 @@ func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) {
DocumentComment: "",
},
Packages: toFormatPackages(s.Artifacts.PackageCatalog),
}, nil
}
}
// packages populates all Package Information from the package Catalog (see https://spdx.github.io/spdx-spec/3-package-information/)

View File

@ -0,0 +1,78 @@
package integration
import (
"context"
"io/ioutil"
"os"
"testing"
"github.com/anchore/syft/cmd/syft/cli/convert"
"github.com/anchore/syft/internal/config"
"github.com/anchore/syft/internal/formats/cyclonedxjson"
"github.com/anchore/syft/internal/formats/cyclonedxxml"
"github.com/anchore/syft/internal/formats/spdx22json"
"github.com/anchore/syft/internal/formats/spdx22tagvalue"
"github.com/anchore/syft/internal/formats/syftjson"
"github.com/anchore/syft/internal/formats/table"
"github.com/anchore/syft/syft"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/require"
)
// convertibleFormats lists the SBOM formats that TestConvertCmd iterates
// over, one subtest per entry.
var convertibleFormats = []sbom.Format{
syftjson.Format(),
spdx22json.Format(),
spdx22tagvalue.Format(),
cyclonedxjson.Format(),
cyclonedxxml.Format(),
}
// TestConvertCmd tests if the converted SBOM is a valid document according
// to spec. For every entry in convertibleFormats it encodes a cataloged
// fixture image as syft-json into a temporary file, runs the convert command
// with that entry as the requested output, captures stdout through a pipe and
// checks that the captured bytes are identified as the requested format.
// TODO: This test can, but currently does not, check the converted SBOM content. It
// might be useful to do that in the future, once we gather a better understanding of
// what users expect from the convert command.
func TestConvertCmd(t *testing.T) {
	for _, format := range convertibleFormats {
		t.Run(format.ID().String(), func(t *testing.T) {
			s, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope)

			// the input document is always syft-json; it gets its own name so the
			// target format from the loop is not shadowed (shadowing it made every
			// subtest convert syft-json to syft-json)
			inputFormat := syft.FormatByID(syftjson.ID)

			f, err := ioutil.TempFile("", "test-convert-sbom-")
			require.NoError(t, err)
			defer func() {
				closeErr := f.Close()
				// remove the file before asserting so a failed Close does not leak it
				os.Remove(f.Name())
				require.NoError(t, closeErr)
			}()

			err = inputFormat.Encode(f, s)
			require.NoError(t, err)

			stdr, stdw, err := os.Pipe()
			require.NoError(t, err)
			defer stdr.Close()

			originalStdout := os.Stdout
			os.Stdout = stdw
			// restore stdout even when an assertion below aborts the subtest
			defer func() { os.Stdout = originalStdout }()

			ctx := context.Background()
			app := &config.Application{Outputs: []string{format.ID().String()}}
			err = convert.Run(ctx, app, []string{f.Name()})
			require.NoError(t, err)
			stdw.Close()

			out, err := ioutil.ReadAll(stdr)
			require.NoError(t, err)

			formatFound := syft.IdentifyFormat(out)
			if format.ID() == table.ID {
				require.Nil(t, formatFound)
				return
			}
			// fail with a clear message instead of a nil dereference when the
			// output cannot be identified
			require.NotNil(t, formatFound)
			require.Equal(t, format.ID(), formatFound.ID())
		})
	}
}