diff --git a/README.md b/README.md index 8eca2f31f..0a4fb3c1d 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ A CLI tool and Go library for generating a Software Bill of Materials (SBOM) fro - Linux distribution identification - Works seamlessly with [Grype](https://github.com/anchore/grype) (a fast, modern vulnerability scanner) - Able to create signed SBOM attestations using the [in-toto specification](https://github.com/in-toto/attestation/blob/main/spec/README.md) +- Convert between SBOM formats, such as CycloneDX, SPDX, and Syft's own format. ### Supported Ecosystems @@ -93,6 +94,29 @@ The above output includes only software that is visible in the container (i.e., syft <image> --scope all-layers ``` +#### Format conversion (experimental) + +The ability to convert existing SBOMs means you can create SBOMs in different formats quickly, without the need to regenerate the SBOM from scratch, which may take significantly more time. + +``` +syft convert <ORIGINAL-SBOM-FILE> -o <NEW-SBOM-FORMAT>[=<NEW-SBOM-FILE>] +``` + +This feature is experimental and data might be lost when converting formats. Packages are the main SBOM component easily transferable across formats, whereas files and relationships, as well as other information Syft doesn't support, are more likely to be lost. + +We support formats with wide community usage AND good encode/decode support by Syft. 
The supported formats are: +- Syft JSON +- SPDX 2.2 JSON +- SPDX 2.2 tag-value +- CycloneDX 1.4 JSON +- CycloneDX 1.4 XML + +Conversion example: +```sh +syft alpine:latest -o syft-json=sbom.syft.json # generate a syft SBOM +syft convert sbom.syft.json -o cyclonedx-json=sbom.cdx.json # convert it to CycloneDX +``` + #### SBOM attestation ### Keyless support diff --git a/cmd/syft/cli/commands.go b/cmd/syft/cli/commands.go index fff623d58..9f3497398 100644 --- a/cmd/syft/cli/commands.go +++ b/cmd/syft/cli/commands.go @@ -46,6 +46,7 @@ func New() (*cobra.Command, error) { // root options are also passed to the attestCmd so that a user provided config location can be discovered attestCmd := Attest(v, app, ro) poweruserCmd := PowerUser(v, app, ro) + convertCmd := Convert(v, app, ro) // rootCmd is currently an alias for the packages command rootCmd := &cobra.Command{ @@ -84,6 +85,7 @@ func New() (*cobra.Command, error) { // Add sub-commands. rootCmd.AddCommand(packagesCmd) rootCmd.AddCommand(attestCmd) + rootCmd.AddCommand(convertCmd) rootCmd.AddCommand(poweruserCmd) rootCmd.AddCommand(Completion()) rootCmd.AddCommand(Version(v, app)) diff --git a/cmd/syft/cli/convert.go b/cmd/syft/cli/convert.go new file mode 100644 index 000000000..0794db8c5 --- /dev/null +++ b/cmd/syft/cli/convert.go @@ -0,0 +1,47 @@ +package cli + +import ( + "fmt" + + "github.com/anchore/syft/cmd/syft/cli/convert" + "github.com/anchore/syft/cmd/syft/cli/options" + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/config" + "github.com/spf13/cobra" + "github.com/spf13/viper" +) + +const ( + convertExample = ` {{.appName}} {{.command}} img.syft.json -o spdx-json convert a syft SBOM to spdx-json, output goes to stdout in table format, by default + {{.appName}} {{.command}} img.syft.json -o cyclonedx-json=img.cdx.json convert a syft SBOM to CycloneDX, output goes to a file named img.cdx.json +` +) + +func Convert(v *viper.Viper, app *config.Application, ro *options.RootOptions) 
*cobra.Command { + cmd := &cobra.Command{ + Use: "convert [SOURCE-SBOM] -o [FORMAT]", + Short: "Convert between SBOM formats", + Long: "[Experimental] Convert SBOM files to, and from, SPDX, CycloneDX and Syft's format. For more info about data loss between formats see https://github.com/anchore/syft#format-conversion-experimental", + Example: internal.Tprintf(convertExample, map[string]interface{}{ + "appName": internal.ApplicationName, + "command": "convert", + }), + Args: func(cmd *cobra.Command, args []string) error { + if err := app.LoadAllValues(v, ro.Config); err != nil { + return fmt.Errorf("invalid application config: %w", err) + } + newLogWrapper(app) + logApplicationConfig(app) + return validateArgs(cmd, args) + }, + SilenceUsage: true, + SilenceErrors: true, + RunE: func(cmd *cobra.Command, args []string) error { + if app.CheckForAppUpdate { + checkForApplicationUpdate() + } + return convert.Run(cmd.Context(), app, args) + }, + } + return cmd +} diff --git a/cmd/syft/cli/convert/convert.go b/cmd/syft/cli/convert/convert.go new file mode 100644 index 000000000..87a8d497d --- /dev/null +++ b/cmd/syft/cli/convert/convert.go @@ -0,0 +1,41 @@ +package convert + +import ( + "context" + "fmt" + "os" + + "github.com/anchore/syft/cmd/syft/cli/options" + "github.com/anchore/syft/internal/config" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft" +) + +func Run(ctx context.Context, app *config.Application, args []string) error { + log.Warn("convert is an experimental feature, run `syft convert -h` for help") + writer, err := options.MakeWriter(app.Outputs, app.File) + if err != nil { + return err + } + + defer func() { + if err := writer.Close(); err != nil { + log.Warnf("unable to write to report destination: %w", err) + } + }() + + // this can only be a SBOM file + userInput := args[0] + f, err := os.Open(userInput) + if err != nil { + return fmt.Errorf("failed to open SBOM file: %w", err) + } + defer f.Close() + + sbom, _, err := 
syft.Decode(f) + if err != nil { + return fmt.Errorf("failed to decode SBOM: %w", err) + } + + return writer.Write(*sbom) +} diff --git a/cmd/syft/cli/options/writer.go b/cmd/syft/cli/options/writer.go new file mode 100644 index 000000000..d69f3abfb --- /dev/null +++ b/cmd/syft/cli/options/writer.go @@ -0,0 +1,62 @@ +package options + +import ( + "fmt" + "strings" + + "github.com/anchore/syft/internal/formats/table" + "github.com/anchore/syft/syft" + "github.com/anchore/syft/syft/sbom" + "github.com/hashicorp/go-multierror" +) + +// makeWriter creates a sbom.Writer for output or returns an error. this will either return a valid writer +// or an error but neither both and if there is no error, sbom.Writer.Close() should be called +func MakeWriter(outputs []string, defaultFile string) (sbom.Writer, error) { + outputOptions, err := parseOutputs(outputs, defaultFile) + if err != nil { + return nil, err + } + + writer, err := sbom.NewWriter(outputOptions...) + if err != nil { + return nil, err + } + + return writer, nil +} + +// parseOptions utility to parse command-line option strings and retain the existing behavior of default format and file +func parseOutputs(outputs []string, defaultFile string) (out []sbom.WriterOption, errs error) { + // always should have one option -- we generally get the default of "table", but just make sure + if len(outputs) == 0 { + outputs = append(outputs, string(table.ID)) + } + + for _, name := range outputs { + name = strings.TrimSpace(name) + + // split to at most two parts for = + parts := strings.SplitN(name, "=", 2) + + // the format name is the first part + name = parts[0] + + // default to the --file or empty string if not specified + file := defaultFile + + // If a file is specified as part of the output formatName, use that + if len(parts) > 1 { + file = parts[1] + } + + format := syft.FormatByName(name) + if format == nil { + errs = multierror.Append(errs, fmt.Errorf("bad output format: '%s'", name)) + continue + } + + out = 
append(out, sbom.NewWriterOption(format, file)) + } + return out, errs +} diff --git a/cmd/syft/cli/options/writer_test.go b/cmd/syft/cli/options/writer_test.go new file mode 100644 index 000000000..9e5063e85 --- /dev/null +++ b/cmd/syft/cli/options/writer_test.go @@ -0,0 +1,34 @@ +package options + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIsSupportedFormat(t *testing.T) { + tests := []struct { + outputs []string + wantErr assert.ErrorAssertionFunc + }{ + { + outputs: []string{"json"}, + wantErr: assert.NoError, + }, + { + outputs: []string{"table", "json"}, + wantErr: assert.NoError, + }, + { + outputs: []string{"unknown"}, + wantErr: func(t assert.TestingT, err error, bla ...interface{}) bool { + return assert.ErrorContains(t, err, "bad output format: 'unknown'") + }, + }, + } + + for _, tt := range tests { + _, err := MakeWriter(tt.outputs, "") + tt.wantErr(t, err) + } +} diff --git a/cmd/syft/cli/packages/packages.go b/cmd/syft/cli/packages/packages.go index c0615022f..74ca43410 100644 --- a/cmd/syft/cli/packages/packages.go +++ b/cmd/syft/cli/packages/packages.go @@ -5,7 +5,6 @@ import ( "fmt" "io/ioutil" "os" - "strings" "github.com/anchore/stereoscope" "github.com/anchore/syft/cmd/syft/cli/eventloop" @@ -14,7 +13,6 @@ import ( "github.com/anchore/syft/internal/anchore" "github.com/anchore/syft/internal/bus" "github.com/anchore/syft/internal/config" - "github.com/anchore/syft/internal/formats/table" "github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/ui" "github.com/anchore/syft/internal/version" @@ -23,12 +21,11 @@ import ( "github.com/anchore/syft/syft/event" "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/source" - "github.com/hashicorp/go-multierror" "github.com/wagoodman/go-partybus" ) func Run(ctx context.Context, app *config.Application, args []string) error { - writer, err := makeWriter(app.Outputs, app.File) + writer, err := options.MakeWriter(app.Outputs, app.File) if err != 
nil { return err } @@ -187,54 +184,3 @@ func runPackageSbomUpload(src *source.Source, s sbom.SBOM, app *config.Applicati return nil } - -// makeWriter creates a sbom.Writer for output or returns an error. this will either return a valid writer -// or an error but neither both and if there is no error, sbom.Writer.Close() should be called -func makeWriter(outputs []string, defaultFile string) (sbom.Writer, error) { - outputOptions, err := parseOptions(outputs, defaultFile) - if err != nil { - return nil, err - } - - writer, err := sbom.NewWriter(outputOptions...) - if err != nil { - return nil, err - } - - return writer, nil -} - -// parseOptions utility to parse command-line option strings and retain the existing behavior of default format and file -func parseOptions(outputs []string, defaultFile string) (out []sbom.WriterOption, errs error) { - // always should have one option -- we generally get the default of "table", but just make sure - if len(outputs) == 0 { - outputs = append(outputs, string(table.ID)) - } - - for _, name := range outputs { - name = strings.TrimSpace(name) - - // split to at most two parts for = - parts := strings.SplitN(name, "=", 2) - - // the format name is the first part - name = parts[0] - - // default to the --file or empty string if not specified - file := defaultFile - - // If a file is specified as part of the output formatName, use that - if len(parts) > 1 { - file = parts[1] - } - - format := syft.FormatByName(name) - if format == nil { - errs = multierror.Append(errs, fmt.Errorf("bad output format: '%s'", name)) - continue - } - - out = append(out, sbom.NewWriterOption(format, file)) - } - return out, errs -} diff --git a/internal/formats/common/cyclonedxhelpers/decoder.go b/internal/formats/common/cyclonedxhelpers/decoder.go index 43ff2607b..139cf06ee 100644 --- a/internal/formats/common/cyclonedxhelpers/decoder.go +++ b/internal/formats/common/cyclonedxhelpers/decoder.go @@ -22,7 +22,7 @@ func GetValidator(format 
cyclonedx.BOMFileFormat) sbom.Validator { return err } // random JSON does not necessarily cause an error (e.g. SPDX) - if (cyclonedx.BOM{} == *bom) { + if (cyclonedx.BOM{} == *bom || bom.Components == nil) { return fmt.Errorf("not a valid CycloneDX document") } return nil diff --git a/internal/formats/common/spdxhelpers/document_name.go b/internal/formats/common/spdxhelpers/document_name.go index af80a52ed..4e896427a 100644 --- a/internal/formats/common/spdxhelpers/document_name.go +++ b/internal/formats/common/spdxhelpers/document_name.go @@ -1,22 +1,21 @@ package spdxhelpers import ( - "fmt" "path" "strings" "github.com/anchore/syft/syft/source" ) -func DocumentName(srcMetadata source.Metadata) (string, error) { +func DocumentName(srcMetadata source.Metadata) string { switch srcMetadata.Scheme { case source.ImageScheme: - return cleanName(srcMetadata.ImageMetadata.UserInput), nil + return cleanName(srcMetadata.ImageMetadata.UserInput) case source.DirectoryScheme, source.FileScheme: - return cleanName(srcMetadata.Path), nil + return cleanName(srcMetadata.Path) + default: + return "unknown" } - - return "", fmt.Errorf("unable to determine document name from scheme=%q", srcMetadata.Scheme) } func cleanName(name string) string { diff --git a/internal/formats/common/spdxhelpers/document_name_test.go b/internal/formats/common/spdxhelpers/document_name_test.go index ab1095e46..e3d0ff5b5 100644 --- a/internal/formats/common/spdxhelpers/document_name_test.go +++ b/internal/formats/common/spdxhelpers/document_name_test.go @@ -5,8 +5,6 @@ import ( "strings" "testing" - "github.com/stretchr/testify/require" - "github.com/anchore/syft/syft/source" "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/assert" @@ -59,8 +57,7 @@ func Test_DocumentName(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual, err := DocumentName(test.srcMetadata) - require.NoError(t, err) + actual := DocumentName(test.srcMetadata) assert.True(t, 
strings.HasPrefix(actual, test.expected), fmt.Sprintf("actual name %q", actual)) // track each scheme tested (passed or not) diff --git a/internal/formats/common/spdxhelpers/document_namespace.go b/internal/formats/common/spdxhelpers/document_namespace.go index d3c438a74..9110efef5 100644 --- a/internal/formats/common/spdxhelpers/document_namespace.go +++ b/internal/formats/common/spdxhelpers/document_namespace.go @@ -10,12 +10,9 @@ import ( "github.com/google/uuid" ) -func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string, error) { - name, err := DocumentName(srcMetadata) - if err != nil { - return "", "", err - } - return name, DocumentNamespace(name, srcMetadata), nil +func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string) { + name := DocumentName(srcMetadata) + return name, DocumentNamespace(name, srcMetadata) } func DocumentNamespace(name string, srcMetadata source.Metadata) string { diff --git a/internal/formats/common/spdxhelpers/to_syft_model.go b/internal/formats/common/spdxhelpers/to_syft_model.go index a36eac3de..57dbe3692 100644 --- a/internal/formats/common/spdxhelpers/to_syft_model.go +++ b/internal/formats/common/spdxhelpers/to_syft_model.go @@ -2,6 +2,7 @@ package spdxhelpers import ( "errors" + "net/url" "strconv" "strings" @@ -24,7 +25,13 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) { spdxIDMap := make(map[string]interface{}) + src := source.Metadata{Scheme: source.UnknownScheme} + if doc.CreationInfo != nil { + src.Scheme = extractSchemeFromNamespace(doc.CreationInfo.DocumentNamespace) + } + s := &sbom.SBOM{ + Source: src, Artifacts: sbom.Artifacts{ PackageCatalog: pkg.NewCatalog(), FileMetadata: map[source.Coordinates]source.FileMetadata{}, @@ -42,6 +49,30 @@ func ToSyftModel(doc *spdx.Document2_2) (*sbom.SBOM, error) { return s, nil } +// NOTE(jonas): SPDX doesn't inform what an SBOM is about, +// image, directory, for example. This is our best effort to determine +// the scheme. 
Syft-generated SBOMs have in the namespace +// field a type encoded, which we try to identify here. +func extractSchemeFromNamespace(ns string) source.Scheme { + u, err := url.Parse(ns) + if err != nil { + return source.UnknownScheme + } + + parts := strings.Split(u.Path, "/") + for _, p := range parts { + switch p { + case "file": + return source.FileScheme + case "image": + return source.ImageScheme + case "dir": + return source.DirectoryScheme + } + } + return source.UnknownScheme +} + func findLinuxReleaseByPURL(doc *spdx.Document2_2) *linux.Release { for _, p := range doc.Packages { purlValue := findPURLValue(p) diff --git a/internal/formats/common/spdxhelpers/to_syft_model_test.go b/internal/formats/common/spdxhelpers/to_syft_model_test.go index 9864c5bef..9bced2343 100644 --- a/internal/formats/common/spdxhelpers/to_syft_model_test.go +++ b/internal/formats/common/spdxhelpers/to_syft_model_test.go @@ -4,8 +4,10 @@ import ( "testing" "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" "github.com/spdx/tools-golang/spdx" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestToSyftModel(t *testing.T) { @@ -195,3 +197,39 @@ func Test_extractMetadata(t *testing.T) { }) } } + +func TestExtractSourceFromNamespaces(t *testing.T) { + tests := []struct { + namespace string + expected source.Scheme + }{ + { + namespace: "https://anchore.com/syft/file/d42b01d0-7325-409b-b03f-74082935c4d3", + expected: source.FileScheme, + }, + { + namespace: "https://anchore.com/syft/image/d42b01d0-7325-409b-b03f-74082935c4d3", + expected: source.ImageScheme, + }, + { + namespace: "https://anchore.com/syft/dir/d42b01d0-7325-409b-b03f-74082935c4d3", + expected: source.DirectoryScheme, + }, + { + namespace: "https://another-host/blob/123", + expected: source.UnknownScheme, + }, + { + namespace: "bla bla", + expected: source.UnknownScheme, + }, + { + namespace: "", + expected: source.UnknownScheme, + }, + } + + for _, tt := range 
tests { + require.Equal(t, tt.expected, extractSchemeFromNamespace(tt.namespace)) + } +} diff --git a/internal/formats/spdx22json/encoder.go b/internal/formats/spdx22json/encoder.go index ceddd0f80..36bfeb8a2 100644 --- a/internal/formats/spdx22json/encoder.go +++ b/internal/formats/spdx22json/encoder.go @@ -8,10 +8,7 @@ import ( ) func encoder(output io.Writer, s sbom.SBOM) error { - doc, err := toFormatModel(s) - if err != nil { - return err - } + doc := toFormatModel(s) enc := json.NewEncoder(output) // prevent > and < from being escaped in the payload diff --git a/internal/formats/spdx22json/to_format_model.go b/internal/formats/spdx22json/to_format_model.go index c69518f17..917738955 100644 --- a/internal/formats/spdx22json/to_format_model.go +++ b/internal/formats/spdx22json/to_format_model.go @@ -20,11 +20,8 @@ import ( ) // toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. -func toFormatModel(s sbom.SBOM) (*model.Document, error) { - name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source) - if err != nil { - return nil, err - } +func toFormatModel(s sbom.SBOM) *model.Document { + name, namespace := spdxhelpers.DocumentNameAndNamespace(s.Source) return &model.Document{ Element: model.Element{ @@ -46,7 +43,7 @@ func toFormatModel(s sbom.SBOM) (*model.Document, error) { Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships), Files: toFiles(s), Relationships: toRelationships(s.Relationships), - }, nil + } } func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package { diff --git a/internal/formats/spdx22tagvalue/encoder.go b/internal/formats/spdx22tagvalue/encoder.go index b7fd8eaa1..6f9fdf5d8 100644 --- a/internal/formats/spdx22tagvalue/encoder.go +++ b/internal/formats/spdx22tagvalue/encoder.go @@ -8,9 +8,6 @@ import ( ) func encoder(output io.Writer, s sbom.SBOM) error { - model, err := toFormatModel(s) - if err != nil { - 
return err - } + model := toFormatModel(s) return tvsaver.Save2_2(model, output) } diff --git a/internal/formats/spdx22tagvalue/to_format_model.go b/internal/formats/spdx22tagvalue/to_format_model.go index b189f977d..99e007af5 100644 --- a/internal/formats/spdx22tagvalue/to_format_model.go +++ b/internal/formats/spdx22tagvalue/to_format_model.go @@ -16,11 +16,9 @@ import ( // toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. // nolint:funlen -func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) { - name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source) - if err != nil { - return nil, err - } +func toFormatModel(s sbom.SBOM) *spdx.Document2_2 { + name, namespace := spdxhelpers.DocumentNameAndNamespace(s.Source) + return &spdx.Document2_2{ CreationInfo: &spdx.CreationInfo2_2{ // 2.1: SPDX Version; should be in the format "SPDX-2.2" @@ -86,7 +84,7 @@ func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) { DocumentComment: "", }, Packages: toFormatPackages(s.Artifacts.PackageCatalog), - }, nil + } } // packages populates all Package Information from the package Catalog (see https://spdx.github.io/spdx-spec/3-package-information/) diff --git a/test/integration/convert_test.go b/test/integration/convert_test.go new file mode 100644 index 000000000..526d6006e --- /dev/null +++ b/test/integration/convert_test.go @@ -0,0 +1,78 @@ +package integration + +import ( + "context" + "io/ioutil" + "os" + "testing" + + "github.com/anchore/syft/cmd/syft/cli/convert" + "github.com/anchore/syft/internal/config" + "github.com/anchore/syft/internal/formats/cyclonedxjson" + "github.com/anchore/syft/internal/formats/cyclonedxxml" + "github.com/anchore/syft/internal/formats/spdx22json" + "github.com/anchore/syft/internal/formats/spdx22tagvalue" + "github.com/anchore/syft/internal/formats/syftjson" + "github.com/anchore/syft/internal/formats/table" + "github.com/anchore/syft/syft" + 
"github.com/anchore/syft/syft/sbom"
+	"github.com/anchore/syft/syft/source"
+	"github.com/stretchr/testify/require"
+)
+
+var convertibleFormats = []sbom.Format{
+	syftjson.Format(),
+	spdx22json.Format(),
+	spdx22tagvalue.Format(),
+	cyclonedxjson.Format(),
+	cyclonedxxml.Format(),
+}
+
+// TestConvertCmd writes the SBOM returned by catalogFixtureImage to a temp file as Syft
+// JSON, runs convert.Run once per entry of convertibleFormats with that entry as the
+// requested output, and asserts the captured stdout is identified as that same format.
+// TODO: This test can, but currently does not, check the converted SBOM content; that
+// might be useful once we better understand what users expect from the convert command.
+func TestConvertCmd(t *testing.T) {
+	for _, format := range convertibleFormats {
+		t.Run(format.ID().String(), func(t *testing.T) {
+			syftSBOM, _ := catalogFixtureImage(t, "image-pkg-coverage", source.SquashedScope)
+			syftFormat := syft.FormatByID(syftjson.ID) // input encoding only; must not shadow the loop's target format
+
+			f, err := ioutil.TempFile("", "test-convert-sbom-")
+			require.NoError(t, err)
+			defer func() {
+				err := f.Close()
+				require.NoError(t, err)
+				os.Remove(f.Name())
+			}()
+
+			err = syftFormat.Encode(f, syftSBOM)
+			require.NoError(t, err)
+
+			stdr, stdw, err := os.Pipe()
+			require.NoError(t, err)
+			originalStdout := os.Stdout
+			os.Stdout = stdw // capture what the writer emits on stdout
+
+			ctx := context.Background()
+			app := &config.Application{Outputs: []string{format.ID().String()}}
+
+			err = convert.Run(ctx, app, []string{f.Name()})
+			require.NoError(t, err)
+			stdw.Close() // close the write end so ReadAll below sees EOF
+
+			out, err := ioutil.ReadAll(stdr)
+			require.NoError(t, err)
+
+			os.Stdout = originalStdout
+
+			formatFound := syft.IdentifyFormat(out)
+			if format.ID() == table.ID {
+				require.Nil(t, formatFound)
+				return
+			}
+			require.Equal(t, format.ID(), formatFound.ID())
+		})
+	}
+}