Fix SPDX namespace value (#649)

* fix spdx namespace and add scheme range assertions

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* validate SPDX document name from source metadata

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>

* comment why namespace tests only check prefix

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-12-02 09:10:40 -05:00 committed by GitHub
parent 21d1738b27
commit da62387545
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 445 additions and 71 deletions

View File

@ -0,0 +1,31 @@
package spdxhelpers
import (
"fmt"
"path"
"strings"
"github.com/anchore/syft/syft/source"
)
// DocumentName derives the SPDX document name from the given source metadata.
//
// For image sources the name is based on the user-provided image reference; for
// directory and file sources it is based on the source path. In both cases the
// raw value is passed through cleanName before being returned. Any other scheme
// results in an error.
func DocumentName(srcMetadata source.Metadata) (string, error) {
	var rawName string

	switch scheme := srcMetadata.Scheme; scheme {
	case source.ImageScheme:
		rawName = srcMetadata.ImageMetadata.UserInput
	case source.DirectoryScheme, source.FileScheme:
		rawName = srcMetadata.Path
	default:
		// no sensible name can be derived for a scheme that is not handled above
		return "", fmt.Errorf("unable to determine document name from scheme=%q", scheme)
	}

	return cleanName(rawName), nil
}
// cleanName sanitizes a raw source name so it can be used as an SPDX document
// name and embedded in the document namespace URL.
func cleanName(name string) string {
	// '#' is replaced according to the specification, and ':' is replaced so
	// the name can be used for URL construction; both become '-'
	sanitizer := strings.NewReplacer("#", "-", ":", "-")

	// clean relative pathing
	return path.Clean(sanitizer.Replace(name))
}

View File

@ -0,0 +1,73 @@
package spdxhelpers
import (
"fmt"
"strings"
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/source"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
)
// Test_DocumentName checks the document name derived for each source scheme and
// additionally asserts that every scheme listed in source.AllSchemes is covered
// by a test case.
func Test_DocumentName(t *testing.T) {
	// names of every scheme known to the source package
	knownSchemes := strset.New()
	for i := range source.AllSchemes {
		knownSchemes.Add(string(source.AllSchemes[i]))
	}

	// names of the schemes actually exercised by the cases below
	exercisedSchemes := strset.New()

	type testCase struct {
		name        string
		inputName   string
		srcMetadata source.Metadata
		expected    string
	}

	cases := []testCase{
		{
			name:      "image",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.ImageScheme,
				ImageMetadata: source.ImageMetadata{
					UserInput:      "image-repo/name:tag",
					ID:             "id",
					ManifestDigest: "digest",
				},
			},
			expected: "image-repo/name-tag",
		},
		{
			name:      "directory",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.DirectoryScheme,
				Path:   "some/path/to/place",
			},
			expected: "some/path/to/place",
		},
		{
			name:      "file",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.FileScheme,
				Path:   "some/path/to/place",
			},
			expected: "some/path/to/place",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got, err := DocumentName(tc.srcMetadata)
			require.NoError(t, err)

			// the derived name must start with the expected value
			startsWithExpected := strings.HasPrefix(got, tc.expected)
			assert.True(t, startsWithExpected, fmt.Sprintf("actual name %q", got))

			// record the scheme once the name was produced without error
			exercisedSchemes.Add(string(tc.srcMetadata.Scheme))
		})
	}

	// every known scheme must have been exercised by at least one case
	assert.ElementsMatch(t, knownSchemes.List(), exercisedSchemes.List(), "not all source.Schemes are under test")
}

View File

@ -0,0 +1,45 @@
package spdxhelpers
import (
"fmt"
"net/url"
"path"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/syft/source"
"github.com/google/uuid"
)
// DocumentNameAndNamespace returns the SPDX document name and the matching
// document namespace for the given source metadata.
//
// The name comes from DocumentName; if that fails, the error is returned and
// both strings are empty. The namespace is built from the name by
// DocumentNamespace.
func DocumentNameAndNamespace(srcMetadata source.Metadata) (string, string, error) {
	docName, nameErr := DocumentName(srcMetadata)
	if nameErr != nil {
		return "", "", nameErr
	}

	docNamespace := DocumentNamespace(docName, srcMetadata)
	return docName, docNamespace, nil
}
// DocumentNamespace builds the SPDX document namespace URL for a document with
// the given name and source metadata.
//
// The URL uses the https scheme and the anchore.com host. Its path is made of
// the application name, a segment describing the source type ("image", "dir",
// "file", or "unknown-source-type" for any other scheme), and a final element
// of the form "<name>-<uuid>". When name is "." only the UUID is used as the
// final element. A new random UUID is generated on every call, so two calls
// with the same arguments return different namespaces.
func DocumentNamespace(name string, srcMetadata source.Metadata) string {
	var sourceKind string
	switch srcMetadata.Scheme {
	case source.ImageScheme:
		sourceKind = "image"
	case source.DirectoryScheme:
		sourceKind = "dir"
	case source.FileScheme:
		sourceKind = "file"
	default:
		sourceKind = "unknown-source-type"
	}

	// uuid.Must panics if a random UUID cannot be generated
	id := uuid.Must(uuid.NewRandom()).String()

	// a name of "." carries no information, so only the UUID is used in that case
	leaf := id
	if name != "." {
		leaf = fmt.Sprintf("%s-%s", name, id)
	}
	identifier := path.Join(sourceKind, leaf)

	namespace := url.URL{
		Scheme: "https",
		Host:   "anchore.com",
		Path:   path.Join(internal.ApplicationName, identifier),
	}
	return namespace.String()
}

View File

@ -0,0 +1,71 @@
package spdxhelpers
import (
"fmt"
"strings"
"testing"
"github.com/anchore/syft/syft/source"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
)
// Test_documentNamespace checks the namespace URL produced for each source
// scheme and additionally asserts that every scheme listed in
// source.AllSchemes is covered by a test case.
func Test_documentNamespace(t *testing.T) {
	// names of every scheme known to the source package
	knownSchemes := strset.New()
	for i := range source.AllSchemes {
		knownSchemes.Add(string(source.AllSchemes[i]))
	}

	// names of the schemes actually exercised by the cases below
	exercisedSchemes := strset.New()

	type testCase struct {
		name        string
		inputName   string
		srcMetadata source.Metadata
		expected    string
	}

	cases := []testCase{
		{
			name:      "image",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.ImageScheme,
				ImageMetadata: source.ImageMetadata{
					UserInput:      "image-repo/name:tag",
					ID:             "id",
					ManifestDigest: "digest",
				},
			},
			expected: "https://anchore.com/syft/image/my-name-",
		},
		{
			name:      "directory",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.DirectoryScheme,
				Path:   "some/path/to/place",
			},
			expected: "https://anchore.com/syft/dir/my-name-",
		},
		{
			name:      "file",
			inputName: "my-name",
			srcMetadata: source.Metadata{
				Scheme: source.FileScheme,
				Path:   "some/path/to/place",
			},
			expected: "https://anchore.com/syft/file/my-name-",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := DocumentNamespace(tc.inputName, tc.srcMetadata)

			// note: the namespace ends with a random UUID, so only the prefix can be compared
			startsWithExpected := strings.HasPrefix(got, tc.expected)
			assert.True(t, startsWithExpected, fmt.Sprintf("actual namespace %q", got))

			// track each scheme tested (passed or not)
			exercisedSchemes.Add(string(tc.srcMetadata.Scheme))
		})
	}

	// every known scheme must have been exercised by at least one case
	assert.ElementsMatch(t, knownSchemes.List(), exercisedSchemes.List(), "not all source.Schemes are under test")
}

View File

@ -69,7 +69,7 @@ func toBomDescriptorComponent(srcMetadata source.Metadata) *model.BomDescriptorC
Version: srcMetadata.ImageMetadata.ManifestDigest, Version: srcMetadata.ImageMetadata.ManifestDigest,
}, },
} }
case source.DirectoryScheme: case source.DirectoryScheme, source.FileScheme:
return &model.BomDescriptorComponent{ return &model.BomDescriptorComponent{
Component: model.Component{ Component: model.Component{
Type: "file", Type: "file",

View File

@ -7,15 +7,16 @@ import (
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
) )
const anchoreNamespace = "https://anchore.com/syft"
func encoder(output io.Writer, s sbom.SBOM) error { func encoder(output io.Writer, s sbom.SBOM) error {
doc := toFormatModel(s) doc, err := toFormatModel(s)
if err != nil {
return err
}
enc := json.NewEncoder(output) enc := json.NewEncoder(output)
// prevent > and < from being escaped in the payload // prevent > and < from being escaped in the payload
enc.SetEscapeHTML(false) enc.SetEscapeHTML(false)
enc.SetIndent("", " ") enc.SetIndent("", " ")
return enc.Encode(&doc) return enc.Encode(doc)
} }

View File

@ -3,7 +3,7 @@
"name": "/some/path", "name": "/some/path",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-11-17T19:35:54.834877Z", "created": "2021-12-01T15:08:29.469369Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
@ -11,7 +11,7 @@
"licenseListVersion": "3.15" "licenseListVersion": "3.15"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https:/anchore.com/syft/dir/some/path-65e2226e-a61e-4ed1-81bb-56022e1ff1eb", "documentNamespace": "https://anchore.com/syft/dir/some/path-f4586501-2da6-4541-a8e9-232b32f25e9a",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-2a115ac97d018a0e", "SPDXID": "SPDXRef-2a115ac97d018a0e",

View File

@ -3,7 +3,7 @@
"name": "user-image-input", "name": "user-image-input",
"spdxVersion": "SPDX-2.2", "spdxVersion": "SPDX-2.2",
"creationInfo": { "creationInfo": {
"created": "2021-11-17T19:35:57.761372Z", "created": "2021-12-01T15:08:29.476498Z",
"creators": [ "creators": [
"Organization: Anchore, Inc", "Organization: Anchore, Inc",
"Tool: syft-[not provided]" "Tool: syft-[not provided]"
@ -11,7 +11,7 @@
"licenseListVersion": "3.15" "licenseListVersion": "3.15"
}, },
"dataLicense": "CC0-1.0", "dataLicense": "CC0-1.0",
"documentNamespace": "https:/anchore.com/syft/image/user-image-input-5383918f-ec96-4aa9-b756-ad16e1ada31e", "documentNamespace": "https://anchore.com/syft/image/user-image-input-e3b7637c-9b2f-4005-a683-58e60f979082",
"packages": [ "packages": [
{ {
"SPDXID": "SPDXRef-888661d4f0362f02", "SPDXID": "SPDXRef-888661d4f0362f02",

View File

@ -2,7 +2,6 @@ package spdx22json
import ( import (
"fmt" "fmt"
"path"
"path/filepath" "path/filepath"
"sort" "sort"
"strings" "strings"
@ -19,14 +18,16 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/syft/source" "github.com/anchore/syft/syft/source"
"github.com/google/uuid"
) )
// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. // toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results.
func toFormatModel(s sbom.SBOM) model.Document { func toFormatModel(s sbom.SBOM) (*model.Document, error) {
name := documentName(s.Source) name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source)
if err != nil {
return nil, err
}
return model.Document{ return &model.Document{
Element: model.Element{ Element: model.Element{
SPDXID: model.ElementID("DOCUMENT").String(), SPDXID: model.ElementID("DOCUMENT").String(),
Name: name, Name: name,
@ -42,52 +43,11 @@ func toFormatModel(s sbom.SBOM) model.Document {
LicenseListVersion: spdxlicense.Version, LicenseListVersion: spdxlicense.Version,
}, },
DataLicense: "CC0-1.0", DataLicense: "CC0-1.0",
DocumentNamespace: documentNamespace(name, s.Source), DocumentNamespace: namespace,
Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships), Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships),
Files: toFiles(s), Files: toFiles(s),
Relationships: toRelationships(s.Relationships), Relationships: toRelationships(s.Relationships),
} }, nil
}
func documentName(srcMetadata source.Metadata) string {
switch srcMetadata.Scheme {
case source.ImageScheme:
return cleanSPDXName(srcMetadata.ImageMetadata.UserInput)
case source.DirectoryScheme:
return cleanSPDXName(srcMetadata.Path)
}
// TODO: is this alright?
return uuid.Must(uuid.NewRandom()).String()
}
func cleanSPDXName(name string) string {
// remove # according to specification
name = strings.ReplaceAll(name, "#", "-")
// remove : for url construction
name = strings.ReplaceAll(name, ":", "-")
// clean relative pathing
return path.Clean(name)
}
func documentNamespace(name string, srcMetadata source.Metadata) string {
input := "unknown-source-type"
switch srcMetadata.Scheme {
case source.ImageScheme:
input = "image"
case source.DirectoryScheme:
input = "dir"
}
uniqueID := uuid.Must(uuid.NewRandom())
identifier := path.Join(input, uniqueID.String())
if name != "." {
identifier = path.Join(input, fmt.Sprintf("%s-%s", name, uniqueID.String()))
}
return path.Join(anchoreNamespace, identifier)
} }
func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package { func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package {

View File

@ -8,6 +8,9 @@ import (
) )
func encoder(output io.Writer, s sbom.SBOM) error { func encoder(output io.Writer, s sbom.SBOM) error {
model := toFormatModel(s) model, err := toFormatModel(s)
return tvsaver.Save2_2(&model, output) if err != nil {
return err
}
return tvsaver.Save2_2(model, output)
} }

View File

@ -32,6 +32,10 @@ func TestSPDXTagValueImagePresenter(t *testing.T) {
func spdxTagValueRedactor(s []byte) []byte { func spdxTagValueRedactor(s []byte) []byte {
// each SBOM reports the time it was generated, which is not useful during snapshot testing // each SBOM reports the time it was generated, which is not useful during snapshot testing
s = regexp.MustCompile(`Created: .*`).ReplaceAll(s, []byte("redacted")) s = regexp.MustCompile(`Created: .*`).ReplaceAll(s, []byte("redacted"))
// each SBOM reports a unique documentNamespace when generated, this is not useful for snapshot testing
s = regexp.MustCompile(`DocumentNamespace: https://anchore.com/syft/.*`).ReplaceAll(s, []byte("redacted"))
// the license list will be updated periodically, the value here should not be directly tested in snapshot tests // the license list will be updated periodically, the value here should not be directly tested in snapshot tests
return regexp.MustCompile(`LicenseListVersion: .*`).ReplaceAll(s, []byte("redacted")) return regexp.MustCompile(`LicenseListVersion: .*`).ReplaceAll(s, []byte("redacted"))
} }

View File

@ -1,11 +1,12 @@
SPDXVersion: SPDX-2.2 SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0 DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT SPDXID: SPDXRef-DOCUMENT
DocumentNamespace: https://anchore.com/syft/image/ DocumentName: /some/path
LicenseListVersion: 3.13 DocumentNamespace: https://anchore.com/syft/dir/some/path-22f5732a-cab0-4376-9b79-15e413049500
LicenseListVersion: 3.15
Creator: Organization: Anchore, Inc Creator: Organization: Anchore, Inc
Creator: Tool: syft-[not provided] Creator: Tool: syft-[not provided]
Created: 2021-06-23T17:49:25Z Created: 2021-12-01T15:08:43Z
##### Package: package-2 ##### Package: package-2

View File

@ -2,11 +2,11 @@ SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0 DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT SPDXID: SPDXRef-DOCUMENT
DocumentName: user-image-input DocumentName: user-image-input
DocumentNamespace: https://anchore.com/syft/image/user-image-input DocumentNamespace: https://anchore.com/syft/image/user-image-input-ce4d4ae5-9d79-4f84-a410-361e394c2908
LicenseListVersion: 3.13 LicenseListVersion: 3.15
Creator: Organization: Anchore, Inc Creator: Organization: Anchore, Inc
Creator: Tool: syft-[not provided] Creator: Tool: syft-[not provided]
Created: 2021-06-23T17:49:25Z Created: 2021-12-01T15:08:44Z
##### Package: package-2 ##### Package: package-2

View File

@ -16,8 +16,12 @@ import (
// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. // toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results.
// nolint:funlen // nolint:funlen
func toFormatModel(s sbom.SBOM) spdx.Document2_2 { func toFormatModel(s sbom.SBOM) (*spdx.Document2_2, error) {
return spdx.Document2_2{ name, namespace, err := spdxhelpers.DocumentNameAndNamespace(s.Source)
if err != nil {
return nil, err
}
return &spdx.Document2_2{
CreationInfo: &spdx.CreationInfo2_2{ CreationInfo: &spdx.CreationInfo2_2{
// 2.1: SPDX Version; should be in the format "SPDX-2.2" // 2.1: SPDX Version; should be in the format "SPDX-2.2"
// Cardinality: mandatory, one // Cardinality: mandatory, one
@ -33,7 +37,7 @@ func toFormatModel(s sbom.SBOM) spdx.Document2_2 {
// 2.4: Document Name // 2.4: Document Name
// Cardinality: mandatory, one // Cardinality: mandatory, one
DocumentName: s.Source.ImageMetadata.UserInput, DocumentName: name,
// 2.5: Document Namespace // 2.5: Document Namespace
// Cardinality: mandatory, one // Cardinality: mandatory, one
@ -52,7 +56,7 @@ func toFormatModel(s sbom.SBOM) spdx.Document2_2 {
// In many cases, the URI will point to a web accessible document, but this should not be assumed // In many cases, the URI will point to a web accessible document, but this should not be assumed
// to be the case. // to be the case.
DocumentNamespace: fmt.Sprintf("https://anchore.com/syft/image/%s", s.Source.ImageMetadata.UserInput), DocumentNamespace: namespace,
// 2.6: External Document References // 2.6: External Document References
// Cardinality: optional, one or many // Cardinality: optional, one or many
@ -82,7 +86,7 @@ func toFormatModel(s sbom.SBOM) spdx.Document2_2 {
DocumentComment: "", DocumentComment: "",
}, },
Packages: toFormatPackages(s.Artifacts.PackageCatalog), Packages: toFormatPackages(s.Artifacts.PackageCatalog),
} }, nil
} }
// packages populates all Package Information from the package Catalog (see https://spdx.github.io/spdx-spec/3-package-information/) // packages populates all Package Information from the package Catalog (see https://spdx.github.io/spdx-spec/3-package-information/)

View File

@ -202,6 +202,11 @@ func toSourceModel(src source.Metadata) (model.Source, error) {
Type: "directory", Type: "directory",
Target: src.Path, Target: src.Path,
}, nil }, nil
case source.FileScheme:
return model.Source{
Type: "file",
Target: src.Path,
}, nil
default: default:
return model.Source{}, fmt.Errorf("unsupported source: %q", src.Scheme) return model.Source{}, fmt.Errorf("unsupported source: %q", src.Scheme)
} }

View File

@ -0,0 +1,84 @@
package syftjson
import (
"testing"
"github.com/scylladb/go-set/strset"
"github.com/anchore/syft/internal/formats/syftjson/model"
"github.com/anchore/syft/syft/source"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Test_toSourceModel checks the conversion from source.Metadata to the JSON
// model.Source for each source scheme and additionally asserts that every
// scheme listed in source.AllSchemes is covered by a test case.
func Test_toSourceModel(t *testing.T) {
	// names of every scheme known to the source package
	knownSchemes := strset.New()
	for i := range source.AllSchemes {
		knownSchemes.Add(string(source.AllSchemes[i]))
	}

	// names of the schemes actually exercised by the cases below
	exercisedSchemes := strset.New()

	type testCase struct {
		name     string
		src      source.Metadata
		expected model.Source
	}

	cases := []testCase{
		{
			name: "directory",
			src: source.Metadata{
				Scheme: source.DirectoryScheme,
				Path:   "some/path",
			},
			expected: model.Source{
				Type:   "directory",
				Target: "some/path",
			},
		},
		{
			name: "file",
			src: source.Metadata{
				Scheme: source.FileScheme,
				Path:   "some/path",
			},
			expected: model.Source{
				Type:   "file",
				Target: "some/path",
			},
		},
		{
			name: "image",
			src: source.Metadata{
				Scheme: source.ImageScheme,
				ImageMetadata: source.ImageMetadata{
					UserInput:      "user-input",
					ID:             "id...",
					ManifestDigest: "digest...",
					MediaType:      "type...",
				},
			},
			expected: model.Source{
				Type: "image",
				Target: source.ImageMetadata{
					UserInput:      "user-input",
					ID:             "id...",
					ManifestDigest: "digest...",
					MediaType:      "type...",
				},
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// record the scheme up front so it counts as covered even if the conversion fails
			exercisedSchemes.Add(string(tc.src.Scheme))

			// the converted model must match the expectation exactly
			got, err := toSourceModel(tc.src)
			require.NoError(t, err)
			assert.Equal(t, tc.expected, got)
		})
	}

	// every known scheme must have been exercised by at least one case
	assert.ElementsMatch(t, knownSchemes.List(), exercisedSchemes.List(), "not all source.Schemes are under test")
}

View File

@ -40,6 +40,11 @@ func toSyftSourceData(s model.Source) *source.Metadata {
Scheme: source.DirectoryScheme, Scheme: source.DirectoryScheme,
Path: s.Target.(string), Path: s.Target.(string),
} }
case "file":
return &source.Metadata{
Scheme: source.FileScheme,
Path: s.Target.(string),
}
case "image": case "image":
return &source.Metadata{ return &source.Metadata{
Scheme: source.ImageScheme, Scheme: source.ImageScheme,

View File

@ -0,0 +1,81 @@
package syftjson
import (
"testing"
"github.com/anchore/syft/internal/formats/syftjson/model"
"github.com/anchore/syft/syft/source"
"github.com/scylladb/go-set/strset"
"github.com/stretchr/testify/assert"
)
// Test_toSyftSourceData checks the conversion from the JSON model.Source back to
// source.Metadata for each source type and additionally asserts that every
// scheme listed in source.AllSchemes is covered by a test case.
func Test_toSyftSourceData(t *testing.T) {
	// names of every scheme known to the source package
	allSchemes := strset.New()
	for _, s := range source.AllSchemes {
		allSchemes.Add(string(s))
	}

	// names of the schemes actually exercised by the cases below
	testedSchemes := strset.New()

	tests := []struct {
		name     string
		src      model.Source
		expected source.Metadata
	}{
		{
			name: "directory",
			expected: source.Metadata{
				Scheme: source.DirectoryScheme,
				Path:   "some/path",
			},
			src: model.Source{
				Type:   "directory",
				Target: "some/path",
			},
		},
		{
			name: "file",
			expected: source.Metadata{
				Scheme: source.FileScheme,
				Path:   "some/path",
			},
			src: model.Source{
				Type:   "file",
				Target: "some/path",
			},
		},
		{
			name: "image",
			expected: source.Metadata{
				Scheme: source.ImageScheme,
				ImageMetadata: source.ImageMetadata{
					UserInput:      "user-input",
					ID:             "id...",
					ManifestDigest: "digest...",
					MediaType:      "type...",
				},
			},
			src: model.Source{
				Type: "image",
				Target: source.ImageMetadata{
					UserInput:      "user-input",
					ID:             "id...",
					ManifestDigest: "digest...",
					MediaType:      "type...",
				},
			},
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// track each scheme tested (passed or not)
			testedSchemes.Add(string(test.expected.Scheme))

			actual := toSyftSourceData(test.src)

			// toSyftSourceData returns a pointer; guard against nil so that an
			// unhandled source type is reported as a test failure rather than
			// panicking on the dereference below and aborting the whole run
			if !assert.NotNil(t, actual, "no source metadata returned for type %q", test.src.Type) {
				return
			}

			// assert the model transformation is correct
			assert.Equal(t, test.expected, *actual)
		})
	}

	// assert all possible schemes were under test
	assert.ElementsMatch(t, allSchemes.List(), testedSchemes.List(), "not all source.Schemes are under test")
}

View File

@ -16,7 +16,7 @@ func encoder(output io.Writer, s sbom.SBOM) error {
w.Init(output, 0, 8, 0, '\t', tabwriter.AlignRight) w.Init(output, 0, 8, 0, '\t', tabwriter.AlignRight)
switch s.Source.Scheme { switch s.Source.Scheme {
case source.DirectoryScheme: case source.DirectoryScheme, source.FileScheme:
fmt.Fprintf(w, "[Path: %s]\n", s.Source.Path) fmt.Fprintf(w, "[Path: %s]\n", s.Source.Path)
case source.ImageScheme: case source.ImageScheme:
fmt.Fprintln(w, "[Image]") fmt.Fprintln(w, "[Image]")

View File

@ -23,6 +23,12 @@ const (
FileScheme Scheme = "FileScheme" FileScheme Scheme = "FileScheme"
) )
var AllSchemes = []Scheme{
DirectoryScheme,
ImageScheme,
FileScheme,
}
func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, image.Source, string, error) { func detectScheme(fs afero.Fs, imageDetector sourceDetector, userInput string) (Scheme, image.Source, string, error) {
switch { switch {
case strings.HasPrefix(userInput, "dir:"): case strings.HasPrefix(userInput, "dir:"):