From f6cc0c86289f22b7ea73437327967c0c0143c068 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Sun, 10 Oct 2021 18:30:30 -0700 Subject: [PATCH] migrate spdxjson presenter to spdxjson format object Signed-off-by: Alex Goodman --- internal/formats/spdx22json/decoder.go | 24 +++ internal/formats/spdx22json/decoder_test.go | 50 +++++ internal/formats/spdx22json/encoder.go | 24 +++ internal/formats/spdx22json/format.go | 12 ++ .../formats/spdx22json/model/annotation.go | 21 ++ internal/formats/spdx22json/model/checksum.go | 7 + .../formats/spdx22json/model/creation_info.go | 19 ++ internal/formats/spdx22json/model/document.go | 51 +++++ internal/formats/spdx22json/model/element.go | 12 ++ .../formats/spdx22json/model/element_id.go | 37 ++++ .../spdx22json/model/external_document_ref.go | 9 + .../formats/spdx22json/model/external_ref.go | 43 ++++ internal/formats/spdx22json/model/file.go | 41 ++++ .../model/has_extracted_licensing_info.go | 14 ++ internal/formats/spdx22json/model/item.go | 22 +++ internal/formats/spdx22json/model/package.go | 53 +++++ .../model/package_verification_code.go | 23 +++ .../formats/spdx22json/model/relationship.go | 183 ++++++++++++++++++ internal/formats/spdx22json/model/snippet.go | 32 +++ .../spdx22json/model/syft_distro_data.go | 7 + .../spdx22json/model/syft_package_data.go | 114 +++++++++++ internal/formats/spdx22json/model/version.go | 3 + .../test-fixtures/image-simple/Dockerfile | 4 + .../test-fixtures/image-simple/file-1.txt | 1 + .../test-fixtures/image-simple/file-2.txt | 1 + .../TestSPDXJSONDirectoryPresenter.golden | 79 ++++++++ .../TestSPDXJSONImagePresenter.golden | 61 ++++++ .../stereoscope-fixture-image-simple.golden | Bin 0 -> 15360 bytes .../formats/spdx22json/to_format_model.go | 164 ++++++++++++++++ internal/formats/spdx22json/to_syft_model.go | 64 ++++++ internal/formats/spdx22json/validator.go | 26 +++ 31 files changed, 1201 insertions(+) create mode 100644 internal/formats/spdx22json/decoder.go create mode 100644 
internal/formats/spdx22json/decoder_test.go create mode 100644 internal/formats/spdx22json/encoder.go create mode 100644 internal/formats/spdx22json/format.go create mode 100644 internal/formats/spdx22json/model/annotation.go create mode 100644 internal/formats/spdx22json/model/checksum.go create mode 100644 internal/formats/spdx22json/model/creation_info.go create mode 100644 internal/formats/spdx22json/model/document.go create mode 100644 internal/formats/spdx22json/model/element.go create mode 100644 internal/formats/spdx22json/model/element_id.go create mode 100644 internal/formats/spdx22json/model/external_document_ref.go create mode 100644 internal/formats/spdx22json/model/external_ref.go create mode 100644 internal/formats/spdx22json/model/file.go create mode 100644 internal/formats/spdx22json/model/has_extracted_licensing_info.go create mode 100644 internal/formats/spdx22json/model/item.go create mode 100644 internal/formats/spdx22json/model/package.go create mode 100644 internal/formats/spdx22json/model/package_verification_code.go create mode 100644 internal/formats/spdx22json/model/relationship.go create mode 100644 internal/formats/spdx22json/model/snippet.go create mode 100644 internal/formats/spdx22json/model/syft_distro_data.go create mode 100644 internal/formats/spdx22json/model/syft_package_data.go create mode 100644 internal/formats/spdx22json/model/version.go create mode 100644 internal/formats/spdx22json/test-fixtures/image-simple/Dockerfile create mode 100644 internal/formats/spdx22json/test-fixtures/image-simple/file-1.txt create mode 100644 internal/formats/spdx22json/test-fixtures/image-simple/file-2.txt create mode 100644 internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden create mode 100644 internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden create mode 100644 internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden create mode 100644 
internal/formats/spdx22json/to_format_model.go create mode 100644 internal/formats/spdx22json/to_syft_model.go create mode 100644 internal/formats/spdx22json/validator.go diff --git a/internal/formats/spdx22json/decoder.go b/internal/formats/spdx22json/decoder.go new file mode 100644 index 000000000..d67cd8cd4 --- /dev/null +++ b/internal/formats/spdx22json/decoder.go @@ -0,0 +1,24 @@ +package spdx22json + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/anchore/syft/internal/formats/spdx22json/model" + "github.com/anchore/syft/syft/distro" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +func decoder(reader io.Reader) (*pkg.Catalog, *source.Metadata, *distro.Distro, error) { + dec := json.NewDecoder(reader) + + var doc model.Document + err := dec.Decode(&doc) + if err != nil { + return nil, nil, nil, fmt.Errorf("unable to decode spdx-json: %w", err) + } + + return toSyftModel(doc) +} diff --git a/internal/formats/spdx22json/decoder_test.go b/internal/formats/spdx22json/decoder_test.go new file mode 100644 index 000000000..e6e52186f --- /dev/null +++ b/internal/formats/spdx22json/decoder_test.go @@ -0,0 +1,50 @@ +package spdx22json + +import ( + "bytes" + "strings" + "testing" + + "github.com/anchore/syft/internal/formats/common/testutils" + "github.com/go-test/deep" + "github.com/stretchr/testify/assert" +) + +func TestEncodeDecodeCycle(t *testing.T) { + testImage := "image-simple" + originalCatalog, originalMetadata, _ := testutils.ImageInput(t, testImage) + + var buf bytes.Buffer + assert.NoError(t, encoder(&buf, originalCatalog, &originalMetadata, nil)) + + actualCatalog, actualMetadata, _, err := decoder(bytes.NewReader(buf.Bytes())) + assert.NoError(t, err) + + for _, d := range deep.Equal(originalMetadata, *actualMetadata) { + t.Errorf("metadata difference: %+v", d) + } + + actualPackages := actualCatalog.Sorted() + for idx, p := range originalCatalog.Sorted() { + if !assert.Equal(t, p.Name, 
actualPackages[idx].Name) { + t.Errorf("different package at idx=%d: %s vs %s", idx, p.Name, actualPackages[idx].Name) + continue + } + + // ids will never be equal + p.ID = "" + actualPackages[idx].ID = "" + + for _, d := range deep.Equal(*p, *actualPackages[idx]) { + if strings.Contains(d, ".VirtualPath: ") { + // location.Virtual path is not exposed in the json output + continue + } + if strings.HasSuffix(d, " != []") { + // semantically the same + continue + } + t.Errorf("package difference (%s): %+v", p.Name, d) + } + } +} diff --git a/internal/formats/spdx22json/encoder.go b/internal/formats/spdx22json/encoder.go new file mode 100644 index 000000000..d9737b3ab --- /dev/null +++ b/internal/formats/spdx22json/encoder.go @@ -0,0 +1,24 @@ +package spdx22json + +import ( + "encoding/json" + "io" + + "github.com/anchore/syft/syft/distro" + + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +const anchoreNamespace = "https://anchore.com/syft" + +func encoder(output io.Writer, catalog *pkg.Catalog, srcMetadata *source.Metadata, d *distro.Distro) error { + doc := toFormatModel(catalog, srcMetadata, d) + + enc := json.NewEncoder(output) + // prevent > and < from being escaped in the payload + enc.SetEscapeHTML(false) + enc.SetIndent("", " ") + + return enc.Encode(&doc) +} diff --git a/internal/formats/spdx22json/format.go b/internal/formats/spdx22json/format.go new file mode 100644 index 000000000..88b4ee641 --- /dev/null +++ b/internal/formats/spdx22json/format.go @@ -0,0 +1,12 @@ +package spdx22json + +import "github.com/anchore/syft/syft/format" + +func Format() format.Format { + return format.NewFormat( + format.SPDXJSONOption, + encoder, + decoder, + validator, + ) +} diff --git a/internal/formats/spdx22json/model/annotation.go b/internal/formats/spdx22json/model/annotation.go new file mode 100644 index 000000000..e924cb7c6 --- /dev/null +++ b/internal/formats/spdx22json/model/annotation.go @@ -0,0 +1,21 @@ +package model + +import 
"time" + +type AnnotationType string + +const ( + ReviewerAnnotationType AnnotationType = "REVIEWER" + OtherAnnotationType AnnotationType = "OTHER" +) + +type Annotation struct { + // Identify when the comment was made. This is to be specified according to the combined date and time in the + // UTC format, as specified in the ISO 8601 standard. + AnnotationDate time.Time `json:"annotationDate"` + // Type of the annotation + AnnotationType AnnotationType `json:"annotationType"` + // This field identifies the person, organization or tool that has commented on a file, package, or the entire document. + Annotator string `json:"annotator"` + Comment string `json:"comment"` +} diff --git a/internal/formats/spdx22json/model/checksum.go b/internal/formats/spdx22json/model/checksum.go new file mode 100644 index 000000000..b995a95fb --- /dev/null +++ b/internal/formats/spdx22json/model/checksum.go @@ -0,0 +1,7 @@ +package model + +type Checksum struct { + // Identifies the algorithm used to produce the subject Checksum. One of: "SHA256", "SHA1", "SHA384", "MD2", "MD4", "SHA512", "MD6", "MD5", "SHA224" + Algorithm string `json:"algorithm"` + ChecksumValue string `json:"checksumValue"` +} diff --git a/internal/formats/spdx22json/model/creation_info.go b/internal/formats/spdx22json/model/creation_info.go new file mode 100644 index 000000000..2feb494fc --- /dev/null +++ b/internal/formats/spdx22json/model/creation_info.go @@ -0,0 +1,19 @@ +package model + +import "time" + +type CreationInfo struct { + Comment string `json:"comment,omitempty"` + // Identify when the SPDX file was originally created. The date is to be specified according to combined date and + // time in UTC format as specified in ISO 8601 standard. This field is distinct from the fields in section 8, + // which involves the addition of information during a subsequent review. + Created time.Time `json:"created"` + // Identify who (or what, in the case of a tool) created the SPDX file. 
If the SPDX file was created by an + // individual, indicate the person's name. If the SPDX file was created on behalf of a company or organization, + // indicate the entity name. If the SPDX file was created using a software tool, indicate the name and version + // for that tool. If multiple participants or tools were involved, use multiple instances of this field. Person + // name or organization name may be designated as “anonymous” if appropriate. + Creators []string `json:"creators"` + // An optional field for creators of the SPDX file to provide the version of the SPDX License List used when the SPDX file was created. + LicenseListVersion string `json:"licenseListVersion"` +} diff --git a/internal/formats/spdx22json/model/document.go b/internal/formats/spdx22json/model/document.go new file mode 100644 index 000000000..c7e9b9965 --- /dev/null +++ b/internal/formats/spdx22json/model/document.go @@ -0,0 +1,51 @@ +package model + +import "github.com/anchore/syft/syft/source" + +// derived from: +// - https://spdx.github.io/spdx-spec/appendix-III-RDF-data-model-implementation-and-identifier-syntax/ +// - https://github.com/spdx/spdx-spec/blob/v2.2/schemas/spdx-schema.json +// - https://github.com/spdx/spdx-spec/tree/v2.2/ontology + +type Document struct { + Element + SPDXVersion string `json:"spdxVersion"` + // One instance is required for each SPDX file produced. It provides the necessary information for forward + // and backward compatibility for processing tools. + CreationInfo CreationInfo `json:"creationInfo"` + // SyftSourceData contains information about what is being described in this SPDX document (e.g. 
a container image, a directory, etc) + SyftSourceData *source.Metadata `json:"syftSourceData,omitempty"` + // SyftDistroData contains information about the linux distribution discovered + SyftDistroData *SyftDistroData `json:"syftDistroData,omitempty"` + // 2.2: Data License; should be "CC0-1.0" + // Cardinality: mandatory, one + // License expression for dataLicense. Compliance with the SPDX specification includes populating the SPDX + // fields therein with data related to such fields (\"SPDX-Metadata\"). The SPDX specification contains numerous + // fields where an SPDX document creator may provide relevant explanatory text in SPDX-Metadata. Without + // opining on the lawfulness of \"database rights\" (in jurisdictions where applicable), such explanatory text + // is copyrightable subject matter in most Berne Convention countries. By using the SPDX specification, or any + // portion hereof, you hereby agree that any copyright rights (as determined by your jurisdiction) in any + // SPDX-Metadata, including without limitation explanatory text, shall be subject to the terms of the Creative + // Commons CC0 1.0 Universal license. For SPDX-Metadata not containing any copyright rights, you hereby agree + // and acknowledge that the SPDX-Metadata is provided to you \"as-is\" and without any representations or + // warranties of any kind concerning the SPDX-Metadata, express, implied, statutory or otherwise, including + // without limitation warranties of title, merchantability, fitness for a particular purpose, non-infringement, + // or the absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not + // discoverable, all to the greatest extent permissible under applicable law. + DataLicense string `json:"dataLicense"` + // Information about an external SPDX document reference including the checksum. This allows for verification of the external references. 
+ ExternalDocumentRefs []ExternalDocumentRef `json:"externalDocumentRefs,omitempty"` + // Indicates that a particular ExtractedLicensingInfo was defined in the subject SpdxDocument. + HasExtractedLicensingInfos []HasExtractedLicensingInfo `json:"hasExtractedLicensingInfos,omitempty"` + // note: found in example documents from SPDX, but not in the JSON schema. See https://spdx.github.io/spdx-spec/2-document-creation-information/#25-spdx-document-namespace + DocumentNamespace string `json:"documentNamespace"` + // note: found in example documents from SPDX, but not in the JSON schema + // DocumentDescribes []string `json:"documentDescribes"` + Packages []Package `json:"packages"` + // Files referenced in the SPDX document + Files []File `json:"files,omitempty"` + // Snippets referenced in the SPDX document + Snippets []Snippet `json:"snippets,omitempty"` + // Relationships referenced in the SPDX document + Relationships []Relationship `json:"relationships,omitempty"` +} diff --git a/internal/formats/spdx22json/model/element.go b/internal/formats/spdx22json/model/element.go new file mode 100644 index 000000000..9c2a68bcb --- /dev/null +++ b/internal/formats/spdx22json/model/element.go @@ -0,0 +1,12 @@ +package model + +type Element struct { + SPDXID string `json:"SPDXID"` + // Identify name of this SpdxElement. + Name string `json:"name"` + // Relationships referenced in the SPDX document + Relationships []Relationship `json:"relationships,omitempty"` + // Provide additional information about an SpdxElement. + Annotations []Annotation `json:"annotations,omitempty"` + Comment string `json:"comment,omitempty"` +} diff --git a/internal/formats/spdx22json/model/element_id.go b/internal/formats/spdx22json/model/element_id.go new file mode 100644 index 000000000..50251fe37 --- /dev/null +++ b/internal/formats/spdx22json/model/element_id.go @@ -0,0 +1,37 @@ +package model + +// ElementID represents the identifier string portion of an SPDX element +// identifier. 
DocElementID should be used for any attributes which can +// contain identifiers defined in a different SPDX document. +// ElementIDs should NOT contain the mandatory 'SPDXRef-' portion. +type ElementID string + +func (e ElementID) String() string { + return "SPDXRef-" + string(e) +} + +// DocElementID represents an SPDX element identifier that could be defined +// in a different SPDX document, and therefore could have a "DocumentRef-" +// portion, such as Relationship and Annotations. +// ElementID is used for attributes in which a "DocumentRef-" portion cannot +// appear, such as a Package or File definition (since it is necessarily +// being defined in the present document). +// DocumentRefID will be the empty string for elements defined in the +// present document. +// DocElementIDs should NOT contain the mandatory 'DocumentRef-' or +// 'SPDXRef-' portions. +type DocElementID struct { + DocumentRefID string + ElementRefID ElementID +} + +// String returns the string equivalent of the DocElementID, +// with the SPDXRef- prefix (and, if applicable, the DocumentRef- prefix) +// reinserted. +func (d DocElementID) String() string { + prefix := "" + if d.DocumentRefID != "" { + prefix = "DocumentRef-" + d.DocumentRefID + ":" + } + return prefix + d.ElementRefID.String() +} diff --git a/internal/formats/spdx22json/model/external_document_ref.go b/internal/formats/spdx22json/model/external_document_ref.go new file mode 100644 index 000000000..10e1d9ec3 --- /dev/null +++ b/internal/formats/spdx22json/model/external_document_ref.go @@ -0,0 +1,9 @@ +package model + +type ExternalDocumentRef struct { + // externalDocumentId is a string containing letters, numbers, ., - and/or + which uniquely identifies an external document within this document. + ExternalDocumentID string `json:"externalDocumentId"` + Checksum Checksum `json:"checksum"` + // SPDX ID for SpdxDocument. A property containing an SPDX document. 
+ SpdxDocument string `json:"spdxDocument"` +} diff --git a/internal/formats/spdx22json/model/external_ref.go b/internal/formats/spdx22json/model/external_ref.go new file mode 100644 index 000000000..9dc49b3dd --- /dev/null +++ b/internal/formats/spdx22json/model/external_ref.go @@ -0,0 +1,43 @@ +package model + +type ReferenceCategory string + +const ( + SecurityReferenceCategory ReferenceCategory = "SECURITY" + PackageManagerReferenceCategory ReferenceCategory = "PACKAGE_MANAGER" + OtherReferenceCategory ReferenceCategory = "OTHER" +) + +// source: https://spdx.github.io/spdx-spec/appendix-VI-external-repository-identifiers/ + +type ExternalRefType string + +const ( + // see https://nvd.nist.gov/cpe + Cpe22ExternalRefType ExternalRefType = "cpe22Type" + // see https://nvd.nist.gov/cpe + Cpe23ExternalRefType ExternalRefType = "cpe23Type" + // see http://repo1.maven.org/maven2/ + MavenCentralExternalRefType ExternalRefType = "maven-central" + // see https://www.npmjs.com/ + NpmExternalRefType ExternalRefType = "npm" + // see https://www.nuget.org/ + NugetExternalRefType ExternalRefType = "nuget" + // see http://bower.io/ + BowerExternalRefType ExternalRefType = "bower" + // see https://github.com/package-url/purl-spec + PurlExternalRefType ExternalRefType = "purl" + // These point to objects present in the Software Heritage archive by the means of Software Heritage persistent Identifiers (SWHID) + SwhExternalRefType ExternalRefType = "swh" +) + +type ExternalRef struct { + Comment string `json:"comment,omitempty"` + // Category for the external reference. + ReferenceCategory ReferenceCategory `json:"referenceCategory"` + // The unique string with no spaces necessary to access the package-specific information, metadata, or content + // within the target location. The format of the locator is subject to constraints defined by the reference type. + ReferenceLocator string `json:"referenceLocator"` + // Type of the external reference. 
These are defined in an appendix in the SPDX specification. + ReferenceType ExternalRefType `json:"referenceType"` +} diff --git a/internal/formats/spdx22json/model/file.go b/internal/formats/spdx22json/model/file.go new file mode 100644 index 000000000..970f369cc --- /dev/null +++ b/internal/formats/spdx22json/model/file.go @@ -0,0 +1,41 @@ +package model + +type FileType string + +const ( + DocumentationFileType FileType = "DOCUMENTATION" + ImageFileType FileType = "IMAGE" + VideoFileType FileType = "VIDEO" + ArchiveFileType FileType = "ARCHIVE" + SpdxFileType FileType = "SPDX" + ApplicationFileType FileType = "APPLICATION" + SourceFileType FileType = "SOURCE" + BinaryFileType FileType = "BINARY" + TextFileType FileType = "TEXT" + AudioFileType FileType = "AUDIO" + OtherFileType FileType = "OTHER" +) + +type File struct { + Item + // (At least one is required.) The checksum property provides a mechanism that can be used to verify that the + // contents of a File or Package have not changed. + Checksums []Checksum `json:"checksums,omitempty"` + // This field provides a place for the SPDX file creator to record file contributors. Contributors could include + // names of copyright holders and/or authors who may not be copyright holders yet contributed to the file content. + FileContributors []string `json:"fileContributors,omitempty"` + // Each element is a SPDX ID for a File. + FileDependencies []string `json:"fileDependencies,omitempty"` + // The name of the file relative to the root of the package. + FileName string `json:"fileName"` + // The type of the file + FileTypes []string `json:"fileTypes,omitempty"` + // This field provides a place for the SPDX file creator to record potential legal notices found in the file. + // This may or may not include copyright statements. + NoticeText string `json:"noticeText,omitempty"` + // Indicates the project in which the SpdxElement originated. 
Tools must preserve doap:homepage and doap:name + // properties and the URI (if one is known) of doap:Project resources that are values of this property. All other + // properties of doap:Projects are not directly supported by SPDX and may be dropped when translating to or + // from some SPDX formats (deprecated). + ArtifactOf []string `json:"artifactOf,omitempty"` +} diff --git a/internal/formats/spdx22json/model/has_extracted_licensing_info.go b/internal/formats/spdx22json/model/has_extracted_licensing_info.go new file mode 100644 index 000000000..8c0748073 --- /dev/null +++ b/internal/formats/spdx22json/model/has_extracted_licensing_info.go @@ -0,0 +1,14 @@ +package model + +type HasExtractedLicensingInfo struct { + // Verbatim license or licensing notice text that was discovered. + ExtractedText string `json:"extractedText"` + // A human readable short form license identifier for a license. The license ID is either on the standard license + // list or the form "LicenseRef-"[idString] where [idString] is a unique string containing letters, + // numbers, ".", "-" or "+". + LicenseID string `json:"licenseId"` + Comment string `json:"comment,omitempty"` + // Identify name of this SpdxElement. + Name string `json:"name,omitempty"` + SeeAlsos []string `json:"seeAlsos,omitempty"` +} diff --git a/internal/formats/spdx22json/model/item.go b/internal/formats/spdx22json/model/item.go new file mode 100644 index 000000000..178fbf842 --- /dev/null +++ b/internal/formats/spdx22json/model/item.go @@ -0,0 +1,22 @@ +package model + +type Item struct { + Element + // The licenseComments property allows the preparer of the SPDX document to describe why the licensing in + // spdx:licenseConcluded was chosen. + LicenseComments string `json:"licenseComments,omitempty"` + LicenseConcluded string `json:"licenseConcluded"` + // The licensing information that was discovered directly within the package. 
There will be an instance of this + // property for each distinct value of all licenseInfoInFile properties of all files contained in the package. + LicenseInfoFromFiles []string `json:"licenseInfoFromFiles,omitempty"` + // Licensing information that was discovered directly in the subject file. This is also considered a declared license for the file. + LicenseInfoInFiles []string `json:"licenseInfoInFiles,omitempty"` + // The text of copyright declarations recited in the Package or File. + CopyrightText string `json:"copyrightText,omitempty"` + // This field provides a place for the SPDX data creator to record acknowledgements that may be required to be + // communicated in some contexts. This is not meant to include the actual complete license text (see + // licenseConcluded and licenseDeclared), and may or may not include copyright notices (see also copyrightText). + // The SPDX data creator may use this field to record other acknowledgements, such as particular clauses from + // license texts, which may be necessary or desirable to reproduce. + AttributionTexts []string `json:"attributionTexts,omitempty"` +} diff --git a/internal/formats/spdx22json/model/package.go b/internal/formats/spdx22json/model/package.go new file mode 100644 index 000000000..5d303574f --- /dev/null +++ b/internal/formats/spdx22json/model/package.go @@ -0,0 +1,53 @@ +package model + +type Package struct { + Item + // The checksum property provides a mechanism that can be used to verify that the contents of a File or + // Package have not changed. + Checksums []Checksum `json:"checksums,omitempty"` + // Provides a detailed description of the package. + Description string `json:"description,omitempty"` + // The URI at which this package is available for download. Private (i.e., not publicly reachable) URIs are + // acceptable as values of this property. 
The values http://spdx.org/rdf/terms#none and http://spdx.org/rdf/terms#noassertion + // may be used to specify that the package is not downloadable or that no attempt was made to determine its + // download location, respectively. + DownloadLocation string `json:"downloadLocation,omitempty"` + // An External Reference allows a Package to reference an external source of additional information, metadata, + // enumerations, asset identifiers, or downloadable content believed to be relevant to the Package. + ExternalRefs []ExternalRef `json:"externalRefs,omitempty"` + // Indicates whether the file content of this package has been available for or subjected to analysis when + // creating the SPDX document. If false indicates packages that represent metadata or URI references to a + // project, product, artifact, distribution or a component. If set to false, the package must not contain any files + FilesAnalyzed bool `json:"filesAnalyzed"` + // Indicates that a particular file belongs to a package (elements are SPDX ID for a File). + HasFiles []string `json:"hasFiles,omitempty"` + // Provide a place for the SPDX file creator to record a web site that serves as the package's home page. + // This link can also be used to reference further information about the package referenced by the SPDX file creator. + Homepage string `json:"homepage,omitempty"` + // List the licenses that have been declared by the authors of the package. Any license information that does not + // originate from the package authors, e.g. license information from a third party repository, should not be included in this field. + LicenseDeclared string `json:"licenseDeclared"` + // The name and, optionally, contact information of the person or organization that originally created the package. + // Values of this property must conform to the agent and tool syntax. + Originator string `json:"originator,omitempty"` + // The base name of the package file name. For example, zlib-1.2.5.tar.gz. 
+ PackageFileName string `json:"packageFileName,omitempty"` + // A manifest based verification code (the algorithm is defined in section 4.7 of the full specification) of the + // SPDX Item. This allows consumers of this data and/or database to determine if an SPDX item they have in hand + // is identical to the SPDX item from which the data was produced. This algorithm works even if the SPDX document + // is included in the SPDX item. + PackageVerificationCode *PackageVerificationCode `json:"packageVerificationCode,omitempty"` + // Allows the producer(s) of the SPDX document to describe how the package was acquired and/or changed from the original source. + SourceInfo string `json:"sourceInfo,omitempty"` + // Provides a short description of the package. + Summary string `json:"summary,omitempty"` + // The name and, optionally, contact information of the person or organization who was the immediate supplier + // of this package to the recipient. The supplier may be different than originator when the software has been + // repackaged. Values of this property must conform to the agent and tool syntax. + Supplier string `json:"supplier,omitempty"` + // Provides an indication of the version of the package that is described by this SpdxDocument. + VersionInfo string `json:"versionInfo,omitempty"` + // SyftPackageData provides a spot to add syft-specific data that is not available in-spec or is not easily decodable from + // other SPDX fields. + SyftPackageData *SyftPackageData `json:"syftPackageData,omitempty"` +} diff --git a/internal/formats/spdx22json/model/package_verification_code.go b/internal/formats/spdx22json/model/package_verification_code.go new file mode 100644 index 000000000..508c9169e --- /dev/null +++ b/internal/formats/spdx22json/model/package_verification_code.go @@ -0,0 +1,23 @@ +package model + +// Why are there two package identifier fields Package Checksum and Package Verification? 
+// Although the values of the two fields Package Checksum and Package Verification are similar, they each serve a +// different purpose. The Package Checksum provides a unique identifier of a software package which is computed by +// taking the SHA1 of the entire software package file. This enables one to quickly determine if two different copies +// of a package are the same. One disadvantage of this approach is that one cannot add an SPDX data file into the +// original package without changing the Package Checksum value. Alternatively, the Package Verification field enables +// the inclusion of an SPDX file. It enables one to quickly verify if one or more of the original package files has +// changed. The Package Verification field is a unique identifier that is based on SHAing only the original package +// files (e.g., excluding the SPDX file). This allows one to add an SPDX file to the original package without changing +// this unique identifier. +// source: https://wiki.spdx.org/view/SPDX_FAQ +type PackageVerificationCode struct { + // A file that was excluded when calculating the package verification code. This is usually a file containing + // SPDX data regarding the package. If a package contains more than one SPDX file all SPDX files must be excluded + // from the package verification code. If this is not done it would be impossible to correctly calculate the + // verification codes in both files. + PackageVerificationCodeExcludedFiles []string `json:"packageVerificationCodeExcludedFiles"` + + // The actual package verification code as a hex encoded value. 
+ PackageVerificationCodeValue string `json:"packageVerificationCodeValue"` +} diff --git a/internal/formats/spdx22json/model/relationship.go b/internal/formats/spdx22json/model/relationship.go new file mode 100644 index 000000000..51c52233b --- /dev/null +++ b/internal/formats/spdx22json/model/relationship.go @@ -0,0 +1,183 @@ +package model + +type Relationship struct { + // Id to which the SPDX element is related + SpdxElementID string `json:"spdxElementId"` + // Describes the type of relationship between two SPDX elements. + RelationshipType RelationshipType `json:"relationshipType"` + // SPDX ID for SpdxElement. A related SpdxElement. + RelatedSpdxElement string `json:"relatedSpdxElement"` + Comment string `json:"comment,omitempty"` +} + +// source: https://spdx.github.io/spdx-spec/7-relationships-between-SPDX-elements/ +type RelationshipType string + +const ( + // DescribedByRelationship is to be used when SPDXRef-A is described by SPDXREF-Document. + // Example: The package 'WildFly' is described by SPDX document WildFly.spdx. + DescribedByRelationship RelationshipType = "DESCRIBED_BY" + + // ContainsRelationship is to be used when SPDXRef-A contains SPDXRef-B. + // Example: An ARCHIVE file bar.tgz contains a SOURCE file foo.c. + ContainsRelationship RelationshipType = "CONTAINS" + + // ContainedByRelationship is to be used when SPDXRef-A is contained by SPDXRef-B. + // Example: A SOURCE file foo.c is contained by ARCHIVE file bar.tgz + ContainedByRelationship RelationshipType = "CONTAINED_BY" + + // DependsOnRelationship is to be used when SPDXRef-A depends on SPDXRef-B. + // Example: Package A depends on the presence of package B in order to build and run + DependsOnRelationship RelationshipType = "DEPENDS_ON" + + // DependencyOfRelationship is to be used when SPDXRef-A is dependency of SPDXRef-B. + // Example: A is explicitly stated as a dependency of B in a machine-readable file. Use when a package manager does not define scopes. 
+ DependencyOfRelationship RelationshipType = "DEPENDENCY_OF" + + // DependencyManifestOfRelationship is to be used when SPDXRef-A is a manifest file that lists a set of dependencies for SPDXRef-B. + // Example: A file package.json is the dependency manifest of a package foo. Note that only one manifest should be used to define the same dependency graph. + DependencyManifestOfRelationship RelationshipType = "DEPENDENCY_MANIFEST_OF" + + // BuildDependencyOfRelationship is to be used when SPDXRef-A is a build dependency of SPDXRef-B. + // Example: A is in the compile scope of B in a Maven project. + BuildDependencyOfRelationship RelationshipType = "BUILD_DEPENDENCY_OF" + + // DevDependencyOfRelationship is to be used when SPDXRef-A is a development dependency of SPDXRef-B. + // Example: A is in the devDependencies scope of B in a Maven project. + DevDependencyOfRelationship RelationshipType = "DEV_DEPENDENCY_OF" + + // OptionalDependencyOfRelationship is to be used when SPDXRef-A is an optional dependency of SPDXRef-B. + // Example: Use when building the code will proceed even if a dependency cannot be found, fails to install, or is only installed on a specific platform. For example, A is in the optionalDependencies scope of npm project B. + OptionalDependencyOfRelationship RelationshipType = "OPTIONAL_DEPENDENCY_OF" + + // ProvidedDependencyOfRelationship is to be used when SPDXRef-A is a to be provided dependency of SPDXRef-B. + // Example: A is in the provided scope of B in a Maven project, indicating that the project expects it to be provided, for instance, by the container or JDK. + ProvidedDependencyOfRelationship RelationshipType = "PROVIDED_DEPENDENCY_OF" + + // TestDependencyOfRelationship is to be used when SPDXRef-A is a test dependency of SPDXRef-B. + // Example: A is in the test scope of B in a Maven project. 
+ TestDependencyOfRelationship RelationshipType = "TEST_DEPENDENCY_OF" + + // RuntimeDependencyOfRelationship is to be used when SPDXRef-A is a dependency required for the execution of SPDXRef-B. + // Example: A is in the runtime scope of B in a Maven project. + RuntimeDependencyOfRelationship RelationshipType = "RUNTIME_DEPENDENCY_OF" + + // ExampleOfRelationship is to be used when SPDXRef-A is an example of SPDXRef-B. + // Example: The file or snippet that illustrates how to use an application or library. + ExampleOfRelationship RelationshipType = "EXAMPLE_OF" + + // GeneratesRelationship is to be used when SPDXRef-A generates SPDXRef-B. + // Example: A SOURCE file makefile.mk generates a BINARY file a.out + GeneratesRelationship RelationshipType = "GENERATES" + + // GeneratedFromRelationship is to be used when SPDXRef-A was generated from SPDXRef-B. + // Example: A BINARY file a.out has been generated from a SOURCE file makefile.mk. A BINARY file foolib.a is generated from a SOURCE file bar.c. + GeneratedFromRelationship RelationshipType = "GENERATED_FROM" + + // AncestorOfRelationship is to be used when SPDXRef-A is an ancestor (same lineage but pre-dates) SPDXRef-B. + // Example: A SOURCE file makefile.mk is a version of the original ancestor SOURCE file 'makefile2.mk' + AncestorOfRelationship RelationshipType = "ANCESTOR_OF" + + // DescendantOfRelationship is to be used when SPDXRef-A is a descendant of (same lineage but postdates) SPDXRef-B. + // Example: A SOURCE file makefile2.mk is a descendant of the original SOURCE file 'makefile.mk' + DescendantOfRelationship RelationshipType = "DESCENDANT_OF" + + // VariantOfRelationship is to be used when SPDXRef-A is a variant of (same lineage but not clear which came first) SPDXRef-B. + // Example: A SOURCE file makefile2.mk is a variant of SOURCE file makefile.mk if they differ by some edit, but there is no way to tell which came first (no reliable date information). 
+ VariantOfRelationship RelationshipType = "VARIANT_OF" + + // DistributionArtifactRelationship is to be used when distributing SPDXRef-A requires that SPDXRef-B also be distributed. + // Example: A BINARY file foo.o requires that the ARCHIVE file bar-sources.tgz be made available on distribution. + DistributionArtifactRelationship RelationshipType = "DISTRIBUTION_ARTIFACT" + + // PatchForRelationship is to be used when SPDXRef-A is a patch file for (to be applied to) SPDXRef-B. + // Example: A SOURCE file foo.diff is a patch file for SOURCE file foo.c. + PatchForRelationship RelationshipType = "PATCH_FOR" + + // PatchAppliedRelationship is to be used when SPDXRef-A is a patch file that has been applied to SPDXRef-B. + // Example: A SOURCE file foo.diff is a patch file that has been applied to SOURCE file 'foo-patched.c'. + PatchAppliedRelationship RelationshipType = "PATCH_APPLIED" + + // CopyOfRelationship is to be used when SPDXRef-A is an exact copy of SPDXRef-B. + // Example: A BINARY file alib.a is an exact copy of BINARY file a2lib.a. + CopyOfRelationship RelationshipType = "COPY_OF" + + // FileAddedRelationship is to be used when SPDXRef-A is a file that was added to SPDXRef-B. + // Example: A SOURCE file foo.c has been added to package ARCHIVE bar.tgz. + FileAddedRelationship RelationshipType = "FILE_ADDED" + + // FileDeletedRelationship is to be used when SPDXRef-A is a file that was deleted from SPDXRef-B. + // Example: A SOURCE file foo.diff has been deleted from package ARCHIVE bar.tgz. + FileDeletedRelationship RelationshipType = "FILE_DELETED" + + // FileModifiedRelationship is to be used when SPDXRef-A is a file that was modified from SPDXRef-B. + // Example: A SOURCE file foo.c has been modified from SOURCE file foo.orig.c. + FileModifiedRelationship RelationshipType = "FILE_MODIFIED" + + // ExpandedFromArchiveRelationship is to be used when SPDXRef-A is expanded from the archive SPDXRef-B. 
+ // Example: A SOURCE file foo.c, has been expanded from the archive ARCHIVE file xyz.tgz. + ExpandedFromArchiveRelationship RelationshipType = "EXPANDED_FROM_ARCHIVE" + + // DynamicLinkRelationship is to be used when SPDXRef-A dynamically links to SPDXRef-B. + // Example: An APPLICATION file 'myapp' dynamically links to BINARY file zlib.so. + DynamicLinkRelationship RelationshipType = "DYNAMIC_LINK" + + // StaticLinkRelationship is to be used when SPDXRef-A statically links to SPDXRef-B. + // Example: An APPLICATION file 'myapp' statically links to BINARY zlib.a. + StaticLinkRelationship RelationshipType = "STATIC_LINK" + + // DataFileOfRelationship is to be used when SPDXRef-A is a data file used in SPDXRef-B. + // Example: An IMAGE file 'kitty.jpg' is a data file of an APPLICATION 'hellokitty'. + DataFileOfRelationship RelationshipType = "DATA_FILE_OF" + + // TestCaseOfRelationship is to be used when SPDXRef-A is a test case used in testing SPDXRef-B. + // Example: A SOURCE file testMyCode.java is a unit test file used to test an APPLICATION MyPackage. + TestCaseOfRelationship RelationshipType = "TEST_CASE_OF" + + // BuildToolOfRelationship is to be used when SPDXRef-A is used to build SPDXRef-B. + // Example: A SOURCE file makefile.mk is used to build an APPLICATION 'zlib'. + BuildToolOfRelationship RelationshipType = "BUILD_TOOL_OF" + + // DevToolOfRelationship is to be used when SPDXRef-A is used as a development tool for SPDXRef-B. + // Example: Any tool used for development such as a code debugger. + DevToolOfRelationship RelationshipType = "DEV_TOOL_OF" + + // TestOfRelationship is to be used when SPDXRef-A is used for testing SPDXRef-B. + // Example: Generic relationship for cases where it's clear that something is used for testing but unclear whether it's TEST_CASE_OF or TEST_TOOL_OF. + TestOfRelationship RelationshipType = "TEST_OF" + + // TestToolOfRelationship is to be used when SPDXRef-A is used as a test tool for SPDXRef-B. 
+ // Example: Any tool used to test the code such as ESlint. + TestToolOfRelationship RelationshipType = "TEST_TOOL_OF" + + // DocumentationOfRelationship is to be used when SPDXRef-A provides documentation of SPDXRef-B. + // Example: A DOCUMENTATION file readme.txt documents the APPLICATION 'zlib'. + DocumentationOfRelationship RelationshipType = "DOCUMENTATION_OF" + + // OptionalComponentOfRelationship is to be used when SPDXRef-A is an optional component of SPDXRef-B. + // Example: A SOURCE file fool.c (which is in the contributors directory) may or may not be included in the build of APPLICATION 'atthebar'. + OptionalComponentOfRelationship RelationshipType = "OPTIONAL_COMPONENT_OF" + + // MetafileOfRelationship is to be used when SPDXRef-A is a metafile of SPDXRef-B. + // Example: A SOURCE file pom.xml is a metafile of the APPLICATION 'Apache Xerces'. + MetafileOfRelationship RelationshipType = "METAFILE_OF" + + // PackageOfRelationship is to be used when SPDXRef-A is used as a package as part of SPDXRef-B. + // Example: A Linux distribution contains an APPLICATION package gawk as part of the distribution MyLinuxDistro. + PackageOfRelationship RelationshipType = "PACKAGE_OF" + + // AmendsRelationship is to be used when (current) SPDXRef-DOCUMENT amends the SPDX information in SPDXRef-B. + // Example: (Current) SPDX document A version 2 contains a correction to a previous version of the SPDX document A version 1. Note the reserved identifier SPDXRef-DOCUMENT for the current document is required. + AmendsRelationship RelationshipType = "AMENDS" + + // PrerequisiteForRelationship is to be used when SPDXRef-A is a prerequisite for SPDXRef-B. + // Example: A library bar.dll is a prerequisite or dependency for APPLICATION foo.exe + PrerequisiteForRelationship RelationshipType = "PREREQUISITE_FOR" + + // HasPrerequisiteRelationship is to be used when SPDXRef-A has as a prerequisite SPDXRef-B. 
+ // Example: An APPLICATION foo.exe has prerequisite or dependency on bar.dll + HasPrerequisiteRelationship RelationshipType = "HAS_PREREQUISITE" + + // OtherRelationship is to be used for a relationship which has not been defined in the formal SPDX specification. A description of the relationship should be included in the Relationship comments field. + OtherRelationship RelationshipType = "OTHER" +) diff --git a/internal/formats/spdx22json/model/snippet.go b/internal/formats/spdx22json/model/snippet.go new file mode 100644 index 000000000..0d39e5dca --- /dev/null +++ b/internal/formats/spdx22json/model/snippet.go @@ -0,0 +1,32 @@ +package model + +type StartPointer struct { + Offset int `json:"offset,omitempty"` + LineNumber int `json:"lineNumber,omitempty"` + // SPDX ID for File + Reference string `json:"reference"` +} + +type EndPointer struct { + Offset int `json:"offset,omitempty"` + LineNumber int `json:"lineNumber,omitempty"` + // SPDX ID for File + Reference string `json:"reference"` +} + +type Range struct { + StartPointer StartPointer `json:"startPointer"` + EndPointer EndPointer `json:"endPointer"` +} + +type Snippet struct { + Item + // Licensing information that was discovered directly in the subject snippet. This is also considered a declared + // license for the snippet. (elements are license expressions) + LicenseInfoInSnippets []string `json:"licenseInfoInSnippets"` + // SPDX ID for File. File containing the SPDX element (e.g. the file contaning a snippet). + SnippetFromFile string `json:"snippetFromFile"` + // (At least 1 range is required). This field defines the byte range in the original host file (in X.2) that the + // snippet information applies to. 
+ Ranges []Range `json:"ranges"` +} diff --git a/internal/formats/spdx22json/model/syft_distro_data.go b/internal/formats/spdx22json/model/syft_distro_data.go new file mode 100644 index 000000000..8f985bf26 --- /dev/null +++ b/internal/formats/spdx22json/model/syft_distro_data.go @@ -0,0 +1,7 @@ +package model + +type SyftDistroData struct { + Name string `json:"name"` // Name of the Linux distribution + Version string `json:"version"` // Version of the Linux distribution (major or major.minor version) + IDLike string `json:"idLike"` // the ID_LIKE field found within the /etc/os-release file +} diff --git a/internal/formats/spdx22json/model/syft_package_data.go b/internal/formats/spdx22json/model/syft_package_data.go new file mode 100644 index 000000000..58efd69b1 --- /dev/null +++ b/internal/formats/spdx22json/model/syft_package_data.go @@ -0,0 +1,114 @@ +package model + +import ( + "encoding/json" + "fmt" + + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +type SyftPackageData struct { + SyftPackageBasicData + SyftPackageCustomData +} + +type SyftPackageCustomData struct { + MetadataType pkg.MetadataType `json:"metadataType"` + Metadata interface{} `json:"metadata"` +} + +type SyftPackageBasicData struct { + PackageType pkg.Type `json:"type"` + FoundBy string `json:"foundBy"` + Locations []source.Location `json:"locations"` + Licenses []string `json:"licenses"` + Language pkg.Language `json:"language"` +} + +// syftPackageMetadataUnpacker is all values needed from Package to disambiguate ambiguous fields during json unmarshaling. 
+type syftPackageMetadataUnpacker struct { + MetadataType pkg.MetadataType `json:"metadataType"` + Metadata json.RawMessage `json:"metadata"` +} + +func (p *syftPackageMetadataUnpacker) String() string { + return fmt.Sprintf("metadataType: %s, metadata: %s", p.MetadataType, string(p.Metadata)) +} + +// UnmarshalJSON is a custom unmarshaller for handling basic values and values with ambiguous types. +func (p *SyftPackageData) UnmarshalJSON(b []byte) error { + var basic SyftPackageBasicData + if err := json.Unmarshal(b, &basic); err != nil { + return err + } + p.SyftPackageBasicData = basic + + var unpacker syftPackageMetadataUnpacker + if err := json.Unmarshal(b, &unpacker); err != nil { + log.Warnf("failed to unmarshall into syftPackageMetadataUnpacker: %v", err) + return err + } + + p.MetadataType = unpacker.MetadataType + + switch p.MetadataType { + case pkg.ApkMetadataType: + var payload pkg.ApkMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.RpmdbMetadataType: + var payload pkg.RpmdbMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.DpkgMetadataType: + var payload pkg.DpkgMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.JavaMetadataType: + var payload pkg.JavaMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.RustCargoPackageMetadataType: + var payload pkg.CargoPackageMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.GemMetadataType: + var payload pkg.GemMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.KbPackageMetadataType: + var payload pkg.KbPackageMetadata + if err := 
json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.PythonPackageMetadataType: + var payload pkg.PythonPackageMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + case pkg.NpmPackageJSONMetadataType: + var payload pkg.NpmPackageJSONMetadata + if err := json.Unmarshal(unpacker.Metadata, &payload); err != nil { + return err + } + p.Metadata = payload + } + + return nil +} diff --git a/internal/formats/spdx22json/model/version.go b/internal/formats/spdx22json/model/version.go new file mode 100644 index 000000000..8f105cbfa --- /dev/null +++ b/internal/formats/spdx22json/model/version.go @@ -0,0 +1,3 @@ +package model + +const Version = "SPDX-2.2" diff --git a/internal/formats/spdx22json/test-fixtures/image-simple/Dockerfile b/internal/formats/spdx22json/test-fixtures/image-simple/Dockerfile new file mode 100644 index 000000000..79cfa759e --- /dev/null +++ b/internal/formats/spdx22json/test-fixtures/image-simple/Dockerfile @@ -0,0 +1,4 @@ +# Note: changes to this file will result in updating several test values. Consider making a new image fixture instead of editing this one. +FROM scratch +ADD file-1.txt /somefile-1.txt +ADD file-2.txt /somefile-2.txt diff --git a/internal/formats/spdx22json/test-fixtures/image-simple/file-1.txt b/internal/formats/spdx22json/test-fixtures/image-simple/file-1.txt new file mode 100644 index 000000000..985d3408e --- /dev/null +++ b/internal/formats/spdx22json/test-fixtures/image-simple/file-1.txt @@ -0,0 +1 @@ +this file has contents \ No newline at end of file diff --git a/internal/formats/spdx22json/test-fixtures/image-simple/file-2.txt b/internal/formats/spdx22json/test-fixtures/image-simple/file-2.txt new file mode 100644 index 000000000..396d08bbc --- /dev/null +++ b/internal/formats/spdx22json/test-fixtures/image-simple/file-2.txt @@ -0,0 +1 @@ +file-2 contents! 
\ No newline at end of file diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden new file mode 100644 index 000000000..249517449 --- /dev/null +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONDirectoryPresenter.golden @@ -0,0 +1,79 @@ +{ + "SPDXID": "SPDXRef-DOCUMENT", + "name": "/some/path", + "spdxVersion": "SPDX-2.2", + "creationInfo": { + "created": "2021-09-16T20:44:35.198887Z", + "creators": [ + "Organization: Anchore, Inc", + "Tool: syft-[not provided]" + ], + "licenseListVersion": "3.14" + }, + "dataLicense": "CC0-1.0", + "documentNamespace": "https://anchore.com/syft/image/", + "packages": [ + { + "SPDXID": "SPDXRef-Package-python-package-1-1.0.1", + "name": "package-1", + "licenseConcluded": "MIT", + "downloadLocation": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:*:some:package:2:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + }, + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "a-purl-2", + "referenceType": "purl" + } + ], + "filesAnalyzed": false, + "hasFiles": [ + "SPDXRef-File-package-1-04cd22424378dcd6c77fce08beb52493b5494a60ea5e1f9bdf9b16dc0cacffe9" + ], + "licenseDeclared": "MIT", + "sourceInfo": "acquired package info from installed python package manifest file: /some/path/pkg1", + "versionInfo": "1.0.1" + }, + { + "SPDXID": "SPDXRef-Package-deb-package-2-2.0.1", + "name": "package-2", + "licenseConcluded": "NONE", + "downloadLocation": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:*:some:package:2:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + }, + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "a-purl-2", + "referenceType": "purl" + } + ], + "filesAnalyzed": false, + "licenseDeclared": "NONE", + "sourceInfo": "acquired package 
info from DPKG DB: /some/path/pkg1", + "versionInfo": "2.0.1" + } + ], + "files": [ + { + "SPDXID": "SPDXRef-File-package-1-04cd22424378dcd6c77fce08beb52493b5494a60ea5e1f9bdf9b16dc0cacffe9", + "name": "foo", + "licenseConcluded": "", + "fileName": "/some/path/pkg1/depedencies/foo" + } + ], + "relationships": [ + { + "spdxElementId": "SPDXRef-Package-python-package-1-1.0.1", + "relationshipType": "CONTAINS", + "relatedSpdxElement": "SPDXRef-File-package-1-04cd22424378dcd6c77fce08beb52493b5494a60ea5e1f9bdf9b16dc0cacffe9" + } + ] +} diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden new file mode 100644 index 000000000..8906ef161 --- /dev/null +++ b/internal/formats/spdx22json/test-fixtures/snapshot/TestSPDXJSONImagePresenter.golden @@ -0,0 +1,61 @@ +{ + "SPDXID": "SPDXRef-DOCUMENT", + "name": "user-image-input", + "spdxVersion": "SPDX-2.2", + "creationInfo": { + "created": "2021-09-16T20:44:35.203911Z", + "creators": [ + "Organization: Anchore, Inc", + "Tool: syft-[not provided]" + ], + "licenseListVersion": "3.14" + }, + "dataLicense": "CC0-1.0", + "documentNamespace": "https://anchore.com/syft/image/user-image-input", + "packages": [ + { + "SPDXID": "SPDXRef-Package-python-package-1-1.0.1", + "name": "package-1", + "licenseConcluded": "MIT", + "downloadLocation": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:*:some:package:1:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + }, + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "a-purl-1", + "referenceType": "purl" + } + ], + "filesAnalyzed": false, + "licenseDeclared": "MIT", + "sourceInfo": "acquired package info from installed python package manifest file: /somefile-1.txt", + "versionInfo": "1.0.1" + }, + { + "SPDXID": "SPDXRef-Package-deb-package-2-2.0.1", + "name": "package-2", + "licenseConcluded": 
"NONE", + "downloadLocation": "NOASSERTION", + "externalRefs": [ + { + "referenceCategory": "SECURITY", + "referenceLocator": "cpe:2.3:*:some:package:2:*:*:*:*:*:*:*", + "referenceType": "cpe23Type" + }, + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "a-purl-2", + "referenceType": "purl" + } + ], + "filesAnalyzed": false, + "licenseDeclared": "NONE", + "sourceInfo": "acquired package info from DPKG DB: /somefile-2.txt", + "versionInfo": "2.0.1" + } + ] +} diff --git a/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden b/internal/formats/spdx22json/test-fixtures/snapshot/stereoscope-fixture-image-simple.golden new file mode 100644 index 0000000000000000000000000000000000000000..c1b1d2b797ecd34a5276a1aa2fb18c5b0a58c732 GIT binary patch literal 15360 zcmeHO+iu%N5Y=ZaKc z4r#BxuDkuJ7{OBcm#%FzyUOYr8lw^9HIDjR+4F_Fg0dOV`$VGKgH38I>Hgnpur)R2 zl)HY_XM^MD!m4A0dKhDcAS?&*f(4T;Kn>ke#l4gzVN` ze(?Rjwf#RFzkC1s- z9>CiEk8VKl|Mzk`OrxxEw%gtY==qIJx$uy+*ED8?qX(EcDL{I387Ngsb4dh?x$wg9 z*h*rg6@ZQ(w@kA*(GHyphO!?TNJz`YJ>fe4rt;(!W2awfa(sRRC68dzXajVE@y-n>o(^op5E2ZJDVs7 z4hRGU0s;YnXN7=m8LaJu|D{?6{&zpONo)J>g#RTf@W1=9f8c-jgY$EprS<=wcn6J zY-{-4@PCvC{%=1g#vT7ROM=?XoSXT-=K0?$|A*;W&6n z=@JS7fq+0jARzERL||m{EQP9mOdkK&{`^hnzm$9>7qof)M_D-kzZUPOjKl2tmy1l1 zh;8Ft5R3%h!MHdz7xT>W8cHZCSB&8>&90l53=Xp<>E*D>M<2Vo8*8l9D*K-ZL#y zZQ=xy1TbwlA*sSvk$`4t>Jlk6(avMSc$^Yo%t_n4Rq$q>(~gPk+Y4v#cWf^=0@K;g zetL>o_SUyVTU8mpNs&Rdt4TMle|Dc6KFbBi^{-S||GP5Biri^bZCIhryCba^=@v2! 
WZ*QOH3R;0P6aoSPfq=kwgTOxo35itz literal 0 HcmV?d00001 diff --git a/internal/formats/spdx22json/to_format_model.go b/internal/formats/spdx22json/to_format_model.go new file mode 100644 index 000000000..d465919a4 --- /dev/null +++ b/internal/formats/spdx22json/to_format_model.go @@ -0,0 +1,164 @@ +package spdx22json + +import ( + "fmt" + "path" + "strings" + "time" + + "github.com/anchore/syft/syft/distro" + + "github.com/anchore/syft/internal" + "github.com/anchore/syft/internal/formats/common/spdxhelpers" + "github.com/anchore/syft/internal/formats/spdx22json/model" + "github.com/anchore/syft/internal/spdxlicense" + "github.com/anchore/syft/internal/version" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" + "github.com/google/uuid" +) + +// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. +func toFormatModel(catalog *pkg.Catalog, srcMetadata *source.Metadata, d *distro.Distro) model.Document { + name := documentName(srcMetadata) + packages, files, relationships := extractFromCatalog(catalog) + + return model.Document{ + Element: model.Element{ + SPDXID: model.ElementID("DOCUMENT").String(), + Name: name, + }, + SPDXVersion: model.Version, + CreationInfo: model.CreationInfo{ + Created: time.Now().UTC(), + Creators: []string{ + // note: key-value format derived from the JSON example document examples: https://github.com/spdx/spdx-spec/blob/v2.2/examples/SPDXJSONExample-v2.2.spdx.json + "Organization: Anchore, Inc", + "Tool: " + internal.ApplicationName + "-" + version.FromBuild().Version, + }, + LicenseListVersion: spdxlicense.Version, + }, + DataLicense: "CC0-1.0", + DocumentNamespace: documentNamespace(name, srcMetadata), + Packages: packages, + Files: files, + Relationships: relationships, + SyftSourceData: srcMetadata, + SyftDistroData: toSyftDistroData(d), + } +} + +func toSyftDistroData(d *distro.Distro) *model.SyftDistroData { + if d == nil { + return 
nil + } + return &model.SyftDistroData{ + Name: d.Name(), + Version: d.FullVersion(), + IDLike: d.IDLike, + } +} + +func documentName(srcMetadata *source.Metadata) string { + if srcMetadata != nil { + switch srcMetadata.Scheme { + case source.ImageScheme: + return cleanSPDXName(srcMetadata.ImageMetadata.UserInput) + case source.DirectoryScheme: + return cleanSPDXName(srcMetadata.Path) + } + } + // TODO: is this alright? + return uuid.Must(uuid.NewRandom()).String() +} + +func documentNamespace(name string, srcMetadata *source.Metadata) string { + input := "unknown-source-type" + if srcMetadata != nil { + switch srcMetadata.Scheme { + case source.ImageScheme: + input = "image" + case source.DirectoryScheme: + input = "dir" + } + } + + uniqueID := uuid.Must(uuid.NewRandom()) + identifier := path.Join(input, uniqueID.String()) + if name != "." { + identifier = path.Join(input, fmt.Sprintf("%s-%s", name, uniqueID.String())) + } + + return path.Join(anchoreNamespace, identifier) +} + +func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, []model.Relationship) { + packages := make([]model.Package, 0) + relationships := make([]model.Relationship, 0) + files := make([]model.File, 0) + + for _, p := range catalog.Sorted() { + license := spdxhelpers.License(p) + packageSpdxID := model.ElementID(fmt.Sprintf("Package-%+v-%s-%s", p.Type, p.Name, p.Version)).String() + + packageFiles, fileIDs, packageFileRelationships := spdxhelpers.Files(packageSpdxID, p) + files = append(files, packageFiles...) + + relationships = append(relationships, packageFileRelationships...) + + // note: the license concluded and declared should be the same since we are collecting license information + // from the project data itself (the installed package files). 
+ packages = append(packages, model.Package{ + Description: spdxhelpers.Description(p), + DownloadLocation: spdxhelpers.DownloadLocation(p), + ExternalRefs: spdxhelpers.ExternalRefs(p), + FilesAnalyzed: false, + HasFiles: fileIDs, + Homepage: spdxhelpers.Homepage(p), + LicenseDeclared: license, // The Declared License is what the authors of a project believe govern the package + Originator: spdxhelpers.Originator(p), + SourceInfo: spdxhelpers.SourceInfo(p), + VersionInfo: p.Version, + Item: model.Item{ + LicenseConcluded: license, // The Concluded License field is the license the SPDX file creator believes governs the package + Element: model.Element{ + SPDXID: packageSpdxID, + Name: p.Name, + }, + }, + SyftPackageData: toSyftPackageData(p), + }) + } + + return packages, files, relationships +} + +func toSyftPackageData(p *pkg.Package) *model.SyftPackageData { + if p == nil { + return nil + } + return &model.SyftPackageData{ + SyftPackageBasicData: model.SyftPackageBasicData{ + PackageType: p.Type, + FoundBy: p.FoundBy, + Locations: p.Locations, + Language: p.Language, + Licenses: p.Licenses, + }, + SyftPackageCustomData: model.SyftPackageCustomData{ + MetadataType: p.MetadataType, + Metadata: p.Metadata, + }, + } +} + +func cleanSPDXName(name string) string { + // remove # according to specification + name = strings.ReplaceAll(name, "#", "-") + + // remove : for url construction + name = strings.ReplaceAll(name, ":", "-") + + // clean relative pathing + return path.Clean(name) +} diff --git a/internal/formats/spdx22json/to_syft_model.go b/internal/formats/spdx22json/to_syft_model.go new file mode 100644 index 000000000..b0a9edbd3 --- /dev/null +++ b/internal/formats/spdx22json/to_syft_model.go @@ -0,0 +1,64 @@ +package spdx22json + +import ( + "github.com/anchore/syft/internal/formats/common/spdxhelpers" + "github.com/anchore/syft/internal/formats/spdx22json/model" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/distro" + 
"github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/source" +) + +func toSyftModel(doc model.Document) (*pkg.Catalog, *source.Metadata, *distro.Distro, error) { + d, err := toSyftDistro(doc.SyftDistroData) + if err != nil { + log.Warnf("unable to parse distro info=%+v: %+v", d, err) + d = nil + } + + return toSyftCatalog(doc.Packages), doc.SyftSourceData, d, nil +} + +func toSyftCatalog(pkgs []model.Package) *pkg.Catalog { + catalog := pkg.NewCatalog() + for _, p := range pkgs { + catalog.Add(toSyftPackage(p)) + } + return catalog +} + +func toSyftPackage(p model.Package) pkg.Package { + syftPkg := pkg.Package{ + Name: p.Name, + Version: p.VersionInfo, + CPEs: spdxhelpers.ExtractCPEs(p.ExternalRefs), + PURL: spdxhelpers.ExtractPURL(p.ExternalRefs), + } + + if extra := p.SyftPackageData; extra != nil { + syftPkg.Type = extra.PackageType + syftPkg.FoundBy = extra.FoundBy + syftPkg.Locations = extra.Locations + syftPkg.Language = extra.Language + syftPkg.Licenses = extra.Licenses + syftPkg.MetadataType = extra.MetadataType + syftPkg.Metadata = extra.Metadata + } + + //if syftPkg.Type == "" && syftPkg.PURL != "" { + // // TODO: extract package type from purl --this is useful for ingesting from tools other than syft and is important for grype + //} + + return syftPkg +} + +func toSyftDistro(d *model.SyftDistroData) (*distro.Distro, error) { + if d == nil { + return nil, nil + } + newDistro, err := distro.NewDistro(distro.Type(d.Name), d.Version, d.IDLike) + if err != nil { + return nil, err + } + return &newDistro, nil +} diff --git a/internal/formats/spdx22json/validator.go b/internal/formats/spdx22json/validator.go new file mode 100644 index 000000000..5bcbc6f18 --- /dev/null +++ b/internal/formats/spdx22json/validator.go @@ -0,0 +1,26 @@ +package spdx22json + +import ( + "encoding/json" + "fmt" + "io" +) + +func validator(reader io.Reader) error { + type Document struct { + SPDXID string `json:"SPDXID"` + } + + dec := json.NewDecoder(reader) + + var 
doc Document + err := dec.Decode(&doc) + if err != nil { + return fmt.Errorf("unable to decode: %w", err) + } + + if doc.SPDXID != "" { + return nil + } + return fmt.Errorf("could not extract document SPDXID") +}