port remaining spdx-json relationships to sbom model

Signed-off-by: Alex Goodman <alex.goodman@anchore.com>
This commit is contained in:
Alex Goodman 2021-11-17 13:38:42 -05:00
parent 7640df99c8
commit d3b6419a34
No known key found for this signature in database
GPG Key ID: 5CB45AE22BAB7EA7
3 changed files with 182 additions and 91 deletions

View File

@ -1,51 +0,0 @@
package spdxhelpers
import (
"crypto/sha256"
"fmt"
"path/filepath"
"github.com/anchore/syft/internal/formats/spdx22json/model"
"github.com/anchore/syft/syft/pkg"
)
func Files(packageSpdxID string, p pkg.Package) (files []model.File, fileIDs []string, relationships []model.Relationship) {
files = make([]model.File, 0)
fileIDs = make([]string, 0)
relationships = make([]model.Relationship, 0)
if !hasMetadata(p) {
return files, fileIDs, relationships
}
pkgFileOwner, ok := p.Metadata.(pkg.FileOwner)
if !ok {
return files, fileIDs, relationships
}
for _, ownedFilePath := range pkgFileOwner.OwnedFiles() {
baseFileName := filepath.Base(ownedFilePath)
pathHash := sha256.Sum256([]byte(ownedFilePath))
fileSpdxID := model.ElementID(fmt.Sprintf("File-%s-%x", p.Name, pathHash)).String()
fileIDs = append(fileIDs, fileSpdxID)
files = append(files, model.File{
FileName: ownedFilePath,
Item: model.Item{
Element: model.Element{
SPDXID: fileSpdxID,
Name: baseFileName,
},
},
})
relationships = append(relationships, model.Relationship{
SpdxElementID: packageSpdxID,
RelationshipType: model.ContainsRelationship,
RelatedSpdxElement: fileSpdxID,
})
}
return files, fileIDs, relationships
}

View File

@ -3,17 +3,17 @@ package model
type FileType string
const (
DocumentationFileType FileType = "DOCUMENTATION"
ImageFileType FileType = "IMAGE"
VideoFileType FileType = "VIDEO"
ArchiveFileType FileType = "ARCHIVE"
SpdxFileType FileType = "SPDX"
ApplicationFileType FileType = "APPLICATION"
SourceFileType FileType = "SOURCE"
BinaryFileType FileType = "BINARY"
TextFileType FileType = "TEXT"
AudioFileType FileType = "AUDIO"
OtherFileType FileType = "OTHER"
DocumentationFileType FileType = "DOCUMENTATION" // if the file serves as documentation
ImageFileType FileType = "IMAGE" // if the file is associated with a picture image file (MIME type of image/*, e.g., .jpg, .gif)
VideoFileType FileType = "VIDEO" // if the file is associated with a video file type (MIME type of video/*)
ArchiveFileType FileType = "ARCHIVE" // if the file represents an archive (.tar, .jar, etc.)
SpdxFileType FileType = "SPDX" // if the file is an SPDX document
ApplicationFileType FileType = "APPLICATION" // if the file is associated with a specific application type (MIME type of application/*)
SourceFileType FileType = "SOURCE" // if the file is human readable source code (.c, .html, etc.)
BinaryFileType FileType = "BINARY" // if the file is a compiled object, target image or binary executable (.o, .a, etc.)
TextFileType FileType = "TEXT" // if the file is human readable text file (MIME type of text/*)
AudioFileType FileType = "AUDIO" // if the file is associated with an audio file (MIME type of audio/* , e.g. .mp3)
OtherFileType FileType = "OTHER" // if the file doesn't fit into the above categories (generated artifacts, data files, etc.)
)
type File struct {
@ -36,6 +36,6 @@ type File struct {
// Indicates the project in which the SpdxElement originated. Tools must preserve doap:homepage and doap:name
// properties and the URI (if one is known) of doap:Project resources that are values of this property. All other
// properties of doap:Projects are not directly supported by SPDX and may be dropped when translating to or
// from some SPDX formats(deprecated).
// from some SPDX formats (deprecated).
ArtifactOf []string `json:"artifactOf,omitempty"`
}

View File

@ -3,9 +3,15 @@ package spdx22json
import (
"fmt"
"path"
"path/filepath"
"sort"
"strings"
"time"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/sbom"
"github.com/anchore/syft/internal"
@ -21,7 +27,6 @@ import (
// toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results.
func toFormatModel(s sbom.SBOM) model.Document {
name := documentName(s.Source)
packages, files, relationships := extractFromCatalog(s.Artifacts.PackageCatalog)
return model.Document{
Element: model.Element{
@ -40,9 +45,9 @@ func toFormatModel(s sbom.SBOM) model.Document {
},
DataLicense: "CC0-1.0",
DocumentNamespace: documentNamespace(name, s.Source),
Packages: packages,
Files: files,
Relationships: relationships,
Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships),
Files: toFiles(s),
Relationships: toRelationships(s.Relationships),
}
}
@ -58,6 +63,17 @@ func documentName(srcMetadata source.Metadata) string {
return uuid.Must(uuid.NewRandom()).String()
}
func cleanSPDXName(name string) string {
// remove # according to specification
name = strings.ReplaceAll(name, "#", "-")
// remove : for url construction
name = strings.ReplaceAll(name, ":", "-")
// clean relative pathing
return path.Clean(name)
}
func documentNamespace(name string, srcMetadata source.Metadata) string {
input := "unknown-source-type"
switch srcMetadata.Scheme {
@ -76,19 +92,12 @@ func documentNamespace(name string, srcMetadata source.Metadata) string {
return path.Join(anchoreNamespace, identifier)
}
func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, []model.Relationship) {
func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package {
packages := make([]model.Package, 0)
relationships := make([]model.Relationship, 0)
files := make([]model.File, 0)
for _, p := range catalog.Sorted() {
license := spdxhelpers.License(p)
packageSpdxID := model.ElementID(fmt.Sprintf("Package-%+v-%s-%s", p.Type, p.Name, p.Version)).String()
packageFiles, fileIDs, packageFileRelationships := spdxhelpers.Files(packageSpdxID, p)
files = append(files, packageFiles...)
relationships = append(relationships, packageFileRelationships...)
packageSpdxID := model.ElementID(p.ID()).String()
// note: the license concluded and declared should be the same since we are collecting license information
// from the project data itself (the installed package files).
@ -97,14 +106,16 @@ func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, []
DownloadLocation: spdxhelpers.DownloadLocation(p),
ExternalRefs: spdxhelpers.ExternalRefs(p),
FilesAnalyzed: false,
HasFiles: fileIDs,
HasFiles: fileIDsForPackage(packageSpdxID, relationships),
Homepage: spdxhelpers.Homepage(p),
LicenseDeclared: license, // The Declared License is what the authors of a project believe govern the package
Originator: spdxhelpers.Originator(p),
SourceInfo: spdxhelpers.SourceInfo(p),
VersionInfo: p.Version,
// The Declared License is what the authors of a project believe govern the package
LicenseDeclared: license,
Originator: spdxhelpers.Originator(p),
SourceInfo: spdxhelpers.SourceInfo(p),
VersionInfo: p.Version,
Item: model.Item{
LicenseConcluded: license, // The Concluded License field is the license the SPDX file creator believes governs the package
// The Concluded License field is the license the SPDX file creator believes governs the package
LicenseConcluded: license,
Element: model.Element{
SPDXID: packageSpdxID,
Name: p.Name,
@ -113,16 +124,147 @@ func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, []
})
}
return packages, files, relationships
return packages
}
func cleanSPDXName(name string) string {
// remove # according to specification
name = strings.ReplaceAll(name, "#", "-")
func fileIDsForPackage(packageSpdxID string, relationships []artifact.Relationship) (fileIDs []string) {
for _, relationship := range relationships {
if relationship.Type != artifact.PackageOfRelationship {
continue
}
// remove : for url construction
name = strings.ReplaceAll(name, ":", "-")
// clean relative pathing
return path.Clean(name)
if string(relationship.To.ID()) == packageSpdxID {
fileIDs = append(fileIDs, string(relationship.From.ID()))
}
}
return fileIDs
}
func toFiles(s sbom.SBOM) []model.File {
results := make([]model.File, 0)
artifacts := s.Artifacts
for _, coordinates := range sbom.AllCoordinates(s) {
var metadata *source.FileMetadata
if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists {
metadata = &metadataForLocation
}
var digests []file.Digest
if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists {
digests = digestsForLocation
}
// TODO: these could make it into the document
//var classifications []file.Classification
//if classificationsForLocation, exists := artifacts.FileClassifications[coordinates]; exists {
// classifications = classificationsForLocation
//}
//
//var contents string
//if contentsForLocation, exists := artifacts.FileContents[coordinates]; exists {
// contents = contentsForLocation
//}
var comment string
if coordinates.FileSystemID != "" {
comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID)
}
results = append(results, model.File{
Item: model.Item{
Element: model.Element{
SPDXID: string(coordinates.ID()),
// TODO: this is encoding layer id... is there a better way?
Name: filepath.Base(coordinates.RealPath),
Comment: comment,
},
// required, no attempt made to determine license information
LicenseConcluded: "NOASSERTION",
},
Checksums: toFileChecksums(digests),
FileName: coordinates.RealPath,
FileTypes: toFileTypes(metadata),
})
}
// sort by real path then virtual path to ensure the result is stable across multiple runs
sort.SliceStable(results, func(i, j int) bool {
return results[i].FileName < results[j].FileName
})
return results
}
func toFileChecksums(digests []file.Digest) (checksums []model.Checksum) {
for _, digest := range digests {
checksums = append(checksums, model.Checksum{
Algorithm: digest.Algorithm,
ChecksumValue: digest.Value,
})
}
return checksums
}
func toFileTypes(metadata *source.FileMetadata) (ty []string) {
if metadata == nil {
return nil
}
mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0]
switch mimeTypePrefix {
case "image":
ty = append(ty, string(model.ImageFileType))
case "video":
ty = append(ty, string(model.VideoFileType))
case "application":
ty = append(ty, string(model.ApplicationFileType))
case "text":
ty = append(ty, string(model.TextFileType))
case "audio":
ty = append(ty, string(model.AudioFileType))
}
if internal.IsExecutable(metadata.MIMEType) {
ty = append(ty, string(model.BinaryFileType))
}
if internal.IsArchive(metadata.MIMEType) {
ty = append(ty, string(model.ArchiveFileType))
}
// TODO: source, spdx, and documentation
if len(ty) == 0 {
ty = append(ty, string(model.OtherFileType))
}
return ty
}
func toRelationships(relationships []artifact.Relationship) (result []model.Relationship) {
for _, r := range relationships {
exists, relationshipType, comment := lookupRelationship(r.Type)
if !exists {
// TODO: should we warn about lossyness here?
continue
}
result = append(result, model.Relationship{
SpdxElementID: string(r.From.ID()),
RelationshipType: relationshipType,
RelatedSpdxElement: string(r.To.ID()),
Comment: comment,
})
}
return result
}
func lookupRelationship(ty artifact.RelationshipType) (bool, model.RelationshipType, string) {
switch ty {
case artifact.PackageOfRelationship:
return true, model.PackageOfRelationship, ""
case artifact.OwnershipByFileOverlapRelationship:
return true, model.OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty)
}
return false, "", ""
}