From d3b6419a34da96da20f1849f91ead00c3ec9b130 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Wed, 17 Nov 2021 13:38:42 -0500 Subject: [PATCH] port remaining spdx-json relationships to sbom model Signed-off-by: Alex Goodman --- internal/formats/common/spdxhelpers/files.go | 51 ----- internal/formats/spdx22json/model/file.go | 24 +-- .../formats/spdx22json/to_format_model.go | 198 +++++++++++++++--- 3 files changed, 182 insertions(+), 91 deletions(-) delete mode 100644 internal/formats/common/spdxhelpers/files.go diff --git a/internal/formats/common/spdxhelpers/files.go b/internal/formats/common/spdxhelpers/files.go deleted file mode 100644 index c0d2be4f2..000000000 --- a/internal/formats/common/spdxhelpers/files.go +++ /dev/null @@ -1,51 +0,0 @@ -package spdxhelpers - -import ( - "crypto/sha256" - "fmt" - "path/filepath" - - "github.com/anchore/syft/internal/formats/spdx22json/model" - "github.com/anchore/syft/syft/pkg" -) - -func Files(packageSpdxID string, p pkg.Package) (files []model.File, fileIDs []string, relationships []model.Relationship) { - files = make([]model.File, 0) - fileIDs = make([]string, 0) - relationships = make([]model.Relationship, 0) - - if !hasMetadata(p) { - return files, fileIDs, relationships - } - - pkgFileOwner, ok := p.Metadata.(pkg.FileOwner) - if !ok { - return files, fileIDs, relationships - } - - for _, ownedFilePath := range pkgFileOwner.OwnedFiles() { - baseFileName := filepath.Base(ownedFilePath) - pathHash := sha256.Sum256([]byte(ownedFilePath)) - fileSpdxID := model.ElementID(fmt.Sprintf("File-%s-%x", p.Name, pathHash)).String() - - fileIDs = append(fileIDs, fileSpdxID) - - files = append(files, model.File{ - FileName: ownedFilePath, - Item: model.Item{ - Element: model.Element{ - SPDXID: fileSpdxID, - Name: baseFileName, - }, - }, - }) - - relationships = append(relationships, model.Relationship{ - SpdxElementID: packageSpdxID, - RelationshipType: model.ContainsRelationship, - RelatedSpdxElement: fileSpdxID, - }) - } - - 
return files, fileIDs, relationships -} diff --git a/internal/formats/spdx22json/model/file.go b/internal/formats/spdx22json/model/file.go index 970f369cc..c01764cdc 100644 --- a/internal/formats/spdx22json/model/file.go +++ b/internal/formats/spdx22json/model/file.go @@ -3,17 +3,17 @@ package model type FileType string const ( - DocumentationFileType FileType = "DOCUMENTATION" - ImageFileType FileType = "IMAGE" - VideoFileType FileType = "VIDEO" - ArchiveFileType FileType = "ARCHIVE" - SpdxFileType FileType = "SPDX" - ApplicationFileType FileType = "APPLICATION" - SourceFileType FileType = "SOURCE" - BinaryFileType FileType = "BINARY" - TextFileType FileType = "TEXT" - AudioFileType FileType = "AUDIO" - OtherFileType FileType = "OTHER" + DocumentationFileType FileType = "DOCUMENTATION" // if the file serves as documentation + ImageFileType FileType = "IMAGE" // if the file is associated with a picture image file (MIME type of image/*, e.g., .jpg, .gif) + VideoFileType FileType = "VIDEO" // if the file is associated with a video file type (MIME type of video/*) + ArchiveFileType FileType = "ARCHIVE" // if the file represents an archive (.tar, .jar, etc.) + SpdxFileType FileType = "SPDX" // if the file is an SPDX document + ApplicationFileType FileType = "APPLICATION" // if the file is associated with a specific application type (MIME type of application/*) + SourceFileType FileType = "SOURCE" // if the file is human readable source code (.c, .html, etc.) + BinaryFileType FileType = "BINARY" // if the file is a compiled object, target image or binary executable (.o, .a, etc.) + TextFileType FileType = "TEXT" // if the file is human readable text file (MIME type of text/*) + AudioFileType FileType = "AUDIO" // if the file is associated with an audio file (MIME type of audio/* , e.g. .mp3) + OtherFileType FileType = "OTHER" // if the file doesn't fit into the above categories (generated artifacts, data files, etc.) 
) type File struct { @@ -36,6 +36,6 @@ type File struct { // Indicates the project in which the SpdxElement originated. Tools must preserve doap:homepage and doap:name // properties and the URI (if one is known) of doap:Project resources that are values of this property. All other // properties of doap:Projects are not directly supported by SPDX and may be dropped when translating to or - // from some SPDX formats(deprecated). + // from some SPDX formats (deprecated). ArtifactOf []string `json:"artifactOf,omitempty"` } diff --git a/internal/formats/spdx22json/to_format_model.go b/internal/formats/spdx22json/to_format_model.go index d6724ae91..c21fa5f3f 100644 --- a/internal/formats/spdx22json/to_format_model.go +++ b/internal/formats/spdx22json/to_format_model.go @@ -3,9 +3,15 @@ package spdx22json import ( "fmt" "path" + "path/filepath" + "sort" "strings" "time" + "github.com/anchore/syft/syft/file" + + "github.com/anchore/syft/syft/artifact" + "github.com/anchore/syft/syft/sbom" "github.com/anchore/syft/internal" @@ -21,7 +27,6 @@ import ( // toFormatModel creates and populates a new JSON document struct that follows the SPDX 2.2 spec from the given cataloging results. 
func toFormatModel(s sbom.SBOM) model.Document { name := documentName(s.Source) - packages, files, relationships := extractFromCatalog(s.Artifacts.PackageCatalog) return model.Document{ Element: model.Element{ @@ -40,9 +45,9 @@ func toFormatModel(s sbom.SBOM) model.Document { }, DataLicense: "CC0-1.0", DocumentNamespace: documentNamespace(name, s.Source), - Packages: packages, - Files: files, - Relationships: relationships, + Packages: toPackages(s.Artifacts.PackageCatalog, s.Relationships), + Files: toFiles(s), + Relationships: toRelationships(s.Relationships), } } @@ -58,6 +63,17 @@ func documentName(srcMetadata source.Metadata) string { return uuid.Must(uuid.NewRandom()).String() } +func cleanSPDXName(name string) string { + // remove # according to specification + name = strings.ReplaceAll(name, "#", "-") + + // remove : for url construction + name = strings.ReplaceAll(name, ":", "-") + + // clean relative pathing + return path.Clean(name) +} + func documentNamespace(name string, srcMetadata source.Metadata) string { input := "unknown-source-type" switch srcMetadata.Scheme { @@ -76,19 +92,12 @@ func documentNamespace(name string, srcMetadata source.Metadata) string { return path.Join(anchoreNamespace, identifier) } -func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, []model.Relationship) { +func toPackages(catalog *pkg.Catalog, relationships []artifact.Relationship) []model.Package { packages := make([]model.Package, 0) - relationships := make([]model.Relationship, 0) - files := make([]model.File, 0) for _, p := range catalog.Sorted() { license := spdxhelpers.License(p) - packageSpdxID := model.ElementID(fmt.Sprintf("Package-%+v-%s-%s", p.Type, p.Name, p.Version)).String() - - packageFiles, fileIDs, packageFileRelationships := spdxhelpers.Files(packageSpdxID, p) - files = append(files, packageFiles...) - - relationships = append(relationships, packageFileRelationships...) 
+ packageSpdxID := model.ElementID(p.ID()).String() // note: the license concluded and declared should be the same since we are collecting license information // from the project data itself (the installed package files). @@ -97,14 +106,16 @@ func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, [] DownloadLocation: spdxhelpers.DownloadLocation(p), ExternalRefs: spdxhelpers.ExternalRefs(p), FilesAnalyzed: false, - HasFiles: fileIDs, + HasFiles: fileIDsForPackage(packageSpdxID, relationships), Homepage: spdxhelpers.Homepage(p), - LicenseDeclared: license, // The Declared License is what the authors of a project believe govern the package - Originator: spdxhelpers.Originator(p), - SourceInfo: spdxhelpers.SourceInfo(p), - VersionInfo: p.Version, + // The Declared License is what the authors of a project believe govern the package + LicenseDeclared: license, + Originator: spdxhelpers.Originator(p), + SourceInfo: spdxhelpers.SourceInfo(p), + VersionInfo: p.Version, Item: model.Item{ - LicenseConcluded: license, // The Concluded License field is the license the SPDX file creator believes governs the package + // The Concluded License field is the license the SPDX file creator believes governs the package + LicenseConcluded: license, Element: model.Element{ SPDXID: packageSpdxID, Name: p.Name, @@ -113,16 +124,147 @@ func extractFromCatalog(catalog *pkg.Catalog) ([]model.Package, []model.File, [] }) } - return packages, files, relationships + return packages } -func cleanSPDXName(name string) string { - // remove # according to specification - name = strings.ReplaceAll(name, "#", "-") +func fileIDsForPackage(packageSpdxID string, relationships []artifact.Relationship) (fileIDs []string) { + for _, relationship := range relationships { + if relationship.Type != artifact.PackageOfRelationship { + continue + } - // remove : for url construction - name = strings.ReplaceAll(name, ":", "-") - - // clean relative pathing - return path.Clean(name) + if 
string(relationship.To.ID()) == packageSpdxID { + fileIDs = append(fileIDs, string(relationship.From.ID())) + } + } + return fileIDs +} + +func toFiles(s sbom.SBOM) []model.File { + results := make([]model.File, 0) + artifacts := s.Artifacts + + for _, coordinates := range sbom.AllCoordinates(s) { + var metadata *source.FileMetadata + if metadataForLocation, exists := artifacts.FileMetadata[coordinates]; exists { + metadata = &metadataForLocation + } + + var digests []file.Digest + if digestsForLocation, exists := artifacts.FileDigests[coordinates]; exists { + digests = digestsForLocation + } + + // TODO: these could make it into the document + //var classifications []file.Classification + //if classificationsForLocation, exists := artifacts.FileClassifications[coordinates]; exists { + // classifications = classificationsForLocation + //} + // + //var contents string + //if contentsForLocation, exists := artifacts.FileContents[coordinates]; exists { + // contents = contentsForLocation + //} + + var comment string + if coordinates.FileSystemID != "" { + comment = fmt.Sprintf("layerID: %s", coordinates.FileSystemID) + } + + results = append(results, model.File{ + Item: model.Item{ + Element: model.Element{ + SPDXID: string(coordinates.ID()), + // TODO: this is encoding layer id... is there a better way? 
+ Name: filepath.Base(coordinates.RealPath), + Comment: comment, + }, + // required, no attempt made to determine license information + LicenseConcluded: "NOASSERTION", + }, + Checksums: toFileChecksums(digests), + FileName: coordinates.RealPath, + FileTypes: toFileTypes(metadata), + }) + } + + // sort by file name (the real path) to ensure the result is stable across multiple runs + sort.SliceStable(results, func(i, j int) bool { + return results[i].FileName < results[j].FileName + }) + return results +} + +func toFileChecksums(digests []file.Digest) (checksums []model.Checksum) { + for _, digest := range digests { + checksums = append(checksums, model.Checksum{ + Algorithm: digest.Algorithm, + ChecksumValue: digest.Value, + }) + } + return checksums +} + +func toFileTypes(metadata *source.FileMetadata) (ty []string) { + if metadata == nil { + return nil + } + + mimeTypePrefix := strings.Split(metadata.MIMEType, "/")[0] + switch mimeTypePrefix { + case "image": + ty = append(ty, string(model.ImageFileType)) + case "video": + ty = append(ty, string(model.VideoFileType)) + case "application": + ty = append(ty, string(model.ApplicationFileType)) + case "text": + ty = append(ty, string(model.TextFileType)) + case "audio": + ty = append(ty, string(model.AudioFileType)) + } + + if internal.IsExecutable(metadata.MIMEType) { + ty = append(ty, string(model.BinaryFileType)) + } + + if internal.IsArchive(metadata.MIMEType) { + ty = append(ty, string(model.ArchiveFileType)) + } + + // TODO: source, spdx, and documentation + if len(ty) == 0 { + ty = append(ty, string(model.OtherFileType)) + } + + return ty +} + +func toRelationships(relationships []artifact.Relationship) (result []model.Relationship) { + for _, r := range relationships { + exists, relationshipType, comment := lookupRelationship(r.Type) + + if !exists { + // TODO: should we warn about lossiness here? 
+ continue + } + + result = append(result, model.Relationship{ + SpdxElementID: string(r.From.ID()), + RelationshipType: relationshipType, + RelatedSpdxElement: string(r.To.ID()), + Comment: comment, + }) + } + return result +} + +func lookupRelationship(ty artifact.RelationshipType) (bool, model.RelationshipType, string) { + switch ty { + case artifact.PackageOfRelationship: + return true, model.PackageOfRelationship, "" + case artifact.OwnershipByFileOverlapRelationship: + return true, model.OtherRelationship, fmt.Sprintf("%s: indicates that the parent package claims ownership of a child package since the parent metadata indicates overlap with a location that a cataloger found the child package by", ty) + } + return false, "", "" }