From 7789506dc65830c32f0bbc41e2be657f9fa473cd Mon Sep 17 00:00:00 2001
From: Keith Zantow
Date: Thu, 10 Mar 2022 22:38:12 -0500
Subject: [PATCH] Experimental GitHub export (#836)

---
 cmd/format_aliases.go                         |   2 +
 internal/formats/github/encoder.go            | 183 ++++++++++++++++++
 internal/formats/github/encoder_test.go       | 161 +++++++++++++++
 internal/formats/github/format.go             |  29 +++
 .../formats/github/github_dependency_api.go   |  78 ++++++++
 syft/formats.go                               |   5 +
 6 files changed, 458 insertions(+)
 create mode 100644 internal/formats/github/encoder.go
 create mode 100644 internal/formats/github/encoder_test.go
 create mode 100644 internal/formats/github/format.go
 create mode 100644 internal/formats/github/github_dependency_api.go

diff --git a/cmd/format_aliases.go b/cmd/format_aliases.go
index 558d13f24..375b5f657 100644
--- a/cmd/format_aliases.go
+++ b/cmd/format_aliases.go
@@ -22,6 +22,8 @@ func formatAliases(ids ...sbom.FormatID) (aliases []string) {
 			aliases = append(aliases, "cyclonedx-xml")
 		case syft.CycloneDxJSONFormatID:
 			aliases = append(aliases, "cyclonedx-json")
+		case syft.GitHubID:
+			aliases = append(aliases, "github", "github-json")
 		default:
 			aliases = append(aliases, string(id))
 		}
diff --git a/internal/formats/github/encoder.go b/internal/formats/github/encoder.go
new file mode 100644
index 000000000..967c87e64
--- /dev/null
+++ b/internal/formats/github/encoder.go
@@ -0,0 +1,183 @@
+package github
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/mholt/archiver/v3"
+
+	"github.com/anchore/packageurl-go"
+	"github.com/anchore/syft/internal"
+	"github.com/anchore/syft/internal/log"
+	"github.com/anchore/syft/internal/version"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+	"github.com/anchore/syft/syft/source"
+)
+
+// toGithubModel converts the provided SBOM to a GitHub dependency model
+func toGithubModel(s *sbom.SBOM) DependencySnapshot {
+	scanTime := time.Now().Format(time.RFC3339) // TODO is there a record of this somewhere?
+	v := version.FromBuild().Version
+	if v == "[not provided]" {
+		v = "0.0.0-dev"
+	}
+	return DependencySnapshot{
+		Version: 0,
+		// TODO allow property input to specify the Job, Sha, and Ref
+		Detector: DetectorMetadata{
+			Name:    internal.ApplicationName,
+			URL:     "https://github.com/anchore/syft",
+			Version: v,
+		},
+		Metadata:  toSnapshotMetadata(s),
+		Manifests: toGithubManifests(s),
+		Scanned:   scanTime,
+	}
+}
+
+// toSnapshotMetadata captures the linux distribution information and other metadata
+func toSnapshotMetadata(s *sbom.SBOM) Metadata {
+	out := Metadata{}
+
+	if s.Artifacts.LinuxDistribution != nil {
+		d := s.Artifacts.LinuxDistribution
+		qualifiers := packageurl.Qualifiers{}
+		if len(d.IDLike) > 0 {
+			qualifiers = append(qualifiers, packageurl.Qualifier{
+				Key:   "like",
+				Value: strings.Join(d.IDLike, ","),
+			})
+		}
+		purl := packageurl.NewPackageURL("generic", "", d.ID, d.VersionID, qualifiers, "")
+		out["syft:distro"] = purl.ToString()
+	}
+
+	return out
+}
+
+func filesystem(p pkg.Package) string {
+	if len(p.Locations) > 0 {
+		return p.Locations[0].FileSystemID
+	}
+	return ""
+}
+
+// isArchive returns true if the path appears to be an archive
+func isArchive(path string) bool {
+	_, err := archiver.ByExtension(path)
+	return err == nil
+}
+
+// toPath generates a string identifying the package location, qualified by the scanned image, file, or directory where applicable
+func toPath(s source.Metadata, p pkg.Package) string {
+	inputPath := strings.TrimPrefix(s.Path, "./")
+	if inputPath == "." {
+		inputPath = ""
+	}
+	if len(p.Locations) > 0 {
+		location := p.Locations[0]
+		packagePath := location.RealPath
+		if location.VirtualPath != "" {
+			packagePath = location.VirtualPath
+		}
+		packagePath = strings.TrimPrefix(packagePath, "/")
+		switch s.Scheme {
+		case source.ImageScheme:
+			image := strings.ReplaceAll(s.ImageMetadata.UserInput, ":/", "//")
+			return fmt.Sprintf("%s:/%s", image, packagePath)
+		case source.FileScheme:
+			if isArchive(inputPath) {
+				return fmt.Sprintf("%s:/%s", inputPath, packagePath)
+			}
+			return inputPath
+		case source.DirectoryScheme:
+			if inputPath != "" {
+				return fmt.Sprintf("%s/%s", inputPath, packagePath)
+			}
+			return packagePath
+		}
+	}
+	return fmt.Sprintf("%s%s", inputPath, s.ImageMetadata.UserInput)
+}
+
+// toGithubManifests builds the manifests, each of which represents a specific location that has dependencies
+func toGithubManifests(s *sbom.SBOM) Manifests {
+	manifests := map[string]*Manifest{}
+
+	for _, p := range s.Artifacts.PackageCatalog.Sorted() {
+		path := toPath(s.Source, p)
+		manifest, ok := manifests[path]
+		if !ok {
+			manifest = &Manifest{
+				Name: path,
+				File: FileInfo{
+					SourceLocation: path,
+				},
+				Resolved: DependencyGraph{},
+			}
+			fs := filesystem(p)
+			if fs != "" {
+				manifest.Metadata = Metadata{
+					"syft:filesystem": fs,
+				}
+			}
+			manifests[path] = manifest
+		}
+
+		name := dependencyName(p)
+		manifest.Resolved[name] = DependencyNode{
+			Purl:         p.PURL,
+			Metadata:     toDependencyMetadata(p),
+			Relationship: toDependencyRelationshipType(p),
+			Scope:        toDependencyScope(p),
+			Dependencies: toDependencies(s, p),
+		}
+	}
+
+	out := Manifests{}
+	for k, v := range manifests {
+		out[k] = *v
+	}
+	return out
+}
+
+// dependencyName returns the package PURL without qualifiers (nicer to read, but lossy); it returns "" when the PURL is invalid
+func dependencyName(p pkg.Package) string {
+	purl, err := packageurl.FromString(p.PURL)
+	if err != nil {
+		log.Warnf("Invalid PURL for package: '%s' PURL: '%s' (%v)", p.Name, p.PURL, err)
+		return ""
+	}
+	// don't use qualifiers for this
+	purl.Qualifiers = nil
+	return purl.ToString()
+}
+
+func toDependencyScope(_ pkg.Package) DependencyScope {
+	return DependencyScopeRuntime
+}
+
+func toDependencyRelationshipType(_ pkg.Package) DependencyRelationship {
+	return DependencyRelationshipDirect
+}
+
+func toDependencyMetadata(_ pkg.Package) Metadata {
+	// We have limited properties: up to 8 with reasonably small values
+	// For now, we are encoding the location as part of the key, we are encoding PURLs with most
+	// of the other information Grype might need; and the distro information at the top level
+	// so we don't need anything here yet
+	return Metadata{}
+}
+
+func toDependencies(s *sbom.SBOM, p pkg.Package) (out []string) {
+	for _, r := range s.Relationships {
+		if r.From.ID() == p.ID() {
+			if to, ok := r.To.(pkg.Package); ok {
+				out = append(out, dependencyName(to))
+			}
+		}
+	}
+	return
+}
diff --git a/internal/formats/github/encoder_test.go b/internal/formats/github/encoder_test.go
new file mode 100644
index 000000000..a08f72ce4
--- /dev/null
+++ b/internal/formats/github/encoder_test.go
@@ -0,0 +1,161 @@
+package github
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/anchore/packageurl-go"
+	"github.com/anchore/syft/syft/linux"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/sbom"
+	"github.com/anchore/syft/syft/source"
+)
+
+func Test_toGithubModel(t *testing.T) {
+	s := sbom.SBOM{
+		Source: source.Metadata{
+			Scheme: source.ImageScheme,
+			ImageMetadata: source.ImageMetadata{
+				UserInput:    "ubuntu:18.04",
+				Architecture: "amd64",
+			},
+		},
+		Artifacts: sbom.Artifacts{
+			LinuxDistribution: &linux.Release{
+				ID:        "ubuntu",
+				VersionID: "18.04",
+				IDLike:    []string{"debian"},
+			},
+			PackageCatalog: pkg.NewCatalog(),
+		},
+	}
+	for _, p := range []pkg.Package{
+		{
+			Name:    "pkg-1",
+			Version: "1.0.1",
+			Locations: []source.Location{{
+				Coordinates: source.Coordinates{
+					RealPath:     "/usr/lib",
+					FileSystemID: "fsid-1",
+				},
+			}},
+		},
+		{
+			Name:    "pkg-2",
+			Version: "2.0.2",
+			Locations: []source.Location{{
+				Coordinates: source.Coordinates{
+					RealPath:     "/usr/lib",
+					FileSystemID: "fsid-1",
+				},
+			}},
+		},
+		{
+			Name:    "pkg-3",
+			Version: "3.0.3",
+			Locations: []source.Location{{
+				Coordinates: source.Coordinates{
+					RealPath:     "/etc",
+					FileSystemID: "fsid-1",
+				},
+			}},
+		},
+	} {
+		p.PURL = packageurl.NewPackageURL(
+			"generic",
+			"",
+			p.Name,
+			p.Version,
+			nil,
+			"",
+		).ToString()
+		s.Artifacts.PackageCatalog.Add(p)
+	}
+
+	actual := toGithubModel(&s)
+
+	expected := DependencySnapshot{
+		Version: 0,
+		Detector: DetectorMetadata{
+			Name:    "syft",
+			Version: "0.0.0-dev",
+			URL:     "https://github.com/anchore/syft",
+		},
+		Metadata: Metadata{
+			"syft:distro": "pkg:generic/ubuntu@18.04?like=debian",
+		},
+		Scanned: actual.Scanned,
+		Manifests: Manifests{
+			"ubuntu:18.04:/usr/lib": Manifest{
+				Name: "ubuntu:18.04:/usr/lib",
+				File: FileInfo{
+					SourceLocation: "ubuntu:18.04:/usr/lib",
+				},
+				Metadata: Metadata{
+					"syft:filesystem": "fsid-1",
+				},
+				Resolved: DependencyGraph{
+					"pkg:generic/pkg-1@1.0.1": DependencyNode{
+						Purl:         "pkg:generic/pkg-1@1.0.1",
+						Scope:        DependencyScopeRuntime,
+						Relationship: DependencyRelationshipDirect,
+					},
+					"pkg:generic/pkg-2@2.0.2": DependencyNode{
+						Purl:         "pkg:generic/pkg-2@2.0.2",
+						Scope:        DependencyScopeRuntime,
+						Relationship: DependencyRelationshipDirect,
+					},
+				},
+			},
+			"ubuntu:18.04:/etc": Manifest{
+				Name: "ubuntu:18.04:/etc",
+				File: FileInfo{
+					SourceLocation: "ubuntu:18.04:/etc",
+				},
+				Metadata: Metadata{
+					"syft:filesystem": "fsid-1",
+				},
+				Resolved: DependencyGraph{
+					"pkg:generic/pkg-3@3.0.3": DependencyNode{
+						Purl:         "pkg:generic/pkg-3@3.0.3",
+						Scope:        DependencyScopeRuntime,
+						Relationship: DependencyRelationshipDirect,
+					},
+				},
+			},
+		},
+	}
+
+	// just using JSONEq because it gives a comprehensible diff
+	s1, _ := json.Marshal(expected)
+	s2, _ := json.Marshal(actual)
+	assert.JSONEq(t, string(s1), string(s2))
+
+	// Just test the other schemes:
+	s.Source.Path = "."
+	s.Source.Scheme = source.DirectoryScheme
+	actual = toGithubModel(&s)
+	assert.Equal(t, "etc", actual.Manifests["etc"].Name)
+
+	s.Source.Path = "./artifacts"
+	s.Source.Scheme = source.DirectoryScheme
+	actual = toGithubModel(&s)
+	assert.Equal(t, "artifacts/etc", actual.Manifests["artifacts/etc"].Name)
+
+	s.Source.Path = "/artifacts"
+	s.Source.Scheme = source.DirectoryScheme
+	actual = toGithubModel(&s)
+	assert.Equal(t, "/artifacts/etc", actual.Manifests["/artifacts/etc"].Name)
+
+	s.Source.Path = "./executable"
+	s.Source.Scheme = source.FileScheme
+	actual = toGithubModel(&s)
+	assert.Equal(t, "executable", actual.Manifests["executable"].Name)
+
+	s.Source.Path = "./archive.tar.gz"
+	s.Source.Scheme = source.FileScheme
+	actual = toGithubModel(&s)
+	assert.Equal(t, "archive.tar.gz:/etc", actual.Manifests["archive.tar.gz:/etc"].Name)
+}
diff --git a/internal/formats/github/format.go b/internal/formats/github/format.go
new file mode 100644
index 000000000..fad5dbff1
--- /dev/null
+++ b/internal/formats/github/format.go
@@ -0,0 +1,29 @@
+package github
+
+import (
+	"encoding/json"
+	"io"
+
+	"github.com/anchore/syft/syft/sbom"
+)
+
+const ID sbom.FormatID = "github-0-json"
+
+func Format() sbom.Format {
+	return sbom.NewFormat(
+		ID,
+		func(writer io.Writer, s sbom.SBOM) error {
+			bom := toGithubModel(&s)
+
+			encoded, err := json.MarshalIndent(bom, "", " ")
+			if err != nil {
+				return err
+			}
+			_, err = writer.Write(encoded)
+
+			return err
+		},
+		nil,
+		nil,
+	)
+}
diff --git a/internal/formats/github/github_dependency_api.go b/internal/formats/github/github_dependency_api.go
new file mode 100644
index 000000000..867434190
--- /dev/null
+++ b/internal/formats/github/github_dependency_api.go
@@ -0,0 +1,78 @@
+package github
+
+// Derived from: https://gist.github.com/reiddraper/fdab2883db0f372c146d1a750fc1c43f
+
+type DependencySnapshot struct {
+	Version   int              `json:"version"`
+	Job       Job              `json:"job,omitempty"` // !omitempty
+	Sha       string           `json:"sha,omitempty"` // !omitempty sha of the Git commit
+	Ref       string           `json:"ref,omitempty"` // !omitempty ref of the Git commit example "refs/heads/main"
+	Detector  DetectorMetadata `json:"detector,omitempty"`
+	Metadata  Metadata         `json:"metadata,omitempty"`
+	Manifests Manifests        `json:"manifests,omitempty"`
+	Scanned   ISO8601Date      `json:"scanned,omitempty"`
+}
+
+type Job struct {
+	Name    string `json:"name,omitempty"` // !omitempty
+	ID      string `json:"id,omitempty"`   // !omitempty
+	HTMLURL string `json:"html_url,omitempty"`
+}
+
+type DetectorMetadata struct {
+	Name    string `json:"name,omitempty"`
+	URL     string `json:"url,omitempty"`
+	Version string `json:"version,omitempty"`
+}
+
+type Manifests map[string]Manifest
+
+// Manifest is a collection of related dependencies, either declared in a file,
+// or representing a logical group of dependencies.
+type Manifest struct {
+	Name     string          `json:"name"`
+	File     FileInfo        `json:"file"`
+	Metadata Metadata        `json:"metadata,omitempty"`
+	Resolved DependencyGraph `json:"resolved,omitempty"`
+}
+
+type FileInfo struct {
+	SourceLocation string `json:"source_location,omitempty"`
+}
+
+// DependencyRelationship is a notation of whether a dependency is requested directly
+// by this manifest, or is a dependency of another dependency.
+type DependencyRelationship string
+
+const (
+	DependencyRelationshipDirect   DependencyRelationship = "direct"
+	DependencyRelationshipIndirect DependencyRelationship = "indirect"
+)
+
+// DependencyScope is a notation of whether the dependency is required for the primary
+// build artifact (runtime), or is only used for development.
+// Future versions of this specification may allow for more granular
+// scopes, like `runtimeserver`, `runtimeshipped`,
+// `developmenttest`, `developmentbenchmark`.
+type DependencyScope string
+
+const (
+	DependencyScopeRuntime     DependencyScope = "runtime"
+	DependencyScopeDevelopment DependencyScope = "development"
+)
+
+type DependencyNode struct {
+	Purl         string                 `json:"purl,omitempty"`
+	Metadata     Metadata               `json:"metadata,omitempty"`
+	Relationship DependencyRelationship `json:"relationship,omitempty"`
+	Scope        DependencyScope        `json:"scope,omitempty"`
+	Dependencies []string               `json:"dependencies,omitempty"`
+}
+
+type DependencyGraph map[string]DependencyNode
+
+type ISO8601Date = string
+
+type Scalar interface{} // should be: null | boolean | string | number
+
+type Metadata map[string]Scalar
diff --git a/syft/formats.go b/syft/formats.go
index 1e8a44983..8623848df 100644
--- a/syft/formats.go
+++ b/syft/formats.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/anchore/syft/internal/formats/cyclonedxjson"
 	"github.com/anchore/syft/internal/formats/cyclonedxxml"
+	"github.com/anchore/syft/internal/formats/github"
 	"github.com/anchore/syft/internal/formats/spdx22json"
 	"github.com/anchore/syft/internal/formats/spdx22tagvalue"
 	"github.com/anchore/syft/internal/formats/syftjson"
@@ -21,6 +22,7 @@ const (
 	TableFormatID         = table.ID
 	CycloneDxXMLFormatID  = cyclonedxxml.ID
 	CycloneDxJSONFormatID = cyclonedxjson.ID
+	GitHubID              = github.ID
 	SPDXTagValueFormatID  = spdx22tagvalue.ID
 	SPDXJSONFormatID      = spdx22json.ID
 )
@@ -32,6 +34,7 @@ func init() {
 		syftjson.Format(),
 		cyclonedxxml.Format(),
 		cyclonedxjson.Format(),
+		github.Format(),
 		spdx22tagvalue.Format(),
 		spdx22json.Format(),
 		table.Format(),
@@ -71,6 +74,8 @@ func FormatByName(name string) sbom.Format {
 		return FormatByID(cyclonedxxml.ID)
 	case "cyclonedxjson":
 		return FormatByID(cyclonedxjson.ID)
+	case "github":
+		return FormatByID(github.ID)
 	case "spdx", "spdxtv", "spdxtagvalue":
 		return FormatByID(spdx22tagvalue.ID)
 	case "spdxjson":