mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 02:26:42 +01:00
feat: Add support for scanning GGUF models from OCI registries (#4335)
--------- Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
3a23cfff1d
commit
2c5e193f7a
@ -59,7 +59,9 @@ const (
|
||||
nonImageSchemeHelp = ` {{.appName}} {{.command}} dir:path/to/yourproject read directly from a path on disk (any directory)
|
||||
{{.appName}} {{.command}} file:path/to/yourproject/file read directly from a path on disk (any single file)
|
||||
`
|
||||
scanSchemeHelp = "\n " + schemeHelpHeader + "\n" + imageSchemeHelp + nonImageSchemeHelp
|
||||
modelSchemeHelp = ` {{.appName}} {{.command}} oci-model-registry:ai/llama3.2 scan an OCI model artifact from a registry (e.g. Docker Hub AI models)
|
||||
`
|
||||
scanSchemeHelp = "\n " + schemeHelpHeader + "\n" + imageSchemeHelp + modelSchemeHelp + nonImageSchemeHelp
|
||||
|
||||
scanHelp = scanExample + scanSchemeHelp
|
||||
)
|
||||
|
||||
@ -3,10 +3,12 @@ package internal
|
||||
const (
|
||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||
JSONSchemaVersion = "16.1.2"
|
||||
JSONSchemaVersion = "16.1.3"
|
||||
|
||||
// Changelog
|
||||
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).
|
||||
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field)
|
||||
// 16.1.2 - placeholder for 16.1.2 changelog
|
||||
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
|
||||
|
||||
)
|
||||
|
||||
@ -82,12 +82,29 @@ func assembleTypeContainer(items []any) (any, map[string]string) {
|
||||
return reflect.New(structType).Elem().Interface(), mapping
|
||||
}
|
||||
|
||||
//nolint:funlen
|
||||
func build() *jsonschema.Schema {
|
||||
// create metadata mapping first so we can use it in the Namer function for self-referential types
|
||||
pkgMetadataContainer, pkgMetadataMapping := assembleTypeContainer(packagemetadata.AllTypes())
|
||||
pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer)
|
||||
|
||||
// create a set of valid metadata display names for lookup
|
||||
// (since Namer now returns display names, the schema definitions use display names as keys)
|
||||
pkgMetadataDisplayNames := make(map[string]struct{}, len(pkgMetadataMapping))
|
||||
for _, displayName := range pkgMetadataMapping {
|
||||
pkgMetadataDisplayNames[displayName] = struct{}{}
|
||||
}
|
||||
|
||||
reflector := &jsonschema.Reflector{
|
||||
BaseSchemaID: schemaID(),
|
||||
AllowAdditionalProperties: true,
|
||||
Namer: func(r reflect.Type) string {
|
||||
return strings.TrimPrefix(r.Name(), "JSON")
|
||||
name := strings.TrimPrefix(r.Name(), "JSON")
|
||||
// if this is a metadata type, use the mapped name for consistent references
|
||||
if mappedName, ok := pkgMetadataMapping[name]; ok {
|
||||
return mappedName
|
||||
}
|
||||
return name
|
||||
},
|
||||
CommentMap: make(map[string]string),
|
||||
}
|
||||
@ -123,9 +140,6 @@ func build() *jsonschema.Schema {
|
||||
copyAliasFieldComments(reflector.CommentMap, repoRoot)
|
||||
}
|
||||
|
||||
pkgMetadataContainer, pkgMetadataMapping := assembleTypeContainer(packagemetadata.AllTypes())
|
||||
pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer)
|
||||
|
||||
// srcMetadataContainer := assembleTypeContainer(sourcemetadata.AllTypes())
|
||||
// srcMetadataContainerType := reflect.TypeOf(srcMetadataContainer)
|
||||
|
||||
@ -144,11 +158,10 @@ func build() *jsonschema.Schema {
|
||||
continue
|
||||
}
|
||||
|
||||
displayName, ok := pkgMetadataMapping[typeName]
|
||||
if ok {
|
||||
// this is a package metadata type...
|
||||
documentSchema.Definitions[displayName] = definition
|
||||
metadataNames = append(metadataNames, displayName)
|
||||
if _, ok := pkgMetadataDisplayNames[typeName]; ok {
|
||||
// this is a package metadata type (typeName is already the display name from Namer)
|
||||
documentSchema.Definitions[typeName] = definition
|
||||
metadataNames = append(metadataNames, typeName)
|
||||
} else {
|
||||
// this is a type that the metadata type uses (e.g. DpkgFileRecord)
|
||||
documentSchema.Definitions[typeName] = definition
|
||||
|
||||
@ -26,6 +26,7 @@ var knownNonMetadataTypeNames = strset.New(
|
||||
// known to be metadata types themselves. Adding to this list will prevent the removal of the type from the schema.
|
||||
var knownMetadataTypeNames = strset.New(
|
||||
"DotnetPortableExecutableEntry",
|
||||
"GGUFFileHeader",
|
||||
)
|
||||
|
||||
func DiscoverTypeNames() ([]string, error) {
|
||||
|
||||
@ -94,25 +94,33 @@ func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error)
|
||||
|
||||
// loop over all types declared in the type declaration
|
||||
for _, typ := range spec.Specs {
|
||||
// check if the type is a struct type
|
||||
spec, ok := typ.(*ast.TypeSpec)
|
||||
if !ok || spec.Type == nil {
|
||||
typeSpec, ok := typ.(*ast.TypeSpec)
|
||||
if !ok || typeSpec.Type == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
structType, ok := spec.Type.(*ast.StructType)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
// check if the struct type ends with "Metadata"
|
||||
name := spec.Name.String()
|
||||
name := typeSpec.Name.String()
|
||||
|
||||
// only look for exported types that end with "Metadata"
|
||||
if isMetadataTypeCandidate(name) {
|
||||
// print the full declaration of the struct type
|
||||
if !isMetadataTypeCandidate(name) {
|
||||
continue
|
||||
}
|
||||
|
||||
metadataDefinitions = append(metadataDefinitions, name)
|
||||
|
||||
// handle struct types (e.g., "type FooMetadata struct {...}")
|
||||
if structType, ok := typeSpec.Type.(*ast.StructType); ok {
|
||||
usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...)
|
||||
continue
|
||||
}
|
||||
|
||||
// handle type definitions from another type (e.g., "type FooMetadata BarMetadata")
|
||||
// if the base type is NOT a metadata candidate, track it as used
|
||||
// (e.g., we want both ImageMetadata and OCIModelMetadata which is an alias to it)
|
||||
if ident, ok := typeSpec.Type.(*ast.Ident); ok {
|
||||
if !isMetadataTypeCandidate(ident.Name) {
|
||||
usedTypeNames = append(usedTypeNames, ident.Name)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,5 +6,5 @@ import "github.com/anchore/syft/syft/source"
|
||||
|
||||
// AllTypes returns a list of all source metadata types that syft supports (that are represented in the source.Description.Metadata field).
|
||||
func AllTypes() []any {
|
||||
return []any{source.DirectoryMetadata{}, source.FileMetadata{}, source.ImageMetadata{}, source.SnapMetadata{}}
|
||||
return []any{source.DirectoryMetadata{}, source.FileMetadata{}, source.ImageMetadata{}, source.OCIModelMetadata{}, source.SnapMetadata{}}
|
||||
}
|
||||
|
||||
@ -12,6 +12,7 @@ var jsonNameFromType = map[reflect.Type][]string{
|
||||
reflect.TypeOf(source.FileMetadata{}): {"file"},
|
||||
reflect.TypeOf(source.ImageMetadata{}): {"image"},
|
||||
reflect.TypeOf(source.SnapMetadata{}): {"snap"},
|
||||
reflect.TypeOf(source.OCIModelMetadata{}): {"oci-model"},
|
||||
}
|
||||
|
||||
func AllTypeNames() []string {
|
||||
|
||||
4248
schema/json/schema-16.1.3.json
Normal file
4248
schema/json/schema-16.1.3.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "anchore.io/schema/syft/json/16.1.2/document",
|
||||
"$id": "anchore.io/schema/syft/json/16.1.3/document",
|
||||
"$ref": "#/$defs/Document",
|
||||
"$defs": {
|
||||
"AlpmDbEntry": {
|
||||
@ -1478,6 +1478,13 @@
|
||||
"metadataHash": {
|
||||
"type": "string",
|
||||
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
|
||||
},
|
||||
"parts": {
|
||||
"items": {
|
||||
"$ref": "#/$defs/GgufFileHeader"
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Parts contains headers from additional GGUF files that were merged\ninto this package during post-processing (e.g., from OCI layers without model names)."
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
|
||||
@ -477,7 +477,7 @@ func (c *CreateSBOMConfig) Create(ctx context.Context, src source.Source) (*sbom
|
||||
|
||||
func findDefaultTags(src source.Description) ([]string, error) {
|
||||
switch m := src.Metadata.(type) {
|
||||
case source.ImageMetadata:
|
||||
case source.ImageMetadata, source.OCIModelMetadata:
|
||||
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||
case source.FileMetadata, source.DirectoryMetadata:
|
||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||
|
||||
@ -13,6 +13,7 @@ import (
|
||||
)
|
||||
|
||||
var _ Resolver = (*MockResolver)(nil)
|
||||
var _ OCIMediaTypeResolver = (*MockResolver)(nil)
|
||||
|
||||
// MockResolver implements the FileResolver interface and is intended for use *only in test code*.
|
||||
// It provides an implementation that can resolve local filesystem paths using only a provided discrete list of file
|
||||
@ -21,6 +22,7 @@ type MockResolver struct {
|
||||
locations []Location
|
||||
metadata map[Coordinates]Metadata
|
||||
mimeTypeIndex map[string][]Location
|
||||
mediaTypeIndex map[string][]Location
|
||||
extension map[string][]Location
|
||||
basename map[string][]Location
|
||||
}
|
||||
@ -72,6 +74,34 @@ func NewMockResolverForPathsWithMetadata(metadata map[Coordinates]Metadata) *Moc
|
||||
}
|
||||
}
|
||||
|
||||
// NewMockResolverForMediaTypes creates a MockResolver that can resolve files by media type.
|
||||
// The mediaTypes map specifies which locations should be returned for each media type.
|
||||
func NewMockResolverForMediaTypes(mediaTypes map[string][]Location) *MockResolver {
|
||||
var locations []Location
|
||||
mediaTypeIndex := make(map[string][]Location)
|
||||
extension := make(map[string][]Location)
|
||||
basename := make(map[string][]Location)
|
||||
|
||||
for mediaType, locs := range mediaTypes {
|
||||
mediaTypeIndex[mediaType] = append(mediaTypeIndex[mediaType], locs...)
|
||||
for _, l := range locs {
|
||||
locations = append(locations, l)
|
||||
ext := path.Ext(l.RealPath)
|
||||
extension[ext] = append(extension[ext], l)
|
||||
bn := path.Base(l.RealPath)
|
||||
basename[bn] = append(basename[bn], l)
|
||||
}
|
||||
}
|
||||
|
||||
return &MockResolver{
|
||||
locations: locations,
|
||||
metadata: make(map[Coordinates]Metadata),
|
||||
mediaTypeIndex: mediaTypeIndex,
|
||||
extension: extension,
|
||||
basename: basename,
|
||||
}
|
||||
}
|
||||
|
||||
// HasPath indicates if the given path exists in the underlying source.
|
||||
func (r MockResolver) HasPath(path string) bool {
|
||||
for _, l := range r.locations {
|
||||
@ -189,6 +219,14 @@ func (r MockResolver) FilesByMIMEType(types ...string) ([]Location, error) {
|
||||
return locations, nil
|
||||
}
|
||||
|
||||
func (r MockResolver) FilesByMediaType(types ...string) ([]Location, error) {
|
||||
var locations []Location
|
||||
for _, ty := range types {
|
||||
locations = append(locations, r.mediaTypeIndex[ty]...)
|
||||
}
|
||||
return locations, nil
|
||||
}
|
||||
|
||||
func (r MockResolver) FilesByExtension(extensions ...string) ([]Location, error) {
|
||||
var results []Location
|
||||
for _, ext := range extensions {
|
||||
|
||||
@ -52,6 +52,17 @@ type PathResolver interface {
|
||||
RelativeFileByPath(_ Location, path string) *Location
|
||||
}
|
||||
|
||||
// OCIMediaTypeResolver resolves single files as a layer in an OCI artifact for a given media type.
|
||||
type OCIMediaTypeResolver interface {
|
||||
// FilesByMediaType fetches a set of file references which the contents have been classified as one of the given Media Types.
|
||||
// The implementation for this may vary, however, this was first implemented to classify ai globs stored in OCI images.
|
||||
// The following considerations should be made when implementing:
|
||||
// - only return locations to files (NOT directories)
|
||||
// - locations for the implementer should be "/" and the fsid should be the layer digest the glob was found
|
||||
// - locations should be used with the FileContents API to return readers to the temporary data
|
||||
FilesByMediaType(types ...string) ([]Location, error)
|
||||
}
|
||||
|
||||
// LocationResolver provides iteration over all file locations in a source.
|
||||
type LocationResolver interface {
|
||||
// AllLocations returns a channel of all file references from the underlying source.
|
||||
|
||||
@ -35,6 +35,7 @@ const (
|
||||
spdxPrimaryPurposeOther = "OTHER"
|
||||
|
||||
prefixImage = "Image"
|
||||
prefixOCIModel = "OCIModel"
|
||||
prefixDirectory = "Directory"
|
||||
prefixFile = "File"
|
||||
prefixSnap = "Snap"
|
||||
@ -215,6 +216,36 @@ func toRootPackage(s source.Description) *spdx.Package {
|
||||
}
|
||||
}
|
||||
|
||||
case source.OCIModelMetadata:
|
||||
prefix = prefixOCIModel
|
||||
purpose = spdxPrimaryPurposeContainer
|
||||
|
||||
qualifiers := packageurl.Qualifiers{
|
||||
{
|
||||
Key: "arch",
|
||||
Value: m.Architecture,
|
||||
},
|
||||
}
|
||||
|
||||
ref, _ := reference.Parse(m.UserInput)
|
||||
if ref, ok := ref.(reference.NamedTagged); ok {
|
||||
qualifiers = append(qualifiers, packageurl.Qualifier{
|
||||
Key: "tag",
|
||||
Value: ref.Tag(),
|
||||
})
|
||||
}
|
||||
|
||||
c := toChecksum(m.ManifestDigest)
|
||||
if c != nil {
|
||||
checksums = append(checksums, *c)
|
||||
purl = &packageurl.PackageURL{
|
||||
Type: "oci",
|
||||
Name: s.Name,
|
||||
Version: m.ManifestDigest,
|
||||
Qualifiers: qualifiers,
|
||||
}
|
||||
}
|
||||
|
||||
case source.DirectoryMetadata:
|
||||
prefix = prefixDirectory
|
||||
purpose = spdxPrimaryPurposeFile
|
||||
|
||||
@ -316,6 +316,81 @@ func Test_toFormatModel(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
in: sbom.SBOM{
|
||||
Source: source.Description{
|
||||
Name: "llama",
|
||||
Version: "sha256:d34db33f",
|
||||
Supplier: "Model Provider",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "model-repo/llama:latest",
|
||||
ManifestDigest: "sha256:d34db33f",
|
||||
},
|
||||
},
|
||||
Artifacts: sbom.Artifacts{
|
||||
Packages: pkg.NewCollection(pkg.Package{
|
||||
Name: "pkg-1",
|
||||
Version: "version-1",
|
||||
}),
|
||||
},
|
||||
},
|
||||
expected: &spdx.Document{
|
||||
SPDXIdentifier: "DOCUMENT",
|
||||
SPDXVersion: spdx.Version,
|
||||
DataLicense: spdx.DataLicense,
|
||||
DocumentName: "llama",
|
||||
Packages: []*spdx.Package{
|
||||
{
|
||||
PackageSPDXIdentifier: "Package-pkg-1-pkg-1",
|
||||
PackageName: "pkg-1",
|
||||
PackageVersion: "version-1",
|
||||
PackageSupplier: &spdx.Supplier{
|
||||
Supplier: "Model Provider",
|
||||
SupplierType: "Organization",
|
||||
},
|
||||
},
|
||||
{
|
||||
PackageSPDXIdentifier: "DocumentRoot-OCIModel-llama",
|
||||
PackageName: "llama",
|
||||
PackageVersion: "sha256:d34db33f",
|
||||
PrimaryPackagePurpose: "CONTAINER",
|
||||
PackageChecksums: []spdx.Checksum{{Algorithm: "SHA256", Value: "d34db33f"}},
|
||||
PackageExternalReferences: []*v2_3.PackageExternalReference{
|
||||
{
|
||||
Category: "PACKAGE-MANAGER",
|
||||
RefType: "purl",
|
||||
Locator: "pkg:oci/llama@sha256%3Ad34db33f?arch=&tag=latest",
|
||||
},
|
||||
},
|
||||
PackageSupplier: &spdx.Supplier{
|
||||
Supplier: "Model Provider",
|
||||
SupplierType: "Organization",
|
||||
},
|
||||
},
|
||||
},
|
||||
Relationships: []*spdx.Relationship{
|
||||
{
|
||||
RefA: spdx.DocElementID{
|
||||
ElementRefID: "DocumentRoot-OCIModel-llama",
|
||||
},
|
||||
RefB: spdx.DocElementID{
|
||||
ElementRefID: "Package-pkg-1-pkg-1",
|
||||
},
|
||||
Relationship: spdx.RelationshipContains,
|
||||
},
|
||||
{
|
||||
RefA: spdx.DocElementID{
|
||||
ElementRefID: "DOCUMENT",
|
||||
},
|
||||
RefB: spdx.DocElementID{
|
||||
ElementRefID: "DocumentRoot-OCIModel-llama",
|
||||
},
|
||||
Relationship: spdx.RelationshipDescribes,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
@ -122,6 +122,9 @@ func toPath(s source.Description, p pkg.Package) string {
|
||||
case source.ImageMetadata:
|
||||
image := strings.ReplaceAll(metadata.UserInput, ":/", "//")
|
||||
return fmt.Sprintf("%s:/%s", image, packagePath)
|
||||
case source.OCIModelMetadata:
|
||||
image := strings.ReplaceAll(metadata.UserInput, ":/", "//")
|
||||
return fmt.Sprintf("%s:/%s", image, packagePath)
|
||||
case source.FileMetadata:
|
||||
path := trimRelative(metadata.Path)
|
||||
if isArchive(metadata.Path) {
|
||||
|
||||
@ -178,6 +178,11 @@ func Test_toGithubModel(t *testing.T) {
|
||||
metadata: source.SnapMetadata{},
|
||||
testPath: "name:/etc",
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
metadata: source.OCIModelMetadata{UserInput: "model-repo/llama:latest"},
|
||||
testPath: "model-repo/llama:latest:/etc",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
@ -12,6 +12,8 @@ func DocumentName(src source.Description) string {
|
||||
switch metadata := src.Metadata.(type) {
|
||||
case source.ImageMetadata:
|
||||
return metadata.UserInput
|
||||
case source.OCIModelMetadata:
|
||||
return metadata.UserInput
|
||||
case source.DirectoryMetadata:
|
||||
return metadata.Path
|
||||
case source.FileMetadata:
|
||||
|
||||
@ -54,6 +54,17 @@ func Test_DocumentName(t *testing.T) {
|
||||
},
|
||||
expected: "some/name",
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
srcMetadata: source.Description{
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "model-repo/name:tag",
|
||||
ID: "id",
|
||||
ManifestDigest: "digest",
|
||||
},
|
||||
},
|
||||
expected: "model-repo/name:tag",
|
||||
},
|
||||
{
|
||||
name: "named",
|
||||
srcMetadata: source.Description{
|
||||
|
||||
@ -14,6 +14,7 @@ import (
|
||||
|
||||
const (
|
||||
InputImage = "image"
|
||||
InputOCIModel = "oci-model"
|
||||
InputDirectory = "dir"
|
||||
InputFile = "file"
|
||||
InputSnap = "snap"
|
||||
@ -30,6 +31,8 @@ func DocumentNamespace(name string, src source.Description, desc sbom.Descriptor
|
||||
switch src.Metadata.(type) {
|
||||
case source.ImageMetadata:
|
||||
input = InputImage
|
||||
case source.OCIModelMetadata:
|
||||
input = InputOCIModel
|
||||
case source.DirectoryMetadata:
|
||||
input = InputDirectory
|
||||
case source.FileMetadata:
|
||||
|
||||
@ -61,6 +61,18 @@ func Test_DocumentNamespace(t *testing.T) {
|
||||
},
|
||||
expected: "https://anchore.com/syft/snap/my-name-",
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
inputName: "my-name",
|
||||
src: source.Description{
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "model-repo/name:tag",
|
||||
ID: "id",
|
||||
ManifestDigest: "digest",
|
||||
},
|
||||
},
|
||||
expected: "https://anchore.com/syft/oci-model/my-name-",
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
@ -83,7 +83,7 @@ func SourceInfo(p pkg.Package) string {
|
||||
case pkg.TerraformPkg:
|
||||
answer = "acquired package info from Terraform dependency lock file"
|
||||
case pkg.ModelPkg:
|
||||
answer = "acquired package info from AI artifact (e.g. GGUF File"
|
||||
answer = "acquired package info from AI artifact (e.g. GGUF File)"
|
||||
default:
|
||||
answer = "acquired package info from the following paths"
|
||||
}
|
||||
|
||||
@ -190,6 +190,37 @@ func TestSource_UnmarshalJSON(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
input: []byte(`{
|
||||
"id": "foobar",
|
||||
"type": "oci-model",
|
||||
"metadata": {
|
||||
"userInput": "model-repo/llama:latest",
|
||||
"imageID": "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a",
|
||||
"manifestDigest": "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c",
|
||||
"mediaType": "application/vnd.oci.image.manifest.v1+json",
|
||||
"tags": [],
|
||||
"imageSize": 5576169,
|
||||
"layers": [],
|
||||
"repoDigests": []
|
||||
}
|
||||
}`),
|
||||
expected: &Source{
|
||||
ID: "foobar",
|
||||
Type: "oci-model",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "model-repo/llama:latest",
|
||||
ID: "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a",
|
||||
ManifestDigest: "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c",
|
||||
MediaType: "application/vnd.oci.image.manifest.v1+json",
|
||||
Tags: []string{},
|
||||
Size: 5576169,
|
||||
Layers: []source.LayerMetadata{},
|
||||
RepoDigests: []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "unknown source type",
|
||||
input: []byte(`{
|
||||
|
||||
@ -325,7 +325,17 @@ func toSourceModel(src source.Description) model.Source {
|
||||
Metadata: src.Metadata,
|
||||
}
|
||||
|
||||
if metadata, ok := src.Metadata.(source.ImageMetadata); ok {
|
||||
switch metadata := src.Metadata.(type) {
|
||||
case source.ImageMetadata:
|
||||
// ensure that empty collections are not shown as null
|
||||
if metadata.RepoDigests == nil {
|
||||
metadata.RepoDigests = []string{}
|
||||
}
|
||||
if metadata.Tags == nil {
|
||||
metadata.Tags = []string{}
|
||||
}
|
||||
m.Metadata = metadata
|
||||
case source.OCIModelMetadata:
|
||||
// ensure that empty collections are not shown as null
|
||||
if metadata.RepoDigests == nil {
|
||||
metadata.RepoDigests = []string{}
|
||||
|
||||
@ -161,6 +161,34 @@ func Test_toSourceModel(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
src: source.Description{
|
||||
ID: "test-id",
|
||||
Name: "some-name",
|
||||
Version: "some-version",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
expected: model.Source{
|
||||
ID: "test-id",
|
||||
Name: "some-name",
|
||||
Version: "some-version",
|
||||
Type: "oci-model",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
RepoDigests: []string{},
|
||||
Tags: []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
// below are regression tests for when the name/version are not provided
|
||||
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
||||
{
|
||||
@ -225,6 +253,30 @@ func Test_toSourceModel(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model - no name/version",
|
||||
src: source.Description{
|
||||
ID: "test-id",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
expected: model.Source{
|
||||
ID: "test-id",
|
||||
Type: "oci-model",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
RepoDigests: []string{},
|
||||
Tags: []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
@ -130,6 +130,32 @@ func Test_toSyftSourceData(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
src: model.Source{
|
||||
ID: "the-id",
|
||||
Name: "some-name",
|
||||
Version: "some-version",
|
||||
Type: "oci-model",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
expected: &source.Description{
|
||||
ID: "the-id",
|
||||
Name: "some-name",
|
||||
Version: "some-version",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
},
|
||||
// below are regression tests for when the name/version are not provided
|
||||
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
||||
{
|
||||
@ -192,6 +218,28 @@ func Test_toSyftSourceData(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "oci-model - no name/version",
|
||||
src: model.Source{
|
||||
ID: "the-id",
|
||||
Type: "oci-model",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
expected: &source.Description{
|
||||
ID: "the-id",
|
||||
Metadata: source.OCIModelMetadata{
|
||||
UserInput: "user-input",
|
||||
ID: "id...",
|
||||
ManifestDigest: "digest...",
|
||||
MediaType: "type...",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
@ -65,6 +65,8 @@ func validateSourcePlatform(src source.Source, cfg *GetSourceConfig) error {
|
||||
switch meta.(type) {
|
||||
case *source.ImageMetadata, source.ImageMetadata:
|
||||
return nil
|
||||
case *source.OCIModelMetadata, source.OCIModelMetadata:
|
||||
return nil
|
||||
case *source.SnapMetadata, source.SnapMetadata:
|
||||
return nil
|
||||
default:
|
||||
|
||||
@ -31,7 +31,8 @@ func TestGetProviders_Sources(t *testing.T) {
|
||||
t.Errorf("Expected no error for Sources parameter, got: %v", err)
|
||||
}
|
||||
|
||||
if len(providers) != 1 {
|
||||
t.Errorf("Expected 1 providers, got %d", len(providers))
|
||||
// Registry tag has two providers: OCIModel and Image
|
||||
if len(providers) != 2 {
|
||||
t.Errorf("Expected 2 providers, got %d", len(providers))
|
||||
}
|
||||
}
|
||||
|
||||
@ -111,6 +111,10 @@ func TestValidateSourcePlatform_SupportedMetadataTypes(t *testing.T) {
|
||||
metadata: source.FileMetadata{},
|
||||
wantErr: require.Error,
|
||||
},
|
||||
{
|
||||
name: "oci-model",
|
||||
metadata: source.OCIModelMetadata{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
141
syft/internal/fileresolver/container_image_model.go
Normal file
141
syft/internal/fileresolver/container_image_model.go
Normal file
@ -0,0 +1,141 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
)
|
||||
|
||||
var _ file.Resolver = (*ContainerImageModel)(nil)
|
||||
var _ file.OCIMediaTypeResolver = (*ContainerImageModel)(nil)
|
||||
|
||||
// LayerInfo holds information about an OCI model layer file stored on disk.
|
||||
type LayerInfo struct {
|
||||
TempPath string // Path to the temp file on disk
|
||||
MediaType string // OCI media type of the layer
|
||||
}
|
||||
|
||||
// ContainerImageModel is a file.Resolver implementation that provides access to
|
||||
// GGUF header data fetched from OCI model artifacts via range-GET requests.
|
||||
// This does not fetch the entire model from the registry, only a sliver of it.
|
||||
type ContainerImageModel struct {
|
||||
tempDir string // temp directory containing all layer files
|
||||
layerFiles map[string]LayerInfo // digest -> layer info (temp path + media type)
|
||||
locations map[string]file.Location // digest -> location
|
||||
}
|
||||
|
||||
// NewContainerImageModel creates a new resolver with the given temp directory and layer files.
|
||||
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo) *ContainerImageModel {
|
||||
// Create locations for all layer files
|
||||
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"
|
||||
locations := make(map[string]file.Location, len(layerFiles))
|
||||
for digest := range layerFiles {
|
||||
// Use NewVirtualLocationFromCoordinates with digest as FileSystemID
|
||||
coords := file.NewCoordinates("/", digest)
|
||||
locations[digest] = file.NewVirtualLocationFromCoordinates(coords, "/")
|
||||
}
|
||||
|
||||
return &ContainerImageModel{
|
||||
tempDir: tempDir,
|
||||
layerFiles: layerFiles,
|
||||
locations: locations,
|
||||
}
|
||||
}
|
||||
|
||||
// FilesByMediaType returns locations for layers matching the given media type patterns.
|
||||
// Patterns support glob-style matching (e.g., "application/vnd.docker.ai*").
|
||||
func (r *ContainerImageModel) FilesByMediaType(types ...string) ([]file.Location, error) {
|
||||
var matches []file.Location
|
||||
|
||||
for digest, info := range r.layerFiles {
|
||||
for _, pattern := range types {
|
||||
matched, err := filepath.Match(pattern, info.MediaType)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid media type pattern %q: %w", pattern, err)
|
||||
}
|
||||
if matched {
|
||||
if loc, ok := r.locations[digest]; ok {
|
||||
matches = append(matches, loc)
|
||||
}
|
||||
break // Don't add the same location twice
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return matches, nil
|
||||
}
|
||||
|
||||
// FileContentsByLocation returns the contents of the file at the given location.
|
||||
// The location's FileSystemID contains the layer digest, which is used to look up the temp file.
|
||||
// This method is used as part of the content selection in the generic cataloger when locations
|
||||
// are returned by searching for contents by media type.
|
||||
func (r *ContainerImageModel) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||
// Look up the temp file path using the digest stored in FileSystemID
|
||||
digest := location.FileSystemID
|
||||
info, ok := r.layerFiles[digest]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no file found for digest %q", digest)
|
||||
}
|
||||
return os.Open(info.TempPath)
|
||||
}
|
||||
|
||||
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||
func (r *ContainerImageModel) FileMetadataByLocation(_ file.Location) (m file.Metadata, err error) {
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// HasPath checks if the given path exists in the resolver.
|
||||
func (r *ContainerImageModel) HasPath(path string) bool {
|
||||
// The virtual path is "/" for all files
|
||||
if path == "/" && len(r.layerFiles) > 0 {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// FilesByPath returns locations for files matching the given paths.
|
||||
func (r *ContainerImageModel) FilesByPath(_ ...string) ([]file.Location, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||
func (r *ContainerImageModel) FilesByGlob(_ ...string) ([]file.Location, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||
func (r *ContainerImageModel) FilesByMIMEType(_ ...string) ([]file.Location, error) {
|
||||
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||
// This is not applicable for OCI model artifacts.
|
||||
func (r *ContainerImageModel) RelativeFileByPath(_ file.Location, _ string) *file.Location {
|
||||
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||
return nil
|
||||
}
|
||||
|
||||
// AllLocations returns all file locations in the resolver.
|
||||
func (r *ContainerImageModel) AllLocations(ctx context.Context) <-chan file.Location {
|
||||
ch := make(chan file.Location)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
for _, loc := range r.locations {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case ch <- loc:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
130
syft/internal/fileresolver/container_image_model_test.go
Normal file
130
syft/internal/fileresolver/container_image_model_test.go
Normal file
@ -0,0 +1,130 @@
|
||||
package fileresolver
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
)
|
||||
|
||||
const ggufLayerMediaType = "application/vnd.docker.ai.gguf.v3"
|
||||
|
||||
func TestOCIModelResolver_FilesByMediaType(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
layerFiles map[string]LayerInfo
|
||||
patterns []string
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "exact match GGUF",
|
||||
layerFiles: map[string]LayerInfo{
|
||||
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||
},
|
||||
patterns: []string{ggufLayerMediaType},
|
||||
expected: 1,
|
||||
},
|
||||
{
|
||||
name: "glob match docker ai",
|
||||
layerFiles: map[string]LayerInfo{
|
||||
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||
},
|
||||
patterns: []string{"application/vnd.docker.ai*"},
|
||||
expected: 1,
|
||||
},
|
||||
{
|
||||
name: "no match",
|
||||
layerFiles: map[string]LayerInfo{
|
||||
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||
},
|
||||
patterns: []string{"application/json"},
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "multiple patterns match multiple files",
|
||||
layerFiles: map[string]LayerInfo{
|
||||
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||
"sha256:def456": {TempPath: filepath.Join(tempDir, "f2"), MediaType: "application/octet-stream"},
|
||||
},
|
||||
patterns: []string{ggufLayerMediaType, "application/octet-stream"},
|
||||
expected: 2,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
resolver := NewContainerImageModel(tempDir, test.layerFiles)
|
||||
|
||||
locations, err := resolver.FilesByMediaType(test.patterns...)
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, locations, test.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestOCIModelResolver_FileContentsByLocation(t *testing.T) {
|
||||
tempDir := t.TempDir()
|
||||
content := []byte("test gguf content")
|
||||
|
||||
tempFile := filepath.Join(tempDir, "test.gguf")
|
||||
require.NoError(t, os.WriteFile(tempFile, content, 0600))
|
||||
|
||||
digest := "sha256:abc123"
|
||||
layerFiles := map[string]LayerInfo{
|
||||
digest: {TempPath: tempFile, MediaType: ggufLayerMediaType},
|
||||
}
|
||||
|
||||
resolver := NewContainerImageModel(tempDir, layerFiles)
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
digest string
|
||||
wantErr bool
|
||||
wantData []byte
|
||||
errSubstr string
|
||||
}{
|
||||
{
|
||||
name: "valid location returns content",
|
||||
digest: digest,
|
||||
wantErr: false,
|
||||
wantData: content,
|
||||
},
|
||||
{
|
||||
name: "invalid digest returns error",
|
||||
digest: "sha256:invalid",
|
||||
wantErr: true,
|
||||
errSubstr: "no file found for digest",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
loc := file.NewVirtualLocationFromCoordinates(
|
||||
file.NewCoordinates("/", test.digest),
|
||||
"/",
|
||||
)
|
||||
|
||||
reader, err := resolver.FileContentsByLocation(loc)
|
||||
|
||||
if test.wantErr {
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), test.errSubstr)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
defer reader.Close()
|
||||
|
||||
data, err := io.ReadAll(reader)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, test.wantData, data)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -9,8 +9,17 @@ import (
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
const (
|
||||
catalogerName = "gguf-cataloger"
|
||||
ggufLayerMediaType = "application/vnd.docker.ai*"
|
||||
)
|
||||
|
||||
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||
// It supports both traditional file-based discovery and OCI layer-aware discovery
|
||||
// when the source for the SBOM is the oci model source
|
||||
func NewGGUFCataloger() pkg.Cataloger {
|
||||
return generic.NewCataloger("gguf-cataloger").
|
||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||
return generic.NewCataloger(catalogerName).
|
||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf").
|
||||
WithParserByMediaType(parseGGUFModel, ggufLayerMediaType).
|
||||
WithProcessors(ggufMergeProcessor)
|
||||
}
|
||||
|
||||
@ -122,6 +122,10 @@ func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||
|
||||
// extractModelNameFromPath extracts the model name from the file path
|
||||
func extractModelNameFromPath(path string) string {
|
||||
// we do not want to return a name from filepath if it's not a distinct gguf file
|
||||
if !strings.Contains(path, ".gguf") {
|
||||
return ""
|
||||
}
|
||||
// Get the base filename
|
||||
base := filepath.Base(path)
|
||||
|
||||
|
||||
59
syft/pkg/cataloger/ai/processor.go
Normal file
59
syft/pkg/cataloger/ai/processor.go
Normal file
@ -0,0 +1,59 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
// ggufMergeProcessor consolidates multiple GGUF packages into a single package
|
||||
// representing the AI model. When scanning OCI images with multiple layers,
|
||||
// each layer may produce a separate package. This processor finds the package
|
||||
// with a name and merges metadata from nameless packages into its GGUFFileParts field.
|
||||
// Only packages with a non-empty name are returned in the final result.
|
||||
func ggufMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
if err != nil {
|
||||
return pkgs, rels, err
|
||||
}
|
||||
|
||||
if len(pkgs) == 0 {
|
||||
return pkgs, rels, err
|
||||
}
|
||||
|
||||
// Separate packages with names from those without
|
||||
var namedPkgs []pkg.Package
|
||||
var namelessHeaders []pkg.GGUFFileHeader
|
||||
|
||||
for _, p := range pkgs {
|
||||
if p.Name != "" {
|
||||
namedPkgs = append(namedPkgs, p)
|
||||
} else {
|
||||
if header, ok := p.Metadata.(pkg.GGUFFileHeader); ok {
|
||||
// We do not want a kv hash for nameless headers
|
||||
header.MetadataKeyValuesHash = ""
|
||||
namelessHeaders = append(namelessHeaders, header)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If there are no named packages, return nothing
|
||||
if len(namedPkgs) == 0 {
|
||||
return nil, rels, err
|
||||
}
|
||||
|
||||
// merge nameless headers into a single named package;
|
||||
// if there are multiple named packages, return them without trying to merge headers.
|
||||
// we cannot determine which nameless headers belong to which package
|
||||
// this is because the order we receive the gguf headers in is not guaranteed
|
||||
// to match the layer order in the original oci image
|
||||
if len(namedPkgs) == 1 && len(namelessHeaders) > 0 {
|
||||
winner := &namedPkgs[0]
|
||||
if header, ok := winner.Metadata.(pkg.GGUFFileHeader); ok {
|
||||
header.Parts = namelessHeaders
|
||||
winner.Metadata = header
|
||||
}
|
||||
}
|
||||
|
||||
// Largest number of key value
|
||||
|
||||
return namedPkgs, rels, err
|
||||
}
|
||||
63
syft/pkg/cataloger/ai/processor_test.go
Normal file
63
syft/pkg/cataloger/ai/processor_test.go
Normal file
@ -0,0 +1,63 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func Test_ggufMergeProcessor(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
pkgs []pkg.Package
|
||||
wantPkgCount int
|
||||
wantFilePartCount int
|
||||
}{
|
||||
{
|
||||
name: "single named package merges nameless headers",
|
||||
pkgs: []pkg.Package{
|
||||
{Name: "model", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "abc"}},
|
||||
{Name: "", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "part1"}},
|
||||
{Name: "", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "part2"}},
|
||||
},
|
||||
wantPkgCount: 1,
|
||||
wantFilePartCount: 2,
|
||||
},
|
||||
{
|
||||
name: "multiple named packages returns all without merging",
|
||||
pkgs: []pkg.Package{
|
||||
{Name: "model1", Metadata: pkg.GGUFFileHeader{}},
|
||||
{Name: "model2", Metadata: pkg.GGUFFileHeader{}},
|
||||
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||
},
|
||||
wantPkgCount: 2,
|
||||
wantFilePartCount: 0,
|
||||
},
|
||||
{
|
||||
name: "no named packages returns empty result",
|
||||
pkgs: []pkg.Package{
|
||||
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||
},
|
||||
wantPkgCount: 0,
|
||||
wantFilePartCount: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
got, _, err := ggufMergeProcessor(test.pkgs, nil, nil)
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, got, test.wantPkgCount)
|
||||
|
||||
if test.wantPkgCount == 1 && test.wantFilePartCount > 0 {
|
||||
header, ok := got[0].Metadata.(pkg.GGUFFileHeader)
|
||||
require.True(t, ok)
|
||||
assert.Len(t, header.Parts, test.wantFilePartCount)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -2164,6 +2164,11 @@ func (p *panicyResolver) FilesByMIMEType(_ ...string) ([]file.Location, error) {
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (p *panicyResolver) FilesByMediaType(_ ...string) ([]file.Location, error) {
|
||||
p.searchCalled = true
|
||||
return nil, errors.New("not implemented")
|
||||
}
|
||||
|
||||
func (p *panicyResolver) RelativeFileByPath(_ file.Location, _ string) *file.Location {
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -114,6 +114,26 @@ func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Cataloger) WithParserByMediaType(parser Parser, types ...string) *Cataloger {
|
||||
c.requesters = append(c.requesters,
|
||||
func(resolver file.Resolver, _ Environment) []request {
|
||||
var requests []request
|
||||
log.WithFields("mediatypes", types).Trace("searching content matching mediatypes")
|
||||
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
matches, err := ociResolver.FilesByMediaType(types...)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
requests = append(requests, makeRequests(parser, matches)...)
|
||||
return requests
|
||||
},
|
||||
)
|
||||
return c
|
||||
}
|
||||
|
||||
func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
|
||||
for _, p := range processors {
|
||||
c.processors = append(c.processors, processorWrapper{Processor: p})
|
||||
|
||||
@ -138,6 +138,10 @@ func (m spyReturningFileResolver) FilesByMIMEType(types ...string) ([]file.Locat
|
||||
return m.m.FilesByMIMEType(types...)
|
||||
}
|
||||
|
||||
func (m spyReturningFileResolver) FilesByMediaType(types ...string) ([]file.Location, error) {
|
||||
return m.m.FilesByMediaType(types...)
|
||||
}
|
||||
|
||||
func (m spyReturningFileResolver) RelativeFileByPath(f file.Location, path string) *file.Location {
|
||||
return m.m.RelativeFileByPath(f, path)
|
||||
}
|
||||
@ -189,6 +193,55 @@ func TestClosesFileOnParserPanic(t *testing.T) {
|
||||
require.True(t, spy.closed)
|
||||
}
|
||||
|
||||
func Test_CatalogerWithParserByMediaType(t *testing.T) {
|
||||
allParsedPaths := make(map[string]bool)
|
||||
parser := func(_ context.Context, resolver file.Resolver, env *Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
allParsedPaths[reader.Path()] = true
|
||||
contents, err := io.ReadAll(reader)
|
||||
require.NoError(t, err)
|
||||
|
||||
if len(contents) == 0 {
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
p := pkg.Package{
|
||||
Name: string(contents),
|
||||
Locations: file.NewLocationSet(reader.Location),
|
||||
}
|
||||
|
||||
return []pkg.Package{p}, nil, nil
|
||||
}
|
||||
|
||||
upstream := "media-type-cataloger"
|
||||
|
||||
// Create locations with test fixtures that exist on disk
|
||||
loc1 := file.NewLocation("test-fixtures/a-path.txt")
|
||||
loc2 := file.NewLocation("test-fixtures/another-path.txt")
|
||||
|
||||
// Create a mock resolver that maps media types to locations
|
||||
resolver := file.NewMockResolverForMediaTypes(map[string][]file.Location{
|
||||
"application/vnd.test.model": {loc1, loc2},
|
||||
})
|
||||
|
||||
cataloger := NewCataloger(upstream).
|
||||
WithParserByMediaType(parser, "application/vnd.test.model")
|
||||
|
||||
actualPkgs, _, err := cataloger.Catalog(context.Background(), resolver)
|
||||
assert.NoError(t, err)
|
||||
|
||||
// Verify both files were parsed
|
||||
assert.True(t, allParsedPaths["test-fixtures/a-path.txt"], "expected a-path.txt to be parsed")
|
||||
assert.True(t, allParsedPaths["test-fixtures/another-path.txt"], "expected another-path.txt to be parsed")
|
||||
|
||||
// Verify packages were created
|
||||
assert.Len(t, actualPkgs, 2)
|
||||
|
||||
// Verify FoundBy is set correctly
|
||||
for _, p := range actualPkgs {
|
||||
assert.Equal(t, upstream, p.FoundBy)
|
||||
}
|
||||
}
|
||||
|
||||
func Test_genericCatalogerReturnsErrors(t *testing.T) {
|
||||
genericErrorReturning := NewCataloger("error returning").WithParserByGlobs(func(ctx context.Context, resolver file.Resolver, environment *Environment, locationReader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
return []pkg.Package{
|
||||
|
||||
@ -208,6 +208,11 @@ func (r *ObservingResolver) FilesByMIMEType(types ...string) ([]file.Location, e
|
||||
return locs, err
|
||||
}
|
||||
|
||||
// FilesByMediaType returns files matching the given media types.
|
||||
func (r *ObservingResolver) FilesByMediaType(_ ...string) ([]file.Location, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// RelativeFileByPath returns a file at a path relative to the given location.
|
||||
func (r *ObservingResolver) RelativeFileByPath(location file.Location, path string) *file.Location {
|
||||
const methodName = "RelativeFileByPath"
|
||||
|
||||
@ -80,6 +80,10 @@ func (r *rpmdbTestFileResolverMock) FilesByMIMEType(...string) ([]file.Location,
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func (r *rpmdbTestFileResolverMock) FilesByMediaType(...string) ([]file.Location, error) {
|
||||
return nil, fmt.Errorf("not implemented")
|
||||
}
|
||||
|
||||
func TestParseRpmDB(t *testing.T) {
|
||||
ctx := context.TODO()
|
||||
packagesLocation := file.NewLocation("test-fixtures/Packages")
|
||||
|
||||
@ -34,4 +34,8 @@ type GGUFFileHeader struct {
|
||||
// across different file locations or remotes. It allows matching identical models even
|
||||
// when stored in different repositories or with different filenames.
|
||||
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
|
||||
|
||||
// Parts contains headers from additional GGUF files that were merged
|
||||
// into this package during post-processing (e.g., from OCI layers without model names).
|
||||
Parts []GGUFFileHeader `json:"parts,omitempty" cyclonedx:"parts"`
|
||||
}
|
||||
|
||||
@ -17,6 +17,7 @@ type ImageMetadata struct {
|
||||
Variant string `json:"architectureVariant,omitempty"`
|
||||
OS string `json:"os"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
Annotations map[string]string `json:"annotations,omitempty" id:"-"` // critical: do not consider annotations as an identifiable part of the source image
|
||||
}
|
||||
|
||||
// LayerMetadata represents all static metadata that defines what a container image layer is.
|
||||
|
||||
66
syft/source/internal/image_id.go
Normal file
66
syft/source/internal/image_id.go
Normal file
@ -0,0 +1,66 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/go-digest"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
// DeriveImageID derives an artifact ID from the given image metadata. The order of data precedence is:
|
||||
// 1. prefer a digest of the raw container image manifest
|
||||
// 2. if no manifest digest is available, calculate a chain ID from the image layer metadata
|
||||
// 3. if no layer metadata is available, use the user input string
|
||||
//
|
||||
// in all cases, if an alias is provided, it is additionally considered in the ID calculation. This allows for the
|
||||
// same image to be scanned multiple times with different aliases and be considered logically different.
|
||||
func DeriveImageID(alias source.Alias, metadata source.ImageMetadata) artifact.ID {
|
||||
var input string
|
||||
|
||||
if len(metadata.RawManifest) > 0 {
|
||||
input = digest.Canonical.FromBytes(metadata.RawManifest).String()
|
||||
} else {
|
||||
// calculate chain ID for image sources where manifestDigest is not available
|
||||
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
||||
input = calculateChainID(metadata.Layers)
|
||||
if input == "" {
|
||||
// TODO what happens here if image has no layers?
|
||||
// is this case possible?
|
||||
input = digest.Canonical.FromString(metadata.UserInput).String()
|
||||
}
|
||||
}
|
||||
|
||||
if !alias.IsEmpty() {
|
||||
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
|
||||
// scans the same item but is considered to be logically different, then ID will express that.
|
||||
aliasStr := fmt.Sprintf(":%s@%s", alias.Name, alias.Version)
|
||||
input = digest.Canonical.FromString(input + aliasStr).String()
|
||||
}
|
||||
|
||||
return ArtifactIDFromDigest(input)
|
||||
}
|
||||
|
||||
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
||||
func calculateChainID(lm []source.LayerMetadata) string {
|
||||
if len(lm) < 1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// DiffID(L0) = digest of layer 0
|
||||
// https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32
|
||||
chainID := lm[0].Digest
|
||||
id := chain(chainID, lm[1:])
|
||||
|
||||
return id
|
||||
}
|
||||
|
||||
func chain(chainID string, layers []source.LayerMetadata) string {
|
||||
if len(layers) < 1 {
|
||||
return chainID
|
||||
}
|
||||
|
||||
chainID = digest.Canonical.FromString(layers[0].Digest + " " + chainID).String()
|
||||
return chain(chainID, layers[1:])
|
||||
}
|
||||
278
syft/source/internal/image_id_test.go
Normal file
278
syft/source/internal/image_id_test.go
Normal file
@ -0,0 +1,278 @@
|
||||
package internal
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
func TestDeriveImageID(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
alias source.Alias
|
||||
metadata source.ImageMetadata
|
||||
want artifact.ID
|
||||
}{
|
||||
{
|
||||
name: "use raw manifest over chain ID or user input",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
hasher := sha256.New()
|
||||
hasher.Write([]byte("raw-manifest"))
|
||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "use chain ID over user input",
|
||||
metadata: source.ImageMetadata{
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
metadata := []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
}
|
||||
return artifact.ID(strings.TrimPrefix(calculateChainID(metadata), "sha256:"))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "use user input last",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
hasher := sha256.New()
|
||||
hasher.Write([]byte("user-input"))
|
||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "without alias (first)",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: "85298926ecd92ed57688f13039017160cd728f04dd0d2d10a10629007106f107",
|
||||
},
|
||||
{
|
||||
name: "always consider alias (first)",
|
||||
alias: source.Alias{
|
||||
Name: "alias",
|
||||
Version: "version",
|
||||
},
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: "a8717e42449960c1dd4963f2f22bd69c7c105e7e82445be0a65aa1825d62ff0d",
|
||||
},
|
||||
{
|
||||
name: "without alias (last)",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: "ab0dff627d80b9753193d7280bec8f45e8ec6b4cb0912c6fffcf7cd782d9739e",
|
||||
},
|
||||
{
|
||||
name: "always consider alias (last)",
|
||||
alias: source.Alias{
|
||||
Name: "alias",
|
||||
Version: "version",
|
||||
},
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: "fe86c0eecd5654d3c0c0b2176aa394aef6440347c241aa8d9b628dfdde4287cf",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.want, DeriveImageID(tt.alias, tt.metadata))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// ensures same metadata produces identical IDs
|
||||
// regardless of whether the source is stereoscope-based or OCI model-based. Both source types
|
||||
// use DeriveImageID with ImageMetadata
|
||||
// this test captures known-good IDs that must remain
|
||||
// stable across refactors to maintain consistency.
|
||||
//
|
||||
// IMPORTANT: If any of these tests fail after a refactor, it means the artifact ID generation
|
||||
// has changed and will break consistency between stereoscope images and OCI model sources.
|
||||
func TestDeriveImageID_CrossSourceConsistency(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
alias source.Alias
|
||||
metadata source.ImageMetadata
|
||||
wantID artifact.ID
|
||||
}{
|
||||
{
|
||||
name: "raw manifest with layers - typical container image",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "docker.io/library/alpine:latest",
|
||||
ManifestDigest: "sha256:abc123",
|
||||
Layers: []source.LayerMetadata{
|
||||
{Digest: "sha256:layer1", MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", Size: 1000},
|
||||
{Digest: "sha256:layer2", MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", Size: 2000},
|
||||
},
|
||||
RawManifest: []byte(`{"schemaVersion":2,"mediaType":"application/vnd.oci.image.manifest.v1+json"}`),
|
||||
},
|
||||
// snapshot: this ID must remain stable for stereoscope/oci-model consistency
|
||||
wantID: "b22c7289dd3b4785a3795c90e15d16bd66bd29b444b8974fe29ed0443ce50405",
|
||||
},
|
||||
{
|
||||
name: "raw manifest only - minimal image",
|
||||
metadata: source.ImageMetadata{
|
||||
RawManifest: []byte(`{"schemaVersion":2}`),
|
||||
},
|
||||
// snapshot: this ID must remain stable
|
||||
wantID: "bafebd36189ad3688b7b3915ea55d461e0bfcfbdde11e54b0a123999fb6be50f",
|
||||
},
|
||||
{
|
||||
name: "chain ID fallback - no raw manifest",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "some-image",
|
||||
Layers: []source.LayerMetadata{
|
||||
{Digest: "sha256:aaa111"},
|
||||
{Digest: "sha256:bbb222"},
|
||||
},
|
||||
},
|
||||
// snapshot: chain ID calculation must remain stable
|
||||
wantID: "0ba9c8d271e6708871505d362e37267c5fb7910066c04d3115b89ba4d34aa180",
|
||||
},
|
||||
{
|
||||
name: "user input fallback - no manifest or layers",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "registry.example.com/org/model:v1.0",
|
||||
},
|
||||
// snapshot: user input hash must remain stable
|
||||
wantID: "a5a8733a3ba3eb99a8ebebcd40c4053f9b896ea6e2217ebc6e885573f20baccf",
|
||||
},
|
||||
{
|
||||
name: "with alias - same image different logical identity",
|
||||
alias: source.Alias{
|
||||
Name: "my-custom-name",
|
||||
Version: "1.0.0",
|
||||
},
|
||||
metadata: source.ImageMetadata{
|
||||
RawManifest: []byte(`{"schemaVersion":2}`),
|
||||
},
|
||||
// snapshot: alias must affect ID deterministically
|
||||
wantID: "9eae41c0efc30023368c29089bac007f2c9d0b40a0ee034081a17c4c22f55ac6",
|
||||
},
|
||||
{
|
||||
name: "annotations has no effect on ID",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "registry.example.com/org/model:v1.0",
|
||||
Annotations: map[string]string{
|
||||
"annotation1": "value1",
|
||||
},
|
||||
},
|
||||
// snapshot: user input hash must remain stable
|
||||
wantID: "a5a8733a3ba3eb99a8ebebcd40c4053f9b896ea6e2217ebc6e885573f20baccf",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := DeriveImageID(tt.alias, tt.metadata)
|
||||
assert.Equal(t, tt.wantID, got, "ID must remain stable for cross-source consistency")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalculateChainID(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
layers []source.LayerMetadata
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "empty layers returns empty string",
|
||||
layers: []source.LayerMetadata{},
|
||||
want: "",
|
||||
},
|
||||
{
|
||||
name: "single layer returns digest",
|
||||
layers: []source.LayerMetadata{
|
||||
{Digest: "sha256:abc123"},
|
||||
},
|
||||
want: "sha256:abc123",
|
||||
},
|
||||
{
|
||||
name: "multiple layers calculates chain ID",
|
||||
layers: []source.LayerMetadata{
|
||||
{Digest: "a"},
|
||||
{Digest: "b"},
|
||||
{Digest: "c"},
|
||||
},
|
||||
// snapshot - this value should not change
|
||||
want: "sha256:1dfe230e220ef0e6bc0a8978d23d72b95769e76a62879a5f49267d8c007ab43d",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.want, calculateChainID(tt.layers))
|
||||
})
|
||||
}
|
||||
}
|
||||
4
syft/source/oci_model_metadata.go
Normal file
4
syft/source/oci_model_metadata.go
Normal file
@ -0,0 +1,4 @@
|
||||
package source
|
||||
|
||||
// OCIModelMetadata is an AI model from an OCI registry, which is a specialized form of ImageMetadata.
|
||||
type OCIModelMetadata ImageMetadata
|
||||
245
syft/source/ocimodelsource/oci_model_source.go
Normal file
245
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,245 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/google/go-containerregistry/pkg/name"
|
||||
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/image"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/internal/fileresolver"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/anchore/syft/syft/source/internal"
|
||||
)
|
||||
|
||||
var _ source.Source = (*ociModelSource)(nil)
|
||||
|
||||
// Config holds the input configuration for an OCI model artifact source.
|
||||
type Config struct {
|
||||
Reference string
|
||||
RegistryOptions *image.RegistryOptions
|
||||
Alias source.Alias
|
||||
}
|
||||
|
||||
// ociModelSource implements the source.Source interface for OCI model artifacts.
|
||||
type ociModelSource struct {
|
||||
id artifact.ID
|
||||
reference string
|
||||
alias source.Alias
|
||||
metadata source.OCIModelMetadata
|
||||
tempDir string
|
||||
resolver interface {
|
||||
file.Resolver
|
||||
file.OCIMediaTypeResolver
|
||||
}
|
||||
mutex *sync.Mutex
|
||||
}
|
||||
|
||||
// NewFromRegistry creates a new OCI model source by fetching the model artifact from a registry.
|
||||
func NewFromRegistry(ctx context.Context, cfg Config) (source.Source, error) {
|
||||
client := newRegistryClient(cfg.RegistryOptions)
|
||||
art, err := validateAndFetchArtifact(ctx, client, cfg.Reference)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
metadata := buildMetadata(art)
|
||||
tempDir, resolver, err := fetchAndStoreGGUFHeaders(ctx, client, art)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
id := internal.DeriveImageID(cfg.Alias, source.ImageMetadata(metadata))
|
||||
return &ociModelSource{
|
||||
id: id,
|
||||
reference: cfg.Reference,
|
||||
alias: cfg.Alias,
|
||||
metadata: metadata,
|
||||
tempDir: tempDir,
|
||||
resolver: resolver,
|
||||
mutex: &sync.Mutex{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// validateAndFetchArtifact fetches and validates a model artifact in a single registry call.
|
||||
func validateAndFetchArtifact(ctx context.Context, client *registryClient, reference string) (*modelArtifact, error) {
|
||||
art, err := client.fetchModelArtifact(ctx, reference)
|
||||
if err != nil {
|
||||
// errNotModelArtifact is wrapped, so callers can use errors.Is() to check
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(art.GGUFLayers) == 0 {
|
||||
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||
}
|
||||
|
||||
return art, nil
|
||||
}
|
||||
|
||||
// fetchAndStoreGGUFHeaders fetches GGUF layer headers and stores them in temp files.
|
||||
func fetchAndStoreGGUFHeaders(ctx context.Context, client *registryClient, artifact *modelArtifact) (string, *fileresolver.ContainerImageModel, error) {
|
||||
tempDir, err := os.MkdirTemp("", "syft-oci-gguf")
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("failed to create temp directory: %w", err)
|
||||
}
|
||||
|
||||
layerFiles := make(map[string]fileresolver.LayerInfo)
|
||||
for _, layer := range artifact.GGUFLayers {
|
||||
li, err := fetchSingleGGUFHeader(ctx, client, artifact.Reference, layer, tempDir)
|
||||
if err != nil {
|
||||
osErr := os.RemoveAll(tempDir)
|
||||
if osErr != nil {
|
||||
log.Errorf("unable to remove temp directory (%s): %v", tempDir, err)
|
||||
}
|
||||
return "", nil, err
|
||||
}
|
||||
layerFiles[layer.Digest.String()] = li
|
||||
}
|
||||
|
||||
resolver := fileresolver.NewContainerImageModel(tempDir, layerFiles)
|
||||
|
||||
return tempDir, resolver, nil
|
||||
}
|
||||
|
||||
// fetchSingleGGUFHeader fetches a single GGUF layer header and writes it to a temp file.
|
||||
func fetchSingleGGUFHeader(ctx context.Context, client *registryClient, ref name.Reference, layer v1.Descriptor, tempDir string) (fileresolver.LayerInfo, error) {
|
||||
headerData, err := client.fetchBlobRange(ctx, ref, layer.Digest, maxHeaderBytes)
|
||||
if err != nil {
|
||||
return fileresolver.LayerInfo{}, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||
}
|
||||
|
||||
digestStr := layer.Digest.String()
|
||||
safeDigest := strings.ReplaceAll(digestStr, ":", "-")
|
||||
tempPath := filepath.Join(tempDir, safeDigest+".gguf")
|
||||
if err := os.WriteFile(tempPath, headerData, 0600); err != nil {
|
||||
return fileresolver.LayerInfo{}, fmt.Errorf("failed to write temp file: %w", err)
|
||||
}
|
||||
|
||||
return fileresolver.LayerInfo{
|
||||
TempPath: tempPath,
|
||||
MediaType: string(layer.MediaType),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// buildMetadata constructs OCIModelMetadata from a modelArtifact, flattening
// the manifest and config details into the source metadata shape.
func buildMetadata(artifact *modelArtifact) source.OCIModelMetadata {
	// layers: carry over every manifest layer (not just the GGUF layers)
	layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
	for i, layer := range artifact.Manifest.Layers {
		layers[i] = source.LayerMetadata{
			MediaType: string(layer.MediaType),
			Digest:    layer.Digest.String(),
			Size:      layer.Size,
		}
	}

	// tags: only populated when the reference itself carries a tag (interface
	// assertion avoids depending on the concrete name.Tag type)
	var tags []string
	if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
		if tag := tagged.TagStr(); tag != "" {
			tags = []string{tag}
		}
	}

	// digests: "repo@digest" form, only when a manifest digest is known
	var repoDigests []string
	if artifact.ManifestDigest != "" {
		repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
	}

	// metadata
	return source.OCIModelMetadata{
		UserInput:      artifact.Reference.String(),
		ID:             artifact.ManifestDigest,
		ManifestDigest: artifact.ManifestDigest,
		MediaType:      string(artifact.Manifest.MediaType),
		Tags:           tags,
		Size:           calculateTotalSize(layers), // total across ALL layers, not just GGUF
		Layers:         layers,
		RawManifest:    artifact.RawManifest,
		RawConfig:      artifact.RawConfig,
		RepoDigests:    repoDigests,
		Architecture:   artifact.Config.Architecture,
		Variant:        artifact.Config.Variant,
		OS:             artifact.Config.OS,
		Labels:         artifact.Config.Config.Labels,
		Annotations:    extractManifestAnnotations(artifact.Manifest),
	}
}
|
||||
|
||||
// extractManifestAnnotations extracts annotations from the manifest.
|
||||
func extractManifestAnnotations(manifest *v1.Manifest) map[string]string {
|
||||
if manifest == nil || manifest.Annotations == nil {
|
||||
return make(map[string]string)
|
||||
}
|
||||
return manifest.Annotations
|
||||
}
|
||||
|
||||
// calculateTotalSize sums up the size of all layers.
|
||||
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||
var total int64
|
||||
for _, layer := range layers {
|
||||
total += layer.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// ID returns the artifact ID derived for this source (computed at construction
// time from the alias and image metadata).
func (s *ociModelSource) ID() artifact.ID {
	return s.id
}
|
||||
|
||||
// Describe returns a description of the source.
|
||||
func (s *ociModelSource) Describe() source.Description {
|
||||
name := s.reference
|
||||
version := ""
|
||||
supplier := ""
|
||||
|
||||
if !s.alias.IsEmpty() {
|
||||
if s.alias.Name != "" {
|
||||
name = s.alias.Name
|
||||
}
|
||||
if s.alias.Version != "" {
|
||||
version = s.alias.Version
|
||||
}
|
||||
if s.alias.Supplier != "" {
|
||||
supplier = s.alias.Supplier
|
||||
}
|
||||
}
|
||||
|
||||
return source.Description{
|
||||
ID: string(s.id),
|
||||
Name: name,
|
||||
Version: version,
|
||||
Supplier: supplier,
|
||||
Metadata: s.metadata,
|
||||
}
|
||||
}
|
||||
|
||||
// FileResolver returns a file resolver for accessing header of GGUF files.
// The scope argument is ignored: the resolver always exposes only the header
// bytes previously fetched from the registry.
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
	return s.resolver, nil
}
|
||||
|
||||
// Close cleans up temporary files. Safe to call multiple times.
|
||||
func (s *ociModelSource) Close() error {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
if s.tempDir == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
err := os.RemoveAll(s.tempDir)
|
||||
s.tempDir = ""
|
||||
s.resolver = nil
|
||||
return err
|
||||
}
|
||||
36
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
36
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,36 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/image"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
// ociModelSourceProvider constructs OCI model sources from a user-supplied
// reference plus optional registry options and alias.
type ociModelSourceProvider struct {
	reference    string                 // user-supplied OCI reference (e.g. "ai/llama3.2")
	registryOpts *image.RegistryOptions // optional registry auth/TLS settings (may be nil)
	alias        source.Alias           // optional user-provided name/version/supplier override
}
|
||||
|
||||
// NewSourceProvider creates a new OCI model artifact source provider.
|
||||
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
|
||||
return &ociModelSourceProvider{
|
||||
reference: reference,
|
||||
registryOpts: registryOpts,
|
||||
alias: alias,
|
||||
}
|
||||
}
|
||||
|
||||
// Name returns the identifier used when selecting among source providers.
func (p *ociModelSourceProvider) Name() string {
	return "oci-model"
}
|
||||
|
||||
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
|
||||
cfg := Config{
|
||||
Reference: p.reference,
|
||||
RegistryOptions: p.registryOpts,
|
||||
Alias: p.alias,
|
||||
}
|
||||
return NewFromRegistry(ctx, cfg)
|
||||
}
|
||||
217
syft/source/ocimodelsource/registry_client.go
Normal file
217
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,217 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/google/go-containerregistry/pkg/authn"
|
||||
"github.com/google/go-containerregistry/pkg/name"
|
||||
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||
"github.com/google/go-containerregistry/pkg/v1/remote"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/image"
|
||||
)
|
||||
|
||||
// errNotModelArtifact is returned when a reference does not point to a model artifact.
var errNotModelArtifact = errors.New("not an OCI model artifact")

const (
	// Model artifact media types as per Docker's OCI artifacts for AI model packaging
	// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
	modelConfigMediaTypePrefix = "application/vnd.docker.ai.model.config."
	ggufLayerMediaType         = "application/vnd.docker.ai.gguf.v3"

	// Maximum bytes to read/return for GGUF headers; sized to cover the GGUF
	// header per https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#file-structure
	maxHeaderBytes = 8 * 1024 * 1024 // 8 MB
)
|
||||
|
||||
// registryClient handles OCI registry interactions for model artifacts.
type registryClient struct {
	options []remote.Option // base remote options (auth/transport); per-call context is appended at call sites
}
|
||||
|
||||
// newRegistryClient creates a new registry client with authentication from RegistryOptions.
|
||||
func newRegistryClient(registryOpts *image.RegistryOptions) *registryClient {
|
||||
opts := buildRemoteOptions(registryOpts)
|
||||
|
||||
return ®istryClient{
|
||||
options: opts,
|
||||
}
|
||||
}
|
||||
|
||||
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry
// remote.Options (auth + transport). A nil registryOpts yields no options (the
// library defaults apply).
func buildRemoteOptions(registryOpts *image.RegistryOptions) []remote.Option {
	var opts []remote.Option

	if registryOpts == nil {
		return opts
	}

	// Build authenticator
	authenticator := buildAuthenticator(registryOpts)
	opts = append(opts, remote.WithAuth(authenticator))

	// Handle TLS settings: clone the default transport so the global one is
	// never mutated, then disable certificate verification on the clone
	if registryOpts.InsecureSkipTLSVerify {
		if transport, ok := remote.DefaultTransport.(*http.Transport); ok {
			transport = transport.Clone()
			if transport.TLSClientConfig == nil {
				transport.TLSClientConfig = &tls.Config{
					MinVersion: tls.VersionTLS12,
				}
			}
			transport.TLSClientConfig.InsecureSkipVerify = true //#nosec G402 -- user explicitly requested insecure TLS
			opts = append(opts, remote.WithTransport(transport))
		}
	}

	// Handle insecure HTTP
	// NOTE(review): if both InsecureSkipTLSVerify and InsecureUseHTTP are set,
	// this second WithTransport presumably supersedes the TLS-skipping one
	// added above — confirm that option ordering is the intended behavior.
	if registryOpts.InsecureUseHTTP {
		opts = append(opts, remote.WithTransport(http.DefaultTransport))
	}

	return opts
}
|
||||
|
||||
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
|
||||
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
|
||||
// If credentials are provided, use them
|
||||
if len(registryOpts.Credentials) > 0 {
|
||||
// Use the first credential set (we could enhance this to match by authority)
|
||||
cred := registryOpts.Credentials[0]
|
||||
|
||||
if cred.Token != "" {
|
||||
return &authn.Bearer{Token: cred.Token}
|
||||
}
|
||||
|
||||
if cred.Username != "" || cred.Password != "" {
|
||||
return &authn.Basic{
|
||||
Username: cred.Username,
|
||||
Password: cred.Password,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to anonymous authenticator
|
||||
return authn.Anonymous
|
||||
}
|
||||
|
||||
// modelArtifact represents a parsed OCI model artifact.
type modelArtifact struct {
	Reference      name.Reference  // parsed user-supplied reference
	Manifest       *v1.Manifest    // decoded OCI manifest
	Config         *v1.ConfigFile  // decoded config blob
	RawManifest    []byte          // manifest bytes exactly as fetched from the registry
	RawConfig      []byte          // config bytes exactly as fetched from the registry
	ManifestDigest string          // digest of the manifest (e.g. "sha256:…")
	GGUFLayers     []v1.Descriptor // subset of manifest layers carrying the GGUF media type
}
|
||||
|
||||
func (c *registryClient) fetchModelArtifact(ctx context.Context, refStr string) (*modelArtifact, error) {
|
||||
ref, err := name.ParseReference(refStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||
}
|
||||
|
||||
opts := c.options
|
||||
opts = append(opts, remote.WithContext(ctx))
|
||||
desc, err := remote.Get(ref, opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||
}
|
||||
|
||||
manifest := &v1.Manifest{}
|
||||
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||
}
|
||||
|
||||
if !isModelArtifact(manifest) {
|
||||
return nil, fmt.Errorf("%w (config media type: %s)", errNotModelArtifact, manifest.Config.MediaType)
|
||||
}
|
||||
|
||||
img, err := desc.Image()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||
}
|
||||
|
||||
configFile, err := img.ConfigFile()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||
}
|
||||
|
||||
rawConfig, err := img.RawConfigFile()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||
}
|
||||
|
||||
ggufLayers := extractGGUFLayers(manifest)
|
||||
|
||||
return &modelArtifact{
|
||||
Reference: ref,
|
||||
Manifest: manifest,
|
||||
Config: configFile,
|
||||
RawManifest: desc.Manifest,
|
||||
RawConfig: rawConfig,
|
||||
ManifestDigest: desc.Digest.String(),
|
||||
GGUFLayers: ggufLayers,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isModelArtifact checks if the manifest represents a model artifact.
|
||||
func isModelArtifact(manifest *v1.Manifest) bool {
|
||||
return strings.HasPrefix(string(manifest.Config.MediaType), modelConfigMediaTypePrefix)
|
||||
}
|
||||
|
||||
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||
var ggufLayers []v1.Descriptor
|
||||
for _, layer := range manifest.Layers {
|
||||
if string(layer.MediaType) == ggufLayerMediaType {
|
||||
ggufLayers = append(ggufLayers, layer)
|
||||
}
|
||||
}
|
||||
return ggufLayers
|
||||
}
|
||||
|
||||
func (c *registryClient) fetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||
repo := ref.Context()
|
||||
|
||||
opts := c.options
|
||||
opts = append(opts, remote.WithContext(ctx))
|
||||
layer, err := remote.Layer(repo.Digest(digest.String()), opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||
}
|
||||
|
||||
reader, err := layer.Compressed()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||
}
|
||||
// this defer is what causes the download to stop
|
||||
// 1. io.ReadFull(reader, data) reads exactly 8MB into the buffer
|
||||
// 2. The function returns with data[:n]
|
||||
// 3. defer reader.Close() executes, closing the HTTP response body
|
||||
// 4. Closing the response body closes the underlying TCP connection
|
||||
// 5. The server receives TCP FIN/RST and stops sending
|
||||
// note: some data is already in flight when we close so we will see > 8mb over the wire
|
||||
// the full image will not download given we terminate the reader early here
|
||||
defer reader.Close()
|
||||
|
||||
// Note: this is not some arbitrary number picked out of the blue.
|
||||
// This is based on the specification of header data found here:
|
||||
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#file-structure
|
||||
data := make([]byte, maxBytes)
|
||||
n, err := io.ReadFull(reader, data)
|
||||
if err != nil && err != io.ErrUnexpectedEOF {
|
||||
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||
}
|
||||
|
||||
return data[:n], nil
|
||||
}
|
||||
114
syft/source/ocimodelsource/registry_client_test.go
Normal file
114
syft/source/ocimodelsource/registry_client_test.go
Normal file
@ -0,0 +1,114 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||
"github.com/google/go-containerregistry/pkg/v1/types"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestIsModelArtifact(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
manifest *v1.Manifest
|
||||
expected bool
|
||||
}{
|
||||
{
|
||||
name: "valid model artifact",
|
||||
manifest: &v1.Manifest{
|
||||
Config: v1.Descriptor{
|
||||
MediaType: modelConfigMediaTypePrefix + "v1+json",
|
||||
},
|
||||
},
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "container image",
|
||||
manifest: &v1.Manifest{
|
||||
Config: v1.Descriptor{
|
||||
MediaType: types.DockerConfigJSON,
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "empty media type",
|
||||
manifest: &v1.Manifest{
|
||||
Config: v1.Descriptor{
|
||||
MediaType: "",
|
||||
},
|
||||
},
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := isModelArtifact(test.manifest)
|
||||
assert.Equal(t, test.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractGGUFLayers(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
manifest *v1.Manifest
|
||||
expected int
|
||||
}{
|
||||
{
|
||||
name: "single GGUF layer",
|
||||
manifest: &v1.Manifest{
|
||||
Layers: []v1.Descriptor{
|
||||
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||
},
|
||||
},
|
||||
expected: 1,
|
||||
},
|
||||
{
|
||||
name: "multiple GGUF layers",
|
||||
manifest: &v1.Manifest{
|
||||
Layers: []v1.Descriptor{
|
||||
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "def"}},
|
||||
},
|
||||
},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "mixed layers",
|
||||
manifest: &v1.Manifest{
|
||||
Layers: []v1.Descriptor{
|
||||
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||
{MediaType: types.DockerLayer, Digest: v1.Hash{Algorithm: "sha256", Hex: "def"}},
|
||||
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "ghi"}},
|
||||
},
|
||||
},
|
||||
expected: 2,
|
||||
},
|
||||
{
|
||||
name: "no GGUF layers",
|
||||
manifest: &v1.Manifest{
|
||||
Layers: []v1.Descriptor{
|
||||
{MediaType: types.DockerLayer},
|
||||
},
|
||||
},
|
||||
expected: 0,
|
||||
},
|
||||
{
|
||||
name: "empty layers",
|
||||
manifest: &v1.Manifest{
|
||||
Layers: []v1.Descriptor{},
|
||||
},
|
||||
expected: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
result := extractGGUFLayers(test.manifest)
|
||||
assert.Len(t, result, test.expected)
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -7,6 +7,7 @@ import (
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/anchore/syft/syft/source/directorysource"
|
||||
"github.com/anchore/syft/syft/source/filesource"
|
||||
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||
"github.com/anchore/syft/syft/source/snapsource"
|
||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||
)
|
||||
@ -43,6 +44,14 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
||||
// --from docker, registry, etc.
|
||||
Join(stereoscopeProviders.Select(PullTag)...).
|
||||
|
||||
// --from oci-model, registry (for select cases only)
|
||||
// OCI model artifacts with header-only fetching
|
||||
// note: we don't want to use the "pull" tag since it's not actually pulling the full image,
|
||||
// instead we want to match on registry since these models are stored in OCI registries.
|
||||
// This does mean that this must be placed after the pull provider, which is ideal since we don't want to
|
||||
// unnecessarily pull registry headers first if the more common case is the pull providers.
|
||||
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), "registry")).
|
||||
|
||||
// --from snap (remote only)
|
||||
Join(tagProvider(snapsource.NewRemoteSourceProvider(userInput, cfg.Exclude, cfg.DigestAlgorithms, cfg.Alias), SnapTag))
|
||||
}
|
||||
|
||||
@ -5,7 +5,6 @@ import (
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/distribution/reference"
|
||||
"github.com/opencontainers/go-digest"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/image"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
@ -36,7 +35,7 @@ type stereoscopeImageSource struct {
|
||||
func New(img *image.Image, cfg ImageConfig) source.Source {
|
||||
metadata := imageMetadataFromStereoscopeImage(img, cfg.Reference)
|
||||
return &stereoscopeImageSource{
|
||||
id: deriveIDFromStereoscopeImage(cfg.Alias, metadata),
|
||||
id: internal.DeriveImageID(cfg.Alias, metadata),
|
||||
config: cfg,
|
||||
image: img,
|
||||
metadata: metadata,
|
||||
@ -163,61 +162,6 @@ func imageMetadataFromStereoscopeImage(img *image.Image, reference string) sourc
|
||||
}
|
||||
}
|
||||
|
||||
// deriveIDFromStereoscopeImage derives an artifact ID from the given image metadata. The order of data precedence is:
|
||||
// 1. prefer a digest of the raw container image manifest
|
||||
// 2. if no manifest digest is available, calculate a chain ID from the image layer metadata
|
||||
// 3. if no layer metadata is available, use the user input string
|
||||
//
|
||||
// in all cases, if an alias is provided, it is additionally considered in the ID calculation. This allows for the
|
||||
// same image to be scanned multiple times with different aliases and be considered logically different.
|
||||
func deriveIDFromStereoscopeImage(alias source.Alias, metadata source.ImageMetadata) artifact.ID {
|
||||
var input string
|
||||
|
||||
if len(metadata.RawManifest) > 0 {
|
||||
input = digest.Canonical.FromBytes(metadata.RawManifest).String()
|
||||
} else {
|
||||
// calculate chain ID for image sources where manifestDigest is not available
|
||||
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
||||
input = calculateChainID(metadata.Layers)
|
||||
if input == "" {
|
||||
// TODO what happens here if image has no layers?
|
||||
// is this case possible?
|
||||
input = digest.Canonical.FromString(metadata.UserInput).String()
|
||||
}
|
||||
}
|
||||
|
||||
if !alias.IsEmpty() {
|
||||
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
|
||||
// scans the same item but is considered to be logically different, then ID will express that.
|
||||
aliasStr := fmt.Sprintf(":%s@%s", alias.Name, alias.Version)
|
||||
input = digest.Canonical.FromString(input + aliasStr).String()
|
||||
}
|
||||
|
||||
return internal.ArtifactIDFromDigest(input)
|
||||
}
|
||||
|
||||
func calculateChainID(lm []source.LayerMetadata) string {
|
||||
if len(lm) < 1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
// DiffID(L0) = digest of layer 0
|
||||
// https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32
|
||||
chainID := lm[0].Digest
|
||||
id := chain(chainID, lm[1:])
|
||||
|
||||
return id
|
||||
}
|
||||
|
||||
func chain(chainID string, layers []source.LayerMetadata) string {
|
||||
if len(layers) < 1 {
|
||||
return chainID
|
||||
}
|
||||
|
||||
chainID = digest.Canonical.FromString(layers[0].Digest + " " + chainID).String()
|
||||
return chain(chainID, layers[1:])
|
||||
}
|
||||
|
||||
func getImageExclusionFunction(exclusions []string) func(string) bool {
|
||||
if len(exclusions) == 0 {
|
||||
return nil
|
||||
|
||||
@ -2,8 +2,6 @@ package stereoscopesource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
@ -12,7 +10,6 @@ import (
|
||||
|
||||
"github.com/anchore/stereoscope"
|
||||
"github.com/anchore/stereoscope/pkg/imagetest"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/internal/testutil"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
@ -112,146 +109,6 @@ func Test_StereoscopeImage_Exclusions(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func Test_StereoscopeImageSource_ID(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
alias source.Alias
|
||||
metadata source.ImageMetadata
|
||||
want artifact.ID
|
||||
}{
|
||||
{
|
||||
name: "use raw manifest over chain ID or user input",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
hasher := sha256.New()
|
||||
hasher.Write([]byte("raw-manifest"))
|
||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "use chain ID over user input",
|
||||
metadata: source.ImageMetadata{
|
||||
//UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
metadata := []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
}
|
||||
return artifact.ID(strings.TrimPrefix(calculateChainID(metadata), "sha256:"))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "use user input last",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: func() artifact.ID {
|
||||
hasher := sha256.New()
|
||||
hasher.Write([]byte("user-input"))
|
||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
||||
}(),
|
||||
},
|
||||
{
|
||||
name: "without alias (first)",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: "85298926ecd92ed57688f13039017160cd728f04dd0d2d10a10629007106f107",
|
||||
},
|
||||
{
|
||||
name: "always consider alias (first)",
|
||||
alias: source.Alias{
|
||||
Name: "alias",
|
||||
Version: "version",
|
||||
},
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
Layers: []source.LayerMetadata{
|
||||
{
|
||||
Digest: "a",
|
||||
},
|
||||
{
|
||||
Digest: "b",
|
||||
},
|
||||
{
|
||||
Digest: "c",
|
||||
},
|
||||
},
|
||||
RawManifest: []byte("raw-manifest"),
|
||||
},
|
||||
want: "a8717e42449960c1dd4963f2f22bd69c7c105e7e82445be0a65aa1825d62ff0d",
|
||||
},
|
||||
{
|
||||
name: "without alias (last)",
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: "ab0dff627d80b9753193d7280bec8f45e8ec6b4cb0912c6fffcf7cd782d9739e",
|
||||
},
|
||||
{
|
||||
name: "always consider alias (last)",
|
||||
alias: source.Alias{
|
||||
Name: "alias",
|
||||
Version: "version",
|
||||
},
|
||||
metadata: source.ImageMetadata{
|
||||
UserInput: "user-input",
|
||||
},
|
||||
want: "fe86c0eecd5654d3c0c0b2176aa394aef6440347c241aa8d9b628dfdde4287cf",
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equal(t, tt.want, deriveIDFromStereoscopeImage(tt.alias, tt.metadata))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_Describe(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user