mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 02:26:42 +01:00
feat: Add support for scanning GGUF models from OCI registries (#4335)
--------- Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com> Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com> Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
3a23cfff1d
commit
2c5e193f7a
@ -59,7 +59,9 @@ const (
|
|||||||
nonImageSchemeHelp = ` {{.appName}} {{.command}} dir:path/to/yourproject read directly from a path on disk (any directory)
|
nonImageSchemeHelp = ` {{.appName}} {{.command}} dir:path/to/yourproject read directly from a path on disk (any directory)
|
||||||
{{.appName}} {{.command}} file:path/to/yourproject/file read directly from a path on disk (any single file)
|
{{.appName}} {{.command}} file:path/to/yourproject/file read directly from a path on disk (any single file)
|
||||||
`
|
`
|
||||||
scanSchemeHelp = "\n " + schemeHelpHeader + "\n" + imageSchemeHelp + nonImageSchemeHelp
|
modelSchemeHelp = ` {{.appName}} {{.command}} oci-model-registry:ai/llama3.2 scan an OCI model artifact from a registry (e.g. Docker Hub AI models)
|
||||||
|
`
|
||||||
|
scanSchemeHelp = "\n " + schemeHelpHeader + "\n" + imageSchemeHelp + modelSchemeHelp + nonImageSchemeHelp
|
||||||
|
|
||||||
scanHelp = scanExample + scanSchemeHelp
|
scanHelp = scanExample + scanSchemeHelp
|
||||||
)
|
)
|
||||||
|
|||||||
@ -3,10 +3,12 @@ package internal
|
|||||||
const (
|
const (
|
||||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||||
JSONSchemaVersion = "16.1.2"
|
JSONSchemaVersion = "16.1.3"
|
||||||
|
|
||||||
// Changelog
|
// Changelog
|
||||||
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).
|
// 16.1.0 - reformulated the python pdm fields (added "URL" and removed the unused "path" field).
|
||||||
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field)
|
// 16.1.1 - correct elf package osCpe field according to the document of systemd (also add appCpe field)
|
||||||
|
// 16.1.2 - placeholder for 16.1.2 changelog
|
||||||
|
// 16.1.3 - add GGUFFileParts to GGUFFileHeader metadata
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
@ -82,12 +82,29 @@ func assembleTypeContainer(items []any) (any, map[string]string) {
|
|||||||
return reflect.New(structType).Elem().Interface(), mapping
|
return reflect.New(structType).Elem().Interface(), mapping
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//nolint:funlen
|
||||||
func build() *jsonschema.Schema {
|
func build() *jsonschema.Schema {
|
||||||
|
// create metadata mapping first so we can use it in the Namer function for self-referential types
|
||||||
|
pkgMetadataContainer, pkgMetadataMapping := assembleTypeContainer(packagemetadata.AllTypes())
|
||||||
|
pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer)
|
||||||
|
|
||||||
|
// create a set of valid metadata display names for lookup
|
||||||
|
// (since Namer now returns display names, the schema definitions use display names as keys)
|
||||||
|
pkgMetadataDisplayNames := make(map[string]struct{}, len(pkgMetadataMapping))
|
||||||
|
for _, displayName := range pkgMetadataMapping {
|
||||||
|
pkgMetadataDisplayNames[displayName] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
reflector := &jsonschema.Reflector{
|
reflector := &jsonschema.Reflector{
|
||||||
BaseSchemaID: schemaID(),
|
BaseSchemaID: schemaID(),
|
||||||
AllowAdditionalProperties: true,
|
AllowAdditionalProperties: true,
|
||||||
Namer: func(r reflect.Type) string {
|
Namer: func(r reflect.Type) string {
|
||||||
return strings.TrimPrefix(r.Name(), "JSON")
|
name := strings.TrimPrefix(r.Name(), "JSON")
|
||||||
|
// if this is a metadata type, use the mapped name for consistent references
|
||||||
|
if mappedName, ok := pkgMetadataMapping[name]; ok {
|
||||||
|
return mappedName
|
||||||
|
}
|
||||||
|
return name
|
||||||
},
|
},
|
||||||
CommentMap: make(map[string]string),
|
CommentMap: make(map[string]string),
|
||||||
}
|
}
|
||||||
@ -123,9 +140,6 @@ func build() *jsonschema.Schema {
|
|||||||
copyAliasFieldComments(reflector.CommentMap, repoRoot)
|
copyAliasFieldComments(reflector.CommentMap, repoRoot)
|
||||||
}
|
}
|
||||||
|
|
||||||
pkgMetadataContainer, pkgMetadataMapping := assembleTypeContainer(packagemetadata.AllTypes())
|
|
||||||
pkgMetadataContainerType := reflect.TypeOf(pkgMetadataContainer)
|
|
||||||
|
|
||||||
// srcMetadataContainer := assembleTypeContainer(sourcemetadata.AllTypes())
|
// srcMetadataContainer := assembleTypeContainer(sourcemetadata.AllTypes())
|
||||||
// srcMetadataContainerType := reflect.TypeOf(srcMetadataContainer)
|
// srcMetadataContainerType := reflect.TypeOf(srcMetadataContainer)
|
||||||
|
|
||||||
@ -144,11 +158,10 @@ func build() *jsonschema.Schema {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
displayName, ok := pkgMetadataMapping[typeName]
|
if _, ok := pkgMetadataDisplayNames[typeName]; ok {
|
||||||
if ok {
|
// this is a package metadata type (typeName is already the display name from Namer)
|
||||||
// this is a package metadata type...
|
documentSchema.Definitions[typeName] = definition
|
||||||
documentSchema.Definitions[displayName] = definition
|
metadataNames = append(metadataNames, typeName)
|
||||||
metadataNames = append(metadataNames, displayName)
|
|
||||||
} else {
|
} else {
|
||||||
// this is a type that the metadata type uses (e.g. DpkgFileRecord)
|
// this is a type that the metadata type uses (e.g. DpkgFileRecord)
|
||||||
documentSchema.Definitions[typeName] = definition
|
documentSchema.Definitions[typeName] = definition
|
||||||
|
|||||||
@ -26,6 +26,7 @@ var knownNonMetadataTypeNames = strset.New(
|
|||||||
// known to be metadata types themselves. Adding to this list will prevent the removal of the type from the schema.
|
// known to be metadata types themselves. Adding to this list will prevent the removal of the type from the schema.
|
||||||
var knownMetadataTypeNames = strset.New(
|
var knownMetadataTypeNames = strset.New(
|
||||||
"DotnetPortableExecutableEntry",
|
"DotnetPortableExecutableEntry",
|
||||||
|
"GGUFFileHeader",
|
||||||
)
|
)
|
||||||
|
|
||||||
func DiscoverTypeNames() ([]string, error) {
|
func DiscoverTypeNames() ([]string, error) {
|
||||||
|
|||||||
@ -94,25 +94,33 @@ func findMetadataDefinitionNamesInFile(path string) ([]string, []string, error)
|
|||||||
|
|
||||||
// loop over all types declared in the type declaration
|
// loop over all types declared in the type declaration
|
||||||
for _, typ := range spec.Specs {
|
for _, typ := range spec.Specs {
|
||||||
// check if the type is a struct type
|
typeSpec, ok := typ.(*ast.TypeSpec)
|
||||||
spec, ok := typ.(*ast.TypeSpec)
|
if !ok || typeSpec.Type == nil {
|
||||||
if !ok || spec.Type == nil {
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
structType, ok := spec.Type.(*ast.StructType)
|
name := typeSpec.Name.String()
|
||||||
if !ok {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
// check if the struct type ends with "Metadata"
|
|
||||||
name := spec.Name.String()
|
|
||||||
|
|
||||||
// only look for exported types that end with "Metadata"
|
// only look for exported types that end with "Metadata"
|
||||||
if isMetadataTypeCandidate(name) {
|
if !isMetadataTypeCandidate(name) {
|
||||||
// print the full declaration of the struct type
|
continue
|
||||||
metadataDefinitions = append(metadataDefinitions, name)
|
}
|
||||||
|
|
||||||
|
metadataDefinitions = append(metadataDefinitions, name)
|
||||||
|
|
||||||
|
// handle struct types (e.g., "type FooMetadata struct {...}")
|
||||||
|
if structType, ok := typeSpec.Type.(*ast.StructType); ok {
|
||||||
usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...)
|
usedTypeNames = append(usedTypeNames, typeNamesUsedInStruct(structType)...)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// handle type definitions from another type (e.g., "type FooMetadata BarMetadata")
|
||||||
|
// if the base type is NOT a metadata candidate, track it as used
|
||||||
|
// (e.g., we want both ImageMetadata and OCIModelMetadata which is an alias to it)
|
||||||
|
if ident, ok := typeSpec.Type.(*ast.Ident); ok {
|
||||||
|
if !isMetadataTypeCandidate(ident.Name) {
|
||||||
|
usedTypeNames = append(usedTypeNames, ident.Name)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -6,5 +6,5 @@ import "github.com/anchore/syft/syft/source"
|
|||||||
|
|
||||||
// AllTypes returns a list of all source metadata types that syft supports (that are represented in the source.Description.Metadata field).
|
// AllTypes returns a list of all source metadata types that syft supports (that are represented in the source.Description.Metadata field).
|
||||||
func AllTypes() []any {
|
func AllTypes() []any {
|
||||||
return []any{source.DirectoryMetadata{}, source.FileMetadata{}, source.ImageMetadata{}, source.SnapMetadata{}}
|
return []any{source.DirectoryMetadata{}, source.FileMetadata{}, source.ImageMetadata{}, source.OCIModelMetadata{}, source.SnapMetadata{}}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -12,6 +12,7 @@ var jsonNameFromType = map[reflect.Type][]string{
|
|||||||
reflect.TypeOf(source.FileMetadata{}): {"file"},
|
reflect.TypeOf(source.FileMetadata{}): {"file"},
|
||||||
reflect.TypeOf(source.ImageMetadata{}): {"image"},
|
reflect.TypeOf(source.ImageMetadata{}): {"image"},
|
||||||
reflect.TypeOf(source.SnapMetadata{}): {"snap"},
|
reflect.TypeOf(source.SnapMetadata{}): {"snap"},
|
||||||
|
reflect.TypeOf(source.OCIModelMetadata{}): {"oci-model"},
|
||||||
}
|
}
|
||||||
|
|
||||||
func AllTypeNames() []string {
|
func AllTypeNames() []string {
|
||||||
|
|||||||
4248
schema/json/schema-16.1.3.json
Normal file
4248
schema/json/schema-16.1.3.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||||
"$id": "anchore.io/schema/syft/json/16.1.2/document",
|
"$id": "anchore.io/schema/syft/json/16.1.3/document",
|
||||||
"$ref": "#/$defs/Document",
|
"$ref": "#/$defs/Document",
|
||||||
"$defs": {
|
"$defs": {
|
||||||
"AlpmDbEntry": {
|
"AlpmDbEntry": {
|
||||||
@ -1478,6 +1478,13 @@
|
|||||||
"metadataHash": {
|
"metadataHash": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
|
"description": "MetadataKeyValuesHash is a xx64 hash of all key-value pairs from the GGUF header metadata.\nThis hash is computed over the complete header metadata (including the fields extracted\ninto typed fields above) and provides a stable identifier for the model configuration\nacross different file locations or remotes. It allows matching identical models even\nwhen stored in different repositories or with different filenames."
|
||||||
|
},
|
||||||
|
"parts": {
|
||||||
|
"items": {
|
||||||
|
"$ref": "#/$defs/GgufFileHeader"
|
||||||
|
},
|
||||||
|
"type": "array",
|
||||||
|
"description": "Parts contains headers from additional GGUF files that were merged\ninto this package during post-processing (e.g., from OCI layers without model names)."
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"type": "object",
|
"type": "object",
|
||||||
|
|||||||
@ -477,7 +477,7 @@ func (c *CreateSBOMConfig) Create(ctx context.Context, src source.Source) (*sbom
|
|||||||
|
|
||||||
func findDefaultTags(src source.Description) ([]string, error) {
|
func findDefaultTags(src source.Description) ([]string, error) {
|
||||||
switch m := src.Metadata.(type) {
|
switch m := src.Metadata.(type) {
|
||||||
case source.ImageMetadata:
|
case source.ImageMetadata, source.OCIModelMetadata:
|
||||||
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||||
case source.FileMetadata, source.DirectoryMetadata:
|
case source.FileMetadata, source.DirectoryMetadata:
|
||||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||||
|
|||||||
@ -13,16 +13,18 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var _ Resolver = (*MockResolver)(nil)
|
var _ Resolver = (*MockResolver)(nil)
|
||||||
|
var _ OCIMediaTypeResolver = (*MockResolver)(nil)
|
||||||
|
|
||||||
// MockResolver implements the FileResolver interface and is intended for use *only in test code*.
|
// MockResolver implements the FileResolver interface and is intended for use *only in test code*.
|
||||||
// It provides an implementation that can resolve local filesystem paths using only a provided discrete list of file
|
// It provides an implementation that can resolve local filesystem paths using only a provided discrete list of file
|
||||||
// paths, which are typically paths to test fixtures.
|
// paths, which are typically paths to test fixtures.
|
||||||
type MockResolver struct {
|
type MockResolver struct {
|
||||||
locations []Location
|
locations []Location
|
||||||
metadata map[Coordinates]Metadata
|
metadata map[Coordinates]Metadata
|
||||||
mimeTypeIndex map[string][]Location
|
mimeTypeIndex map[string][]Location
|
||||||
extension map[string][]Location
|
mediaTypeIndex map[string][]Location
|
||||||
basename map[string][]Location
|
extension map[string][]Location
|
||||||
|
basename map[string][]Location
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewMockResolverForPaths creates a new MockResolver, where the only resolvable
|
// NewMockResolverForPaths creates a new MockResolver, where the only resolvable
|
||||||
@ -72,6 +74,34 @@ func NewMockResolverForPathsWithMetadata(metadata map[Coordinates]Metadata) *Moc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NewMockResolverForMediaTypes creates a MockResolver that can resolve files by media type.
|
||||||
|
// The mediaTypes map specifies which locations should be returned for each media type.
|
||||||
|
func NewMockResolverForMediaTypes(mediaTypes map[string][]Location) *MockResolver {
|
||||||
|
var locations []Location
|
||||||
|
mediaTypeIndex := make(map[string][]Location)
|
||||||
|
extension := make(map[string][]Location)
|
||||||
|
basename := make(map[string][]Location)
|
||||||
|
|
||||||
|
for mediaType, locs := range mediaTypes {
|
||||||
|
mediaTypeIndex[mediaType] = append(mediaTypeIndex[mediaType], locs...)
|
||||||
|
for _, l := range locs {
|
||||||
|
locations = append(locations, l)
|
||||||
|
ext := path.Ext(l.RealPath)
|
||||||
|
extension[ext] = append(extension[ext], l)
|
||||||
|
bn := path.Base(l.RealPath)
|
||||||
|
basename[bn] = append(basename[bn], l)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &MockResolver{
|
||||||
|
locations: locations,
|
||||||
|
metadata: make(map[Coordinates]Metadata),
|
||||||
|
mediaTypeIndex: mediaTypeIndex,
|
||||||
|
extension: extension,
|
||||||
|
basename: basename,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// HasPath indicates if the given path exists in the underlying source.
|
// HasPath indicates if the given path exists in the underlying source.
|
||||||
func (r MockResolver) HasPath(path string) bool {
|
func (r MockResolver) HasPath(path string) bool {
|
||||||
for _, l := range r.locations {
|
for _, l := range r.locations {
|
||||||
@ -189,6 +219,14 @@ func (r MockResolver) FilesByMIMEType(types ...string) ([]Location, error) {
|
|||||||
return locations, nil
|
return locations, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r MockResolver) FilesByMediaType(types ...string) ([]Location, error) {
|
||||||
|
var locations []Location
|
||||||
|
for _, ty := range types {
|
||||||
|
locations = append(locations, r.mediaTypeIndex[ty]...)
|
||||||
|
}
|
||||||
|
return locations, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (r MockResolver) FilesByExtension(extensions ...string) ([]Location, error) {
|
func (r MockResolver) FilesByExtension(extensions ...string) ([]Location, error) {
|
||||||
var results []Location
|
var results []Location
|
||||||
for _, ext := range extensions {
|
for _, ext := range extensions {
|
||||||
|
|||||||
@ -52,6 +52,17 @@ type PathResolver interface {
|
|||||||
RelativeFileByPath(_ Location, path string) *Location
|
RelativeFileByPath(_ Location, path string) *Location
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OCIMediaTypeResolver resolves single files as a layer in an OCI artifact for a given media type.
|
||||||
|
type OCIMediaTypeResolver interface {
|
||||||
|
// FilesByMediaType fetches a set of file references which the contents have been classified as one of the given Media Types.
|
||||||
|
// The implementation for this may vary, however, this was first implemented to classify ai globs stored in OCI images.
|
||||||
|
// The following considerations should be made when implementing:
|
||||||
|
// - only return locations to files (NOT directories)
|
||||||
|
// - locations for the implementer should be "/" and the fsid should be the layer digest the glob was found
|
||||||
|
// - locations should be used with the FileContents API to return readers to the temporary data
|
||||||
|
FilesByMediaType(types ...string) ([]Location, error)
|
||||||
|
}
|
||||||
|
|
||||||
// LocationResolver provides iteration over all file locations in a source.
|
// LocationResolver provides iteration over all file locations in a source.
|
||||||
type LocationResolver interface {
|
type LocationResolver interface {
|
||||||
// AllLocations returns a channel of all file references from the underlying source.
|
// AllLocations returns a channel of all file references from the underlying source.
|
||||||
|
|||||||
@ -35,6 +35,7 @@ const (
|
|||||||
spdxPrimaryPurposeOther = "OTHER"
|
spdxPrimaryPurposeOther = "OTHER"
|
||||||
|
|
||||||
prefixImage = "Image"
|
prefixImage = "Image"
|
||||||
|
prefixOCIModel = "OCIModel"
|
||||||
prefixDirectory = "Directory"
|
prefixDirectory = "Directory"
|
||||||
prefixFile = "File"
|
prefixFile = "File"
|
||||||
prefixSnap = "Snap"
|
prefixSnap = "Snap"
|
||||||
@ -215,6 +216,36 @@ func toRootPackage(s source.Description) *spdx.Package {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case source.OCIModelMetadata:
|
||||||
|
prefix = prefixOCIModel
|
||||||
|
purpose = spdxPrimaryPurposeContainer
|
||||||
|
|
||||||
|
qualifiers := packageurl.Qualifiers{
|
||||||
|
{
|
||||||
|
Key: "arch",
|
||||||
|
Value: m.Architecture,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ref, _ := reference.Parse(m.UserInput)
|
||||||
|
if ref, ok := ref.(reference.NamedTagged); ok {
|
||||||
|
qualifiers = append(qualifiers, packageurl.Qualifier{
|
||||||
|
Key: "tag",
|
||||||
|
Value: ref.Tag(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
c := toChecksum(m.ManifestDigest)
|
||||||
|
if c != nil {
|
||||||
|
checksums = append(checksums, *c)
|
||||||
|
purl = &packageurl.PackageURL{
|
||||||
|
Type: "oci",
|
||||||
|
Name: s.Name,
|
||||||
|
Version: m.ManifestDigest,
|
||||||
|
Qualifiers: qualifiers,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
case source.DirectoryMetadata:
|
case source.DirectoryMetadata:
|
||||||
prefix = prefixDirectory
|
prefix = prefixDirectory
|
||||||
purpose = spdxPrimaryPurposeFile
|
purpose = spdxPrimaryPurposeFile
|
||||||
|
|||||||
@ -316,6 +316,81 @@ func Test_toFormatModel(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
in: sbom.SBOM{
|
||||||
|
Source: source.Description{
|
||||||
|
Name: "llama",
|
||||||
|
Version: "sha256:d34db33f",
|
||||||
|
Supplier: "Model Provider",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "model-repo/llama:latest",
|
||||||
|
ManifestDigest: "sha256:d34db33f",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Artifacts: sbom.Artifacts{
|
||||||
|
Packages: pkg.NewCollection(pkg.Package{
|
||||||
|
Name: "pkg-1",
|
||||||
|
Version: "version-1",
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: &spdx.Document{
|
||||||
|
SPDXIdentifier: "DOCUMENT",
|
||||||
|
SPDXVersion: spdx.Version,
|
||||||
|
DataLicense: spdx.DataLicense,
|
||||||
|
DocumentName: "llama",
|
||||||
|
Packages: []*spdx.Package{
|
||||||
|
{
|
||||||
|
PackageSPDXIdentifier: "Package-pkg-1-pkg-1",
|
||||||
|
PackageName: "pkg-1",
|
||||||
|
PackageVersion: "version-1",
|
||||||
|
PackageSupplier: &spdx.Supplier{
|
||||||
|
Supplier: "Model Provider",
|
||||||
|
SupplierType: "Organization",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
PackageSPDXIdentifier: "DocumentRoot-OCIModel-llama",
|
||||||
|
PackageName: "llama",
|
||||||
|
PackageVersion: "sha256:d34db33f",
|
||||||
|
PrimaryPackagePurpose: "CONTAINER",
|
||||||
|
PackageChecksums: []spdx.Checksum{{Algorithm: "SHA256", Value: "d34db33f"}},
|
||||||
|
PackageExternalReferences: []*v2_3.PackageExternalReference{
|
||||||
|
{
|
||||||
|
Category: "PACKAGE-MANAGER",
|
||||||
|
RefType: "purl",
|
||||||
|
Locator: "pkg:oci/llama@sha256%3Ad34db33f?arch=&tag=latest",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
PackageSupplier: &spdx.Supplier{
|
||||||
|
Supplier: "Model Provider",
|
||||||
|
SupplierType: "Organization",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Relationships: []*spdx.Relationship{
|
||||||
|
{
|
||||||
|
RefA: spdx.DocElementID{
|
||||||
|
ElementRefID: "DocumentRoot-OCIModel-llama",
|
||||||
|
},
|
||||||
|
RefB: spdx.DocElementID{
|
||||||
|
ElementRefID: "Package-pkg-1-pkg-1",
|
||||||
|
},
|
||||||
|
Relationship: spdx.RelationshipContains,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
RefA: spdx.DocElementID{
|
||||||
|
ElementRefID: "DOCUMENT",
|
||||||
|
},
|
||||||
|
RefB: spdx.DocElementID{
|
||||||
|
ElementRefID: "DocumentRoot-OCIModel-llama",
|
||||||
|
},
|
||||||
|
Relationship: spdx.RelationshipDescribes,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
@ -122,6 +122,9 @@ func toPath(s source.Description, p pkg.Package) string {
|
|||||||
case source.ImageMetadata:
|
case source.ImageMetadata:
|
||||||
image := strings.ReplaceAll(metadata.UserInput, ":/", "//")
|
image := strings.ReplaceAll(metadata.UserInput, ":/", "//")
|
||||||
return fmt.Sprintf("%s:/%s", image, packagePath)
|
return fmt.Sprintf("%s:/%s", image, packagePath)
|
||||||
|
case source.OCIModelMetadata:
|
||||||
|
image := strings.ReplaceAll(metadata.UserInput, ":/", "//")
|
||||||
|
return fmt.Sprintf("%s:/%s", image, packagePath)
|
||||||
case source.FileMetadata:
|
case source.FileMetadata:
|
||||||
path := trimRelative(metadata.Path)
|
path := trimRelative(metadata.Path)
|
||||||
if isArchive(metadata.Path) {
|
if isArchive(metadata.Path) {
|
||||||
|
|||||||
@ -178,6 +178,11 @@ func Test_toGithubModel(t *testing.T) {
|
|||||||
metadata: source.SnapMetadata{},
|
metadata: source.SnapMetadata{},
|
||||||
testPath: "name:/etc",
|
testPath: "name:/etc",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
metadata: source.OCIModelMetadata{UserInput: "model-repo/llama:latest"},
|
||||||
|
testPath: "model-repo/llama:latest:/etc",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
|
|||||||
@ -12,6 +12,8 @@ func DocumentName(src source.Description) string {
|
|||||||
switch metadata := src.Metadata.(type) {
|
switch metadata := src.Metadata.(type) {
|
||||||
case source.ImageMetadata:
|
case source.ImageMetadata:
|
||||||
return metadata.UserInput
|
return metadata.UserInput
|
||||||
|
case source.OCIModelMetadata:
|
||||||
|
return metadata.UserInput
|
||||||
case source.DirectoryMetadata:
|
case source.DirectoryMetadata:
|
||||||
return metadata.Path
|
return metadata.Path
|
||||||
case source.FileMetadata:
|
case source.FileMetadata:
|
||||||
|
|||||||
@ -54,6 +54,17 @@ func Test_DocumentName(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expected: "some/name",
|
expected: "some/name",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
srcMetadata: source.Description{
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "model-repo/name:tag",
|
||||||
|
ID: "id",
|
||||||
|
ManifestDigest: "digest",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "model-repo/name:tag",
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "named",
|
name: "named",
|
||||||
srcMetadata: source.Description{
|
srcMetadata: source.Description{
|
||||||
|
|||||||
@ -14,6 +14,7 @@ import (
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
InputImage = "image"
|
InputImage = "image"
|
||||||
|
InputOCIModel = "oci-model"
|
||||||
InputDirectory = "dir"
|
InputDirectory = "dir"
|
||||||
InputFile = "file"
|
InputFile = "file"
|
||||||
InputSnap = "snap"
|
InputSnap = "snap"
|
||||||
@ -30,6 +31,8 @@ func DocumentNamespace(name string, src source.Description, desc sbom.Descriptor
|
|||||||
switch src.Metadata.(type) {
|
switch src.Metadata.(type) {
|
||||||
case source.ImageMetadata:
|
case source.ImageMetadata:
|
||||||
input = InputImage
|
input = InputImage
|
||||||
|
case source.OCIModelMetadata:
|
||||||
|
input = InputOCIModel
|
||||||
case source.DirectoryMetadata:
|
case source.DirectoryMetadata:
|
||||||
input = InputDirectory
|
input = InputDirectory
|
||||||
case source.FileMetadata:
|
case source.FileMetadata:
|
||||||
|
|||||||
@ -61,6 +61,18 @@ func Test_DocumentNamespace(t *testing.T) {
|
|||||||
},
|
},
|
||||||
expected: "https://anchore.com/syft/snap/my-name-",
|
expected: "https://anchore.com/syft/snap/my-name-",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
inputName: "my-name",
|
||||||
|
src: source.Description{
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "model-repo/name:tag",
|
||||||
|
ID: "id",
|
||||||
|
ManifestDigest: "digest",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: "https://anchore.com/syft/oci-model/my-name-",
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
|||||||
@ -83,7 +83,7 @@ func SourceInfo(p pkg.Package) string {
|
|||||||
case pkg.TerraformPkg:
|
case pkg.TerraformPkg:
|
||||||
answer = "acquired package info from Terraform dependency lock file"
|
answer = "acquired package info from Terraform dependency lock file"
|
||||||
case pkg.ModelPkg:
|
case pkg.ModelPkg:
|
||||||
answer = "acquired package info from AI artifact (e.g. GGUF File"
|
answer = "acquired package info from AI artifact (e.g. GGUF File)"
|
||||||
default:
|
default:
|
||||||
answer = "acquired package info from the following paths"
|
answer = "acquired package info from the following paths"
|
||||||
}
|
}
|
||||||
|
|||||||
@ -190,6 +190,37 @@ func TestSource_UnmarshalJSON(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
input: []byte(`{
|
||||||
|
"id": "foobar",
|
||||||
|
"type": "oci-model",
|
||||||
|
"metadata": {
|
||||||
|
"userInput": "model-repo/llama:latest",
|
||||||
|
"imageID": "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a",
|
||||||
|
"manifestDigest": "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c",
|
||||||
|
"mediaType": "application/vnd.oci.image.manifest.v1+json",
|
||||||
|
"tags": [],
|
||||||
|
"imageSize": 5576169,
|
||||||
|
"layers": [],
|
||||||
|
"repoDigests": []
|
||||||
|
}
|
||||||
|
}`),
|
||||||
|
expected: &Source{
|
||||||
|
ID: "foobar",
|
||||||
|
Type: "oci-model",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "model-repo/llama:latest",
|
||||||
|
ID: "sha256:e7b300aee9f9bf3433d32bc9305bfdd22183beb59d933b48d77ab56ba53a197a",
|
||||||
|
ManifestDigest: "sha256:e515aad2ed234a5072c4d2ef86a1cb77d5bfe4b11aa865d9214875734c4eeb3c",
|
||||||
|
MediaType: "application/vnd.oci.image.manifest.v1+json",
|
||||||
|
Tags: []string{},
|
||||||
|
Size: 5576169,
|
||||||
|
Layers: []source.LayerMetadata{},
|
||||||
|
RepoDigests: []string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "unknown source type",
|
name: "unknown source type",
|
||||||
input: []byte(`{
|
input: []byte(`{
|
||||||
|
|||||||
@ -325,7 +325,17 @@ func toSourceModel(src source.Description) model.Source {
|
|||||||
Metadata: src.Metadata,
|
Metadata: src.Metadata,
|
||||||
}
|
}
|
||||||
|
|
||||||
if metadata, ok := src.Metadata.(source.ImageMetadata); ok {
|
switch metadata := src.Metadata.(type) {
|
||||||
|
case source.ImageMetadata:
|
||||||
|
// ensure that empty collections are not shown as null
|
||||||
|
if metadata.RepoDigests == nil {
|
||||||
|
metadata.RepoDigests = []string{}
|
||||||
|
}
|
||||||
|
if metadata.Tags == nil {
|
||||||
|
metadata.Tags = []string{}
|
||||||
|
}
|
||||||
|
m.Metadata = metadata
|
||||||
|
case source.OCIModelMetadata:
|
||||||
// ensure that empty collections are not shown as null
|
// ensure that empty collections are not shown as null
|
||||||
if metadata.RepoDigests == nil {
|
if metadata.RepoDigests == nil {
|
||||||
metadata.RepoDigests = []string{}
|
metadata.RepoDigests = []string{}
|
||||||
|
|||||||
@ -161,6 +161,34 @@ func Test_toSourceModel(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
src: source.Description{
|
||||||
|
ID: "test-id",
|
||||||
|
Name: "some-name",
|
||||||
|
Version: "some-version",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: model.Source{
|
||||||
|
ID: "test-id",
|
||||||
|
Name: "some-name",
|
||||||
|
Version: "some-version",
|
||||||
|
Type: "oci-model",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
RepoDigests: []string{},
|
||||||
|
Tags: []string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
// below are regression tests for when the name/version are not provided
|
// below are regression tests for when the name/version are not provided
|
||||||
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
||||||
{
|
{
|
||||||
@ -225,6 +253,30 @@ func Test_toSourceModel(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model - no name/version",
|
||||||
|
src: source.Description{
|
||||||
|
ID: "test-id",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: model.Source{
|
||||||
|
ID: "test-id",
|
||||||
|
Type: "oci-model",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
RepoDigests: []string{},
|
||||||
|
Tags: []string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
|||||||
@ -130,6 +130,32 @@ func Test_toSyftSourceData(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
src: model.Source{
|
||||||
|
ID: "the-id",
|
||||||
|
Name: "some-name",
|
||||||
|
Version: "some-version",
|
||||||
|
Type: "oci-model",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: &source.Description{
|
||||||
|
ID: "the-id",
|
||||||
|
Name: "some-name",
|
||||||
|
Version: "some-version",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
// below are regression tests for when the name/version are not provided
|
// below are regression tests for when the name/version are not provided
|
||||||
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
// historically we've hoisted up the name/version from the metadata, now it is a simple pass-through
|
||||||
{
|
{
|
||||||
@ -192,6 +218,28 @@ func Test_toSyftSourceData(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model - no name/version",
|
||||||
|
src: model.Source{
|
||||||
|
ID: "the-id",
|
||||||
|
Type: "oci-model",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: &source.Description{
|
||||||
|
ID: "the-id",
|
||||||
|
Metadata: source.OCIModelMetadata{
|
||||||
|
UserInput: "user-input",
|
||||||
|
ID: "id...",
|
||||||
|
ManifestDigest: "digest...",
|
||||||
|
MediaType: "type...",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
t.Run(test.name, func(t *testing.T) {
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
|||||||
@ -65,6 +65,8 @@ func validateSourcePlatform(src source.Source, cfg *GetSourceConfig) error {
|
|||||||
switch meta.(type) {
|
switch meta.(type) {
|
||||||
case *source.ImageMetadata, source.ImageMetadata:
|
case *source.ImageMetadata, source.ImageMetadata:
|
||||||
return nil
|
return nil
|
||||||
|
case *source.OCIModelMetadata, source.OCIModelMetadata:
|
||||||
|
return nil
|
||||||
case *source.SnapMetadata, source.SnapMetadata:
|
case *source.SnapMetadata, source.SnapMetadata:
|
||||||
return nil
|
return nil
|
||||||
default:
|
default:
|
||||||
|
|||||||
@ -31,7 +31,8 @@ func TestGetProviders_Sources(t *testing.T) {
|
|||||||
t.Errorf("Expected no error for Sources parameter, got: %v", err)
|
t.Errorf("Expected no error for Sources parameter, got: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(providers) != 1 {
|
// Registry tag has two providers: OCIModel and Image
|
||||||
t.Errorf("Expected 1 providers, got %d", len(providers))
|
if len(providers) != 2 {
|
||||||
|
t.Errorf("Expected 2 providers, got %d", len(providers))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -111,6 +111,10 @@ func TestValidateSourcePlatform_SupportedMetadataTypes(t *testing.T) {
|
|||||||
metadata: source.FileMetadata{},
|
metadata: source.FileMetadata{},
|
||||||
wantErr: require.Error,
|
wantErr: require.Error,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "oci-model",
|
||||||
|
metadata: source.OCIModelMetadata{},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
|
|||||||
141
syft/internal/fileresolver/container_image_model.go
Normal file
141
syft/internal/fileresolver/container_image_model.go
Normal file
@ -0,0 +1,141 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ file.Resolver = (*ContainerImageModel)(nil)
|
||||||
|
var _ file.OCIMediaTypeResolver = (*ContainerImageModel)(nil)
|
||||||
|
|
||||||
|
// LayerInfo holds information about an OCI model layer file stored on disk.
|
||||||
|
type LayerInfo struct {
|
||||||
|
TempPath string // Path to the temp file on disk
|
||||||
|
MediaType string // OCI media type of the layer
|
||||||
|
}
|
||||||
|
|
||||||
|
// ContainerImageModel is a file.Resolver implementation that provides access to
|
||||||
|
// GGUF header data fetched from OCI model artifacts via range-GET requests.
|
||||||
|
// This does not fetch the entire model from the registry, only a sliver of it.
|
||||||
|
type ContainerImageModel struct {
|
||||||
|
tempDir string // temp directory containing all layer files
|
||||||
|
layerFiles map[string]LayerInfo // digest -> layer info (temp path + media type)
|
||||||
|
locations map[string]file.Location // digest -> location
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewContainerImageModel creates a new resolver with the given temp directory and layer files.
|
||||||
|
func NewContainerImageModel(tempDir string, layerFiles map[string]LayerInfo) *ContainerImageModel {
|
||||||
|
// Create locations for all layer files
|
||||||
|
// Each location has RealPath="/", FileSystemID=digest, AccessPath="/"
|
||||||
|
locations := make(map[string]file.Location, len(layerFiles))
|
||||||
|
for digest := range layerFiles {
|
||||||
|
// Use NewVirtualLocationFromCoordinates with digest as FileSystemID
|
||||||
|
coords := file.NewCoordinates("/", digest)
|
||||||
|
locations[digest] = file.NewVirtualLocationFromCoordinates(coords, "/")
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ContainerImageModel{
|
||||||
|
tempDir: tempDir,
|
||||||
|
layerFiles: layerFiles,
|
||||||
|
locations: locations,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByMediaType returns locations for layers matching the given media type patterns.
|
||||||
|
// Patterns support glob-style matching (e.g., "application/vnd.docker.ai*").
|
||||||
|
func (r *ContainerImageModel) FilesByMediaType(types ...string) ([]file.Location, error) {
|
||||||
|
var matches []file.Location
|
||||||
|
|
||||||
|
for digest, info := range r.layerFiles {
|
||||||
|
for _, pattern := range types {
|
||||||
|
matched, err := filepath.Match(pattern, info.MediaType)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid media type pattern %q: %w", pattern, err)
|
||||||
|
}
|
||||||
|
if matched {
|
||||||
|
if loc, ok := r.locations[digest]; ok {
|
||||||
|
matches = append(matches, loc)
|
||||||
|
}
|
||||||
|
break // Don't add the same location twice
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return matches, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileContentsByLocation returns the contents of the file at the given location.
|
||||||
|
// The location's FileSystemID contains the layer digest, which is used to look up the temp file.
|
||||||
|
// This method is used as part of the content selection in the generic cataloger when locations
|
||||||
|
// are returned by searching for contents by media type.
|
||||||
|
func (r *ContainerImageModel) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||||
|
// Look up the temp file path using the digest stored in FileSystemID
|
||||||
|
digest := location.FileSystemID
|
||||||
|
info, ok := r.layerFiles[digest]
|
||||||
|
if !ok {
|
||||||
|
return nil, fmt.Errorf("no file found for digest %q", digest)
|
||||||
|
}
|
||||||
|
return os.Open(info.TempPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||||
|
func (r *ContainerImageModel) FileMetadataByLocation(_ file.Location) (m file.Metadata, err error) {
|
||||||
|
return m, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasPath checks if the given path exists in the resolver.
|
||||||
|
func (r *ContainerImageModel) HasPath(path string) bool {
|
||||||
|
// The virtual path is "/" for all files
|
||||||
|
if path == "/" && len(r.layerFiles) > 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByPath returns locations for files matching the given paths.
|
||||||
|
func (r *ContainerImageModel) FilesByPath(_ ...string) ([]file.Location, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||||
|
func (r *ContainerImageModel) FilesByGlob(_ ...string) ([]file.Location, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||||
|
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||||
|
func (r *ContainerImageModel) FilesByMIMEType(_ ...string) ([]file.Location, error) {
|
||||||
|
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||||
|
// This is not applicable for OCI model artifacts.
|
||||||
|
func (r *ContainerImageModel) RelativeFileByPath(_ file.Location, _ string) *file.Location {
|
||||||
|
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AllLocations returns all file locations in the resolver.
|
||||||
|
func (r *ContainerImageModel) AllLocations(ctx context.Context) <-chan file.Location {
|
||||||
|
ch := make(chan file.Location)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
for _, loc := range r.locations {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case ch <- loc:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch
|
||||||
|
}
|
||||||
130
syft/internal/fileresolver/container_image_model_test.go
Normal file
130
syft/internal/fileresolver/container_image_model_test.go
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
package fileresolver
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
const ggufLayerMediaType = "application/vnd.docker.ai.gguf.v3"
|
||||||
|
|
||||||
|
func TestOCIModelResolver_FilesByMediaType(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
layerFiles map[string]LayerInfo
|
||||||
|
patterns []string
|
||||||
|
expected int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "exact match GGUF",
|
||||||
|
layerFiles: map[string]LayerInfo{
|
||||||
|
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||||
|
},
|
||||||
|
patterns: []string{ggufLayerMediaType},
|
||||||
|
expected: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "glob match docker ai",
|
||||||
|
layerFiles: map[string]LayerInfo{
|
||||||
|
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||||
|
},
|
||||||
|
patterns: []string{"application/vnd.docker.ai*"},
|
||||||
|
expected: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no match",
|
||||||
|
layerFiles: map[string]LayerInfo{
|
||||||
|
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||||
|
},
|
||||||
|
patterns: []string{"application/json"},
|
||||||
|
expected: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple patterns match multiple files",
|
||||||
|
layerFiles: map[string]LayerInfo{
|
||||||
|
"sha256:abc123": {TempPath: filepath.Join(tempDir, "f1"), MediaType: ggufLayerMediaType},
|
||||||
|
"sha256:def456": {TempPath: filepath.Join(tempDir, "f2"), MediaType: "application/octet-stream"},
|
||||||
|
},
|
||||||
|
patterns: []string{ggufLayerMediaType, "application/octet-stream"},
|
||||||
|
expected: 2,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
resolver := NewContainerImageModel(tempDir, test.layerFiles)
|
||||||
|
|
||||||
|
locations, err := resolver.FilesByMediaType(test.patterns...)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, locations, test.expected)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOCIModelResolver_FileContentsByLocation(t *testing.T) {
|
||||||
|
tempDir := t.TempDir()
|
||||||
|
content := []byte("test gguf content")
|
||||||
|
|
||||||
|
tempFile := filepath.Join(tempDir, "test.gguf")
|
||||||
|
require.NoError(t, os.WriteFile(tempFile, content, 0600))
|
||||||
|
|
||||||
|
digest := "sha256:abc123"
|
||||||
|
layerFiles := map[string]LayerInfo{
|
||||||
|
digest: {TempPath: tempFile, MediaType: ggufLayerMediaType},
|
||||||
|
}
|
||||||
|
|
||||||
|
resolver := NewContainerImageModel(tempDir, layerFiles)
|
||||||
|
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
digest string
|
||||||
|
wantErr bool
|
||||||
|
wantData []byte
|
||||||
|
errSubstr string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "valid location returns content",
|
||||||
|
digest: digest,
|
||||||
|
wantErr: false,
|
||||||
|
wantData: content,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "invalid digest returns error",
|
||||||
|
digest: "sha256:invalid",
|
||||||
|
wantErr: true,
|
||||||
|
errSubstr: "no file found for digest",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
loc := file.NewVirtualLocationFromCoordinates(
|
||||||
|
file.NewCoordinates("/", test.digest),
|
||||||
|
"/",
|
||||||
|
)
|
||||||
|
|
||||||
|
reader, err := resolver.FileContentsByLocation(loc)
|
||||||
|
|
||||||
|
if test.wantErr {
|
||||||
|
require.Error(t, err)
|
||||||
|
assert.Contains(t, err.Error(), test.errSubstr)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
data, err := io.ReadAll(reader)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, test.wantData, data)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -9,8 +9,17 @@ import (
|
|||||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
catalogerName = "gguf-cataloger"
|
||||||
|
ggufLayerMediaType = "application/vnd.docker.ai*"
|
||||||
|
)
|
||||||
|
|
||||||
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||||
|
// It supports both traditional file-based discovery and OCI layer-aware discovery
|
||||||
|
// when the source for the SBOM is the oci model source
|
||||||
func NewGGUFCataloger() pkg.Cataloger {
|
func NewGGUFCataloger() pkg.Cataloger {
|
||||||
return generic.NewCataloger("gguf-cataloger").
|
return generic.NewCataloger(catalogerName).
|
||||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
WithParserByGlobs(parseGGUFModel, "**/*.gguf").
|
||||||
|
WithParserByMediaType(parseGGUFModel, ggufLayerMediaType).
|
||||||
|
WithProcessors(ggufMergeProcessor)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -122,6 +122,10 @@ func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
|||||||
|
|
||||||
// extractModelNameFromPath extracts the model name from the file path
|
// extractModelNameFromPath extracts the model name from the file path
|
||||||
func extractModelNameFromPath(path string) string {
|
func extractModelNameFromPath(path string) string {
|
||||||
|
// we do not want to return a name from filepath if it's not a distinct gguf file
|
||||||
|
if !strings.Contains(path, ".gguf") {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
// Get the base filename
|
// Get the base filename
|
||||||
base := filepath.Base(path)
|
base := filepath.Base(path)
|
||||||
|
|
||||||
|
|||||||
59
syft/pkg/cataloger/ai/processor.go
Normal file
59
syft/pkg/cataloger/ai/processor.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ggufMergeProcessor consolidates multiple GGUF packages into a single package
|
||||||
|
// representing the AI model. When scanning OCI images with multiple layers,
|
||||||
|
// each layer may produce a separate package. This processor finds the package
|
||||||
|
// with a name and merges metadata from nameless packages into its GGUFFileParts field.
|
||||||
|
// Only packages with a non-empty name are returned in the final result.
|
||||||
|
func ggufMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
|
if err != nil {
|
||||||
|
return pkgs, rels, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(pkgs) == 0 {
|
||||||
|
return pkgs, rels, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Separate packages with names from those without
|
||||||
|
var namedPkgs []pkg.Package
|
||||||
|
var namelessHeaders []pkg.GGUFFileHeader
|
||||||
|
|
||||||
|
for _, p := range pkgs {
|
||||||
|
if p.Name != "" {
|
||||||
|
namedPkgs = append(namedPkgs, p)
|
||||||
|
} else {
|
||||||
|
if header, ok := p.Metadata.(pkg.GGUFFileHeader); ok {
|
||||||
|
// We do not want a kv hash for nameless headers
|
||||||
|
header.MetadataKeyValuesHash = ""
|
||||||
|
namelessHeaders = append(namelessHeaders, header)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If there are no named packages, return nothing
|
||||||
|
if len(namedPkgs) == 0 {
|
||||||
|
return nil, rels, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// merge nameless headers into a single named package;
|
||||||
|
// if there are multiple named packages, return them without trying to merge headers.
|
||||||
|
// we cannot determine which nameless headers belong to which package
|
||||||
|
// this is because the order we receive the gguf headers in is not guaranteed
|
||||||
|
// to match the layer order in the original oci image
|
||||||
|
if len(namedPkgs) == 1 && len(namelessHeaders) > 0 {
|
||||||
|
winner := &namedPkgs[0]
|
||||||
|
if header, ok := winner.Metadata.(pkg.GGUFFileHeader); ok {
|
||||||
|
header.Parts = namelessHeaders
|
||||||
|
winner.Metadata = header
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Largest number of key value
|
||||||
|
|
||||||
|
return namedPkgs, rels, err
|
||||||
|
}
|
||||||
63
syft/pkg/cataloger/ai/processor_test.go
Normal file
63
syft/pkg/cataloger/ai/processor_test.go
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/pkg"
|
||||||
|
)
|
||||||
|
|
||||||
|
func Test_ggufMergeProcessor(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
pkgs []pkg.Package
|
||||||
|
wantPkgCount int
|
||||||
|
wantFilePartCount int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "single named package merges nameless headers",
|
||||||
|
pkgs: []pkg.Package{
|
||||||
|
{Name: "model", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "abc"}},
|
||||||
|
{Name: "", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "part1"}},
|
||||||
|
{Name: "", Metadata: pkg.GGUFFileHeader{MetadataKeyValuesHash: "part2"}},
|
||||||
|
},
|
||||||
|
wantPkgCount: 1,
|
||||||
|
wantFilePartCount: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple named packages returns all without merging",
|
||||||
|
pkgs: []pkg.Package{
|
||||||
|
{Name: "model1", Metadata: pkg.GGUFFileHeader{}},
|
||||||
|
{Name: "model2", Metadata: pkg.GGUFFileHeader{}},
|
||||||
|
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||||
|
},
|
||||||
|
wantPkgCount: 2,
|
||||||
|
wantFilePartCount: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no named packages returns empty result",
|
||||||
|
pkgs: []pkg.Package{
|
||||||
|
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||||
|
{Name: "", Metadata: pkg.GGUFFileHeader{}},
|
||||||
|
},
|
||||||
|
wantPkgCount: 0,
|
||||||
|
wantFilePartCount: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
got, _, err := ggufMergeProcessor(test.pkgs, nil, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Len(t, got, test.wantPkgCount)
|
||||||
|
|
||||||
|
if test.wantPkgCount == 1 && test.wantFilePartCount > 0 {
|
||||||
|
header, ok := got[0].Metadata.(pkg.GGUFFileHeader)
|
||||||
|
require.True(t, ok)
|
||||||
|
assert.Len(t, header.Parts, test.wantFilePartCount)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -2164,6 +2164,11 @@ func (p *panicyResolver) FilesByMIMEType(_ ...string) ([]file.Location, error) {
|
|||||||
return nil, errors.New("not implemented")
|
return nil, errors.New("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *panicyResolver) FilesByMediaType(_ ...string) ([]file.Location, error) {
|
||||||
|
p.searchCalled = true
|
||||||
|
return nil, errors.New("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
func (p *panicyResolver) RelativeFileByPath(_ file.Location, _ string) *file.Location {
|
func (p *panicyResolver) RelativeFileByPath(_ file.Location, _ string) *file.Location {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@ -114,6 +114,26 @@ func (c *Cataloger) WithParserByPath(parser Parser, paths ...string) *Cataloger
|
|||||||
return c
|
return c
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (c *Cataloger) WithParserByMediaType(parser Parser, types ...string) *Cataloger {
|
||||||
|
c.requesters = append(c.requesters,
|
||||||
|
func(resolver file.Resolver, _ Environment) []request {
|
||||||
|
var requests []request
|
||||||
|
log.WithFields("mediatypes", types).Trace("searching content matching mediatypes")
|
||||||
|
ociResolver, ok := resolver.(file.OCIMediaTypeResolver)
|
||||||
|
if !ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
matches, err := ociResolver.FilesByMediaType(types...)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
requests = append(requests, makeRequests(parser, matches)...)
|
||||||
|
return requests
|
||||||
|
},
|
||||||
|
)
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
|
||||||
func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
|
func (c *Cataloger) WithProcessors(processors ...Processor) *Cataloger {
|
||||||
for _, p := range processors {
|
for _, p := range processors {
|
||||||
c.processors = append(c.processors, processorWrapper{Processor: p})
|
c.processors = append(c.processors, processorWrapper{Processor: p})
|
||||||
|
|||||||
@ -138,6 +138,10 @@ func (m spyReturningFileResolver) FilesByMIMEType(types ...string) ([]file.Locat
|
|||||||
return m.m.FilesByMIMEType(types...)
|
return m.m.FilesByMIMEType(types...)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (m spyReturningFileResolver) FilesByMediaType(types ...string) ([]file.Location, error) {
|
||||||
|
return m.m.FilesByMediaType(types...)
|
||||||
|
}
|
||||||
|
|
||||||
func (m spyReturningFileResolver) RelativeFileByPath(f file.Location, path string) *file.Location {
|
func (m spyReturningFileResolver) RelativeFileByPath(f file.Location, path string) *file.Location {
|
||||||
return m.m.RelativeFileByPath(f, path)
|
return m.m.RelativeFileByPath(f, path)
|
||||||
}
|
}
|
||||||
@ -189,6 +193,55 @@ func TestClosesFileOnParserPanic(t *testing.T) {
|
|||||||
require.True(t, spy.closed)
|
require.True(t, spy.closed)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Test_CatalogerWithParserByMediaType(t *testing.T) {
|
||||||
|
allParsedPaths := make(map[string]bool)
|
||||||
|
parser := func(_ context.Context, resolver file.Resolver, env *Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
|
allParsedPaths[reader.Path()] = true
|
||||||
|
contents, err := io.ReadAll(reader)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
if len(contents) == 0 {
|
||||||
|
return nil, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
p := pkg.Package{
|
||||||
|
Name: string(contents),
|
||||||
|
Locations: file.NewLocationSet(reader.Location),
|
||||||
|
}
|
||||||
|
|
||||||
|
return []pkg.Package{p}, nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
upstream := "media-type-cataloger"
|
||||||
|
|
||||||
|
// Create locations with test fixtures that exist on disk
|
||||||
|
loc1 := file.NewLocation("test-fixtures/a-path.txt")
|
||||||
|
loc2 := file.NewLocation("test-fixtures/another-path.txt")
|
||||||
|
|
||||||
|
// Create a mock resolver that maps media types to locations
|
||||||
|
resolver := file.NewMockResolverForMediaTypes(map[string][]file.Location{
|
||||||
|
"application/vnd.test.model": {loc1, loc2},
|
||||||
|
})
|
||||||
|
|
||||||
|
cataloger := NewCataloger(upstream).
|
||||||
|
WithParserByMediaType(parser, "application/vnd.test.model")
|
||||||
|
|
||||||
|
actualPkgs, _, err := cataloger.Catalog(context.Background(), resolver)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Verify both files were parsed
|
||||||
|
assert.True(t, allParsedPaths["test-fixtures/a-path.txt"], "expected a-path.txt to be parsed")
|
||||||
|
assert.True(t, allParsedPaths["test-fixtures/another-path.txt"], "expected another-path.txt to be parsed")
|
||||||
|
|
||||||
|
// Verify packages were created
|
||||||
|
assert.Len(t, actualPkgs, 2)
|
||||||
|
|
||||||
|
// Verify FoundBy is set correctly
|
||||||
|
for _, p := range actualPkgs {
|
||||||
|
assert.Equal(t, upstream, p.FoundBy)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func Test_genericCatalogerReturnsErrors(t *testing.T) {
|
func Test_genericCatalogerReturnsErrors(t *testing.T) {
|
||||||
genericErrorReturning := NewCataloger("error returning").WithParserByGlobs(func(ctx context.Context, resolver file.Resolver, environment *Environment, locationReader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
genericErrorReturning := NewCataloger("error returning").WithParserByGlobs(func(ctx context.Context, resolver file.Resolver, environment *Environment, locationReader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
return []pkg.Package{
|
return []pkg.Package{
|
||||||
|
|||||||
@ -208,6 +208,11 @@ func (r *ObservingResolver) FilesByMIMEType(types ...string) ([]file.Location, e
|
|||||||
return locs, err
|
return locs, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FilesByMediaType returns files matching the given media types.
|
||||||
|
func (r *ObservingResolver) FilesByMediaType(_ ...string) ([]file.Location, error) {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
// RelativeFileByPath returns a file at a path relative to the given location.
|
// RelativeFileByPath returns a file at a path relative to the given location.
|
||||||
func (r *ObservingResolver) RelativeFileByPath(location file.Location, path string) *file.Location {
|
func (r *ObservingResolver) RelativeFileByPath(location file.Location, path string) *file.Location {
|
||||||
const methodName = "RelativeFileByPath"
|
const methodName = "RelativeFileByPath"
|
||||||
|
|||||||
@ -80,6 +80,10 @@ func (r *rpmdbTestFileResolverMock) FilesByMIMEType(...string) ([]file.Location,
|
|||||||
return nil, fmt.Errorf("not implemented")
|
return nil, fmt.Errorf("not implemented")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r *rpmdbTestFileResolverMock) FilesByMediaType(...string) ([]file.Location, error) {
|
||||||
|
return nil, fmt.Errorf("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseRpmDB(t *testing.T) {
|
func TestParseRpmDB(t *testing.T) {
|
||||||
ctx := context.TODO()
|
ctx := context.TODO()
|
||||||
packagesLocation := file.NewLocation("test-fixtures/Packages")
|
packagesLocation := file.NewLocation("test-fixtures/Packages")
|
||||||
|
|||||||
@ -34,4 +34,8 @@ type GGUFFileHeader struct {
|
|||||||
// across different file locations or remotes. It allows matching identical models even
|
// across different file locations or remotes. It allows matching identical models even
|
||||||
// when stored in different repositories or with different filenames.
|
// when stored in different repositories or with different filenames.
|
||||||
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
|
MetadataKeyValuesHash string `json:"metadataHash,omitempty" cyclonedx:"metadataHash"`
|
||||||
|
|
||||||
|
// Parts contains headers from additional GGUF files that were merged
|
||||||
|
// into this package during post-processing (e.g., from OCI layers without model names).
|
||||||
|
Parts []GGUFFileHeader `json:"parts,omitempty" cyclonedx:"parts"`
|
||||||
}
|
}
|
||||||
|
|||||||
@ -17,6 +17,7 @@ type ImageMetadata struct {
|
|||||||
Variant string `json:"architectureVariant,omitempty"`
|
Variant string `json:"architectureVariant,omitempty"`
|
||||||
OS string `json:"os"`
|
OS string `json:"os"`
|
||||||
Labels map[string]string `json:"labels,omitempty"`
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
|
Annotations map[string]string `json:"annotations,omitempty" id:"-"` // critical: do not consider annotations as an identifiable part of the source image
|
||||||
}
|
}
|
||||||
|
|
||||||
// LayerMetadata represents all static metadata that defines what a container image layer is.
|
// LayerMetadata represents all static metadata that defines what a container image layer is.
|
||||||
|
|||||||
66
syft/source/internal/image_id.go
Normal file
66
syft/source/internal/image_id.go
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/opencontainers/go-digest"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DeriveImageID derives an artifact ID from the given image metadata. The order of data precedence is:
|
||||||
|
// 1. prefer a digest of the raw container image manifest
|
||||||
|
// 2. if no manifest digest is available, calculate a chain ID from the image layer metadata
|
||||||
|
// 3. if no layer metadata is available, use the user input string
|
||||||
|
//
|
||||||
|
// in all cases, if an alias is provided, it is additionally considered in the ID calculation. This allows for the
|
||||||
|
// same image to be scanned multiple times with different aliases and be considered logically different.
|
||||||
|
func DeriveImageID(alias source.Alias, metadata source.ImageMetadata) artifact.ID {
|
||||||
|
var input string
|
||||||
|
|
||||||
|
if len(metadata.RawManifest) > 0 {
|
||||||
|
input = digest.Canonical.FromBytes(metadata.RawManifest).String()
|
||||||
|
} else {
|
||||||
|
// calculate chain ID for image sources where manifestDigest is not available
|
||||||
|
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
||||||
|
input = calculateChainID(metadata.Layers)
|
||||||
|
if input == "" {
|
||||||
|
// TODO what happens here if image has no layers?
|
||||||
|
// is this case possible?
|
||||||
|
input = digest.Canonical.FromString(metadata.UserInput).String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !alias.IsEmpty() {
|
||||||
|
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
|
||||||
|
// scans the same item but is considered to be logically different, then ID will express that.
|
||||||
|
aliasStr := fmt.Sprintf(":%s@%s", alias.Name, alias.Version)
|
||||||
|
input = digest.Canonical.FromString(input + aliasStr).String()
|
||||||
|
}
|
||||||
|
|
||||||
|
return ArtifactIDFromDigest(input)
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
||||||
|
func calculateChainID(lm []source.LayerMetadata) string {
|
||||||
|
if len(lm) < 1 {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiffID(L0) = digest of layer 0
|
||||||
|
// https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32
|
||||||
|
chainID := lm[0].Digest
|
||||||
|
id := chain(chainID, lm[1:])
|
||||||
|
|
||||||
|
return id
|
||||||
|
}
|
||||||
|
|
||||||
|
func chain(chainID string, layers []source.LayerMetadata) string {
|
||||||
|
if len(layers) < 1 {
|
||||||
|
return chainID
|
||||||
|
}
|
||||||
|
|
||||||
|
chainID = digest.Canonical.FromString(layers[0].Digest + " " + chainID).String()
|
||||||
|
return chain(chainID, layers[1:])
|
||||||
|
}
|
||||||
278
syft/source/internal/image_id_test.go
Normal file
278
syft/source/internal/image_id_test.go
Normal file
@ -0,0 +1,278 @@
|
|||||||
|
package internal
|
||||||
|
|
||||||
|
import (
|
||||||
|
"crypto/sha256"
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestDeriveImageID exercises the data-precedence rules of DeriveImageID: raw
// manifest wins over the layer chain ID, which wins over the user input, and a
// non-empty alias is always folded into the result. The alias cases pin exact
// snapshot hashes that must remain stable across refactors.
func TestDeriveImageID(t *testing.T) {
	tests := []struct {
		name     string
		alias    source.Alias
		metadata source.ImageMetadata
		want     artifact.ID
	}{
		{
			name: "use raw manifest over chain ID or user input",
			metadata: source.ImageMetadata{
				UserInput: "user-input",
				Layers: []source.LayerMetadata{
					{
						Digest: "a",
					},
					{
						Digest: "b",
					},
					{
						Digest: "c",
					},
				},
				RawManifest: []byte("raw-manifest"),
			},
			// expected: hex sha256 of the raw manifest bytes (no algorithm prefix)
			want: func() artifact.ID {
				hasher := sha256.New()
				hasher.Write([]byte("raw-manifest"))
				return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
			}(),
		},
		{
			name: "use chain ID over user input",
			metadata: source.ImageMetadata{
				Layers: []source.LayerMetadata{
					{
						Digest: "a",
					},
					{
						Digest: "b",
					},
					{
						Digest: "c",
					},
				},
			},
			// expected: the chain ID of the layers with the "sha256:" prefix stripped
			want: func() artifact.ID {
				metadata := []source.LayerMetadata{
					{
						Digest: "a",
					},
					{
						Digest: "b",
					},
					{
						Digest: "c",
					},
				}
				return artifact.ID(strings.TrimPrefix(calculateChainID(metadata), "sha256:"))
			}(),
		},
		{
			name: "use user input last",
			metadata: source.ImageMetadata{
				UserInput: "user-input",
			},
			// expected: hex sha256 of the user input string
			want: func() artifact.ID {
				hasher := sha256.New()
				hasher.Write([]byte("user-input"))
				return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
			}(),
		},
		{
			name: "without alias (first)",
			metadata: source.ImageMetadata{
				UserInput: "user-input",
				Layers: []source.LayerMetadata{
					{
						Digest: "a",
					},
					{
						Digest: "b",
					},
					{
						Digest: "c",
					},
				},
				RawManifest: []byte("raw-manifest"),
			},
			// snapshot: must remain stable across refactors
			want: "85298926ecd92ed57688f13039017160cd728f04dd0d2d10a10629007106f107",
		},
		{
			name: "always consider alias (first)",
			alias: source.Alias{
				Name:    "alias",
				Version: "version",
			},
			metadata: source.ImageMetadata{
				UserInput: "user-input",
				Layers: []source.LayerMetadata{
					{
						Digest: "a",
					},
					{
						Digest: "b",
					},
					{
						Digest: "c",
					},
				},
				RawManifest: []byte("raw-manifest"),
			},
			// snapshot: the alias must change the ID relative to the previous case
			want: "a8717e42449960c1dd4963f2f22bd69c7c105e7e82445be0a65aa1825d62ff0d",
		},
		{
			name: "without alias (last)",
			metadata: source.ImageMetadata{
				UserInput: "user-input",
			},
			// snapshot: must remain stable across refactors
			want: "ab0dff627d80b9753193d7280bec8f45e8ec6b4cb0912c6fffcf7cd782d9739e",
		},
		{
			name: "always consider alias (last)",
			alias: source.Alias{
				Name:    "alias",
				Version: "version",
			},
			metadata: source.ImageMetadata{
				UserInput: "user-input",
			},
			// snapshot: the alias must change the ID relative to the previous case
			want: "fe86c0eecd5654d3c0c0b2176aa394aef6440347c241aa8d9b628dfdde4287cf",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, DeriveImageID(tt.alias, tt.metadata))
		})
	}
}
|
||||||
|
|
||||||
|
// TestDeriveImageID_CrossSourceConsistency ensures the same metadata produces
// identical IDs regardless of whether the source is stereoscope-based or OCI
// model-based. Both source types use DeriveImageID with ImageMetadata. This
// test captures known-good IDs that must remain stable across refactors to
// maintain consistency.
//
// IMPORTANT: If any of these tests fail after a refactor, it means the artifact ID generation
// has changed and will break consistency between stereoscope images and OCI model sources.
func TestDeriveImageID_CrossSourceConsistency(t *testing.T) {
	tests := []struct {
		name     string
		alias    source.Alias
		metadata source.ImageMetadata
		wantID   artifact.ID
	}{
		{
			name: "raw manifest with layers - typical container image",
			metadata: source.ImageMetadata{
				UserInput:      "docker.io/library/alpine:latest",
				ManifestDigest: "sha256:abc123",
				Layers: []source.LayerMetadata{
					{Digest: "sha256:layer1", MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", Size: 1000},
					{Digest: "sha256:layer2", MediaType: "application/vnd.oci.image.layer.v1.tar+gzip", Size: 2000},
				},
				RawManifest: []byte(`{"schemaVersion":2,"mediaType":"application/vnd.oci.image.manifest.v1+json"}`),
			},
			// snapshot: this ID must remain stable for stereoscope/oci-model consistency
			wantID: "b22c7289dd3b4785a3795c90e15d16bd66bd29b444b8974fe29ed0443ce50405",
		},
		{
			name: "raw manifest only - minimal image",
			metadata: source.ImageMetadata{
				RawManifest: []byte(`{"schemaVersion":2}`),
			},
			// snapshot: this ID must remain stable
			wantID: "bafebd36189ad3688b7b3915ea55d461e0bfcfbdde11e54b0a123999fb6be50f",
		},
		{
			name: "chain ID fallback - no raw manifest",
			metadata: source.ImageMetadata{
				UserInput: "some-image",
				Layers: []source.LayerMetadata{
					{Digest: "sha256:aaa111"},
					{Digest: "sha256:bbb222"},
				},
			},
			// snapshot: chain ID calculation must remain stable
			wantID: "0ba9c8d271e6708871505d362e37267c5fb7910066c04d3115b89ba4d34aa180",
		},
		{
			name: "user input fallback - no manifest or layers",
			metadata: source.ImageMetadata{
				UserInput: "registry.example.com/org/model:v1.0",
			},
			// snapshot: user input hash must remain stable
			wantID: "a5a8733a3ba3eb99a8ebebcd40c4053f9b896ea6e2217ebc6e885573f20baccf",
		},
		{
			name: "with alias - same image different logical identity",
			alias: source.Alias{
				Name:    "my-custom-name",
				Version: "1.0.0",
			},
			metadata: source.ImageMetadata{
				RawManifest: []byte(`{"schemaVersion":2}`),
			},
			// snapshot: alias must affect ID deterministically
			wantID: "9eae41c0efc30023368c29089bac007f2c9d0b40a0ee034081a17c4c22f55ac6",
		},
		{
			name: "annotations has no effect on ID",
			metadata: source.ImageMetadata{
				UserInput: "registry.example.com/org/model:v1.0",
				Annotations: map[string]string{
					"annotation1": "value1",
				},
			},
			// snapshot: same ID as the bare user-input case above — annotations are ignored
			wantID: "a5a8733a3ba3eb99a8ebebcd40c4053f9b896ea6e2217ebc6e885573f20baccf",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := DeriveImageID(tt.alias, tt.metadata)
			assert.Equal(t, tt.wantID, got, "ID must remain stable for cross-source consistency")
		})
	}
}
|
||||||
|
|
||||||
|
// TestCalculateChainID covers the three shapes of layer input: empty (no chain
// ID possible), a single layer (the digest passes through untouched), and
// multiple layers (a folded chain ID, pinned as a snapshot value).
func TestCalculateChainID(t *testing.T) {
	tests := []struct {
		name   string
		layers []source.LayerMetadata
		want   string
	}{
		{
			name:   "empty layers returns empty string",
			layers: []source.LayerMetadata{},
			want:   "",
		},
		{
			name: "single layer returns digest",
			layers: []source.LayerMetadata{
				{Digest: "sha256:abc123"},
			},
			want: "sha256:abc123",
		},
		{
			name: "multiple layers calculates chain ID",
			layers: []source.LayerMetadata{
				{Digest: "a"},
				{Digest: "b"},
				{Digest: "c"},
			},
			// snapshot - this value should not change
			want: "sha256:1dfe230e220ef0e6bc0a8978d23d72b95769e76a62879a5f49267d8c007ab43d",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, calculateChainID(tt.layers))
		})
	}
}
|
||||||
4
syft/source/oci_model_metadata.go
Normal file
4
syft/source/oci_model_metadata.go
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
package source
|
||||||
|
|
||||||
|
// OCIModelMetadata is an AI model from an OCI registry, which is a specialized form of ImageMetadata.
// It is a distinct named type (not an alias) so consumers can distinguish model sources from
// container image sources, while remaining convertible via ImageMetadata(m).
type OCIModelMetadata ImageMetadata
|
||||||
245
syft/source/ocimodelsource/oci_model_source.go
Normal file
245
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/google/go-containerregistry/pkg/name"
|
||||||
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/internal/fileresolver"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
"github.com/anchore/syft/syft/source/internal"
|
||||||
|
)
|
||||||
|
|
||||||
|
// compile-time proof that ociModelSource satisfies source.Source
var _ source.Source = (*ociModelSource)(nil)

// Config holds the input configuration for an OCI model artifact source.
type Config struct {
	Reference       string                 // OCI reference for the model artifact (e.g. "ai/llama3.2")
	RegistryOptions *image.RegistryOptions // optional registry auth/TLS settings; nil means defaults
	Alias           source.Alias           // optional user-provided logical name/version for the source
}
|
||||||
|
|
||||||
|
// ociModelSource implements the source.Source interface for OCI model artifacts.
type ociModelSource struct {
	id        artifact.ID             // stable artifact ID derived from the manifest/alias
	reference string                  // original user-supplied reference string
	alias     source.Alias            // optional logical name/version override
	metadata  source.OCIModelMetadata // metadata built from the fetched manifest/config
	tempDir   string                  // holds downloaded GGUF header files; removed on Close
	resolver  interface {
		file.Resolver
		file.OCIMediaTypeResolver
	} // resolves the downloaded GGUF header files by path/media type
	mutex *sync.Mutex // guards tempDir/resolver so Close is safe to call multiple times
}
|
||||||
|
|
||||||
|
// NewFromRegistry creates a new OCI model source by fetching the model artifact from a registry.
|
||||||
|
func NewFromRegistry(ctx context.Context, cfg Config) (source.Source, error) {
|
||||||
|
client := newRegistryClient(cfg.RegistryOptions)
|
||||||
|
art, err := validateAndFetchArtifact(ctx, client, cfg.Reference)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
metadata := buildMetadata(art)
|
||||||
|
tempDir, resolver, err := fetchAndStoreGGUFHeaders(ctx, client, art)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
id := internal.DeriveImageID(cfg.Alias, source.ImageMetadata(metadata))
|
||||||
|
return &ociModelSource{
|
||||||
|
id: id,
|
||||||
|
reference: cfg.Reference,
|
||||||
|
alias: cfg.Alias,
|
||||||
|
metadata: metadata,
|
||||||
|
tempDir: tempDir,
|
||||||
|
resolver: resolver,
|
||||||
|
mutex: &sync.Mutex{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateAndFetchArtifact fetches and validates a model artifact in a single registry call.
|
||||||
|
func validateAndFetchArtifact(ctx context.Context, client *registryClient, reference string) (*modelArtifact, error) {
|
||||||
|
art, err := client.fetchModelArtifact(ctx, reference)
|
||||||
|
if err != nil {
|
||||||
|
// errNotModelArtifact is wrapped, so callers can use errors.Is() to check
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(art.GGUFLayers) == 0 {
|
||||||
|
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||||
|
}
|
||||||
|
|
||||||
|
return art, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchAndStoreGGUFHeaders fetches GGUF layer headers and stores them in temp files.
|
||||||
|
func fetchAndStoreGGUFHeaders(ctx context.Context, client *registryClient, artifact *modelArtifact) (string, *fileresolver.ContainerImageModel, error) {
|
||||||
|
tempDir, err := os.MkdirTemp("", "syft-oci-gguf")
|
||||||
|
if err != nil {
|
||||||
|
return "", nil, fmt.Errorf("failed to create temp directory: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
layerFiles := make(map[string]fileresolver.LayerInfo)
|
||||||
|
for _, layer := range artifact.GGUFLayers {
|
||||||
|
li, err := fetchSingleGGUFHeader(ctx, client, artifact.Reference, layer, tempDir)
|
||||||
|
if err != nil {
|
||||||
|
osErr := os.RemoveAll(tempDir)
|
||||||
|
if osErr != nil {
|
||||||
|
log.Errorf("unable to remove temp directory (%s): %v", tempDir, err)
|
||||||
|
}
|
||||||
|
return "", nil, err
|
||||||
|
}
|
||||||
|
layerFiles[layer.Digest.String()] = li
|
||||||
|
}
|
||||||
|
|
||||||
|
resolver := fileresolver.NewContainerImageModel(tempDir, layerFiles)
|
||||||
|
|
||||||
|
return tempDir, resolver, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetchSingleGGUFHeader fetches a single GGUF layer header and writes it to a temp file.
|
||||||
|
func fetchSingleGGUFHeader(ctx context.Context, client *registryClient, ref name.Reference, layer v1.Descriptor, tempDir string) (fileresolver.LayerInfo, error) {
|
||||||
|
headerData, err := client.fetchBlobRange(ctx, ref, layer.Digest, maxHeaderBytes)
|
||||||
|
if err != nil {
|
||||||
|
return fileresolver.LayerInfo{}, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
digestStr := layer.Digest.String()
|
||||||
|
safeDigest := strings.ReplaceAll(digestStr, ":", "-")
|
||||||
|
tempPath := filepath.Join(tempDir, safeDigest+".gguf")
|
||||||
|
if err := os.WriteFile(tempPath, headerData, 0600); err != nil {
|
||||||
|
return fileresolver.LayerInfo{}, fmt.Errorf("failed to write temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fileresolver.LayerInfo{
|
||||||
|
TempPath: tempPath,
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildMetadata constructs OCIModelMetadata from a modelArtifact.
|
||||||
|
func buildMetadata(artifact *modelArtifact) source.OCIModelMetadata {
|
||||||
|
// layers
|
||||||
|
layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
|
||||||
|
for i, layer := range artifact.Manifest.Layers {
|
||||||
|
layers[i] = source.LayerMetadata{
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// tags
|
||||||
|
var tags []string
|
||||||
|
if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
|
||||||
|
if tag := tagged.TagStr(); tag != "" {
|
||||||
|
tags = []string{tag}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// digests
|
||||||
|
var repoDigests []string
|
||||||
|
if artifact.ManifestDigest != "" {
|
||||||
|
repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
|
||||||
|
}
|
||||||
|
|
||||||
|
// metadata
|
||||||
|
return source.OCIModelMetadata{
|
||||||
|
UserInput: artifact.Reference.String(),
|
||||||
|
ID: artifact.ManifestDigest,
|
||||||
|
ManifestDigest: artifact.ManifestDigest,
|
||||||
|
MediaType: string(artifact.Manifest.MediaType),
|
||||||
|
Tags: tags,
|
||||||
|
Size: calculateTotalSize(layers),
|
||||||
|
Layers: layers,
|
||||||
|
RawManifest: artifact.RawManifest,
|
||||||
|
RawConfig: artifact.RawConfig,
|
||||||
|
RepoDigests: repoDigests,
|
||||||
|
Architecture: artifact.Config.Architecture,
|
||||||
|
Variant: artifact.Config.Variant,
|
||||||
|
OS: artifact.Config.OS,
|
||||||
|
Labels: artifact.Config.Config.Labels,
|
||||||
|
Annotations: extractManifestAnnotations(artifact.Manifest),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractManifestAnnotations extracts annotations from the manifest.
|
||||||
|
func extractManifestAnnotations(manifest *v1.Manifest) map[string]string {
|
||||||
|
if manifest == nil || manifest.Annotations == nil {
|
||||||
|
return make(map[string]string)
|
||||||
|
}
|
||||||
|
return manifest.Annotations
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculateTotalSize sums up the size of all layers.
|
||||||
|
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||||
|
var total int64
|
||||||
|
for _, layer := range layers {
|
||||||
|
total += layer.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID returns the artifact ID derived at construction time (see internal.DeriveImageID).
func (s *ociModelSource) ID() artifact.ID {
	return s.id
}
|
||||||
|
|
||||||
|
// Describe returns a description of the source.
|
||||||
|
func (s *ociModelSource) Describe() source.Description {
|
||||||
|
name := s.reference
|
||||||
|
version := ""
|
||||||
|
supplier := ""
|
||||||
|
|
||||||
|
if !s.alias.IsEmpty() {
|
||||||
|
if s.alias.Name != "" {
|
||||||
|
name = s.alias.Name
|
||||||
|
}
|
||||||
|
if s.alias.Version != "" {
|
||||||
|
version = s.alias.Version
|
||||||
|
}
|
||||||
|
if s.alias.Supplier != "" {
|
||||||
|
supplier = s.alias.Supplier
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return source.Description{
|
||||||
|
ID: string(s.id),
|
||||||
|
Name: name,
|
||||||
|
Version: version,
|
||||||
|
Supplier: supplier,
|
||||||
|
Metadata: s.metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileResolver returns a file resolver for accessing header of GGUF files.
// The scope argument is ignored: only the downloaded header files are resolvable.
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
	return s.resolver, nil
}
|
||||||
|
|
||||||
|
// Close cleans up temporary files. Safe to call multiple times.
|
||||||
|
func (s *ociModelSource) Close() error {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.tempDir == "" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
err := os.RemoveAll(s.tempDir)
|
||||||
|
s.tempDir = ""
|
||||||
|
s.resolver = nil
|
||||||
|
return err
|
||||||
|
}
|
||||||
36
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
36
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// ociModelSourceProvider lazily constructs an OCI model source from a reference.
type ociModelSourceProvider struct {
	reference    string                 // OCI reference to the model artifact
	registryOpts *image.RegistryOptions // optional registry auth/TLS settings
	alias        source.Alias           // optional logical name/version override
}

// NewSourceProvider creates a new OCI model artifact source provider.
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
	return &ociModelSourceProvider{
		reference:    reference,
		registryOpts: registryOpts,
		alias:        alias,
	}
}

// Name returns the provider name used during source selection.
func (p *ociModelSourceProvider) Name() string {
	return "oci-model"
}

// Provide fetches the model artifact from the registry and returns it as a source.Source.
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
	cfg := Config{
		Reference:       p.reference,
		RegistryOptions: p.registryOpts,
		Alias:           p.alias,
	}
	return NewFromRegistry(ctx, cfg)
}
|
||||||
217
syft/source/ocimodelsource/registry_client.go
Normal file
217
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,217 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/tls"
|
||||||
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/google/go-containerregistry/pkg/authn"
|
||||||
|
"github.com/google/go-containerregistry/pkg/name"
|
||||||
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||||
|
"github.com/google/go-containerregistry/pkg/v1/remote"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
)
|
||||||
|
|
||||||
|
// errNotModelArtifact is returned when a reference does not point to a model artifact.
// It is wrapped by fetchModelArtifact so callers can detect the condition with errors.Is.
var errNotModelArtifact = errors.New("not an OCI model artifact")

const (
	// Model artifact media types as per Docker's OCI artifacts for AI model packaging
	// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
	modelConfigMediaTypePrefix = "application/vnd.docker.ai.model.config."
	ggufLayerMediaType         = "application/vnd.docker.ai.gguf.v3"

	// Maximum bytes to read/return for GGUF headers
	maxHeaderBytes = 8 * 1024 * 1024 // 8 MB
)
|
||||||
|
|
||||||
|
// registryClient handles OCI registry interactions for model artifacts.
type registryClient struct {
	options []remote.Option // baseline remote options (auth, transport); per-call options are appended
}

// newRegistryClient creates a new registry client with authentication from RegistryOptions.
// A nil registryOpts yields a client with default (anonymous) behavior.
func newRegistryClient(registryOpts *image.RegistryOptions) *registryClient {
	opts := buildRemoteOptions(registryOpts)

	return &registryClient{
		options: opts,
	}
}
|
||||||
|
|
||||||
|
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
|
||||||
|
func buildRemoteOptions(registryOpts *image.RegistryOptions) []remote.Option {
|
||||||
|
var opts []remote.Option
|
||||||
|
|
||||||
|
if registryOpts == nil {
|
||||||
|
return opts
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build authenticator
|
||||||
|
authenticator := buildAuthenticator(registryOpts)
|
||||||
|
opts = append(opts, remote.WithAuth(authenticator))
|
||||||
|
|
||||||
|
// Handle TLS settings
|
||||||
|
if registryOpts.InsecureSkipTLSVerify {
|
||||||
|
if transport, ok := remote.DefaultTransport.(*http.Transport); ok {
|
||||||
|
transport = transport.Clone()
|
||||||
|
if transport.TLSClientConfig == nil {
|
||||||
|
transport.TLSClientConfig = &tls.Config{
|
||||||
|
MinVersion: tls.VersionTLS12,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
transport.TLSClientConfig.InsecureSkipVerify = true //#nosec G402 -- user explicitly requested insecure TLS
|
||||||
|
opts = append(opts, remote.WithTransport(transport))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle insecure HTTP
|
||||||
|
if registryOpts.InsecureUseHTTP {
|
||||||
|
opts = append(opts, remote.WithTransport(http.DefaultTransport))
|
||||||
|
}
|
||||||
|
|
||||||
|
return opts
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
// Precedence: bearer token, then basic username/password, then anonymous.
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
	// If credentials are provided, use them
	if len(registryOpts.Credentials) > 0 {
		// Use the first credential set (we could enhance this to match by authority)
		cred := registryOpts.Credentials[0]

		if cred.Token != "" {
			return &authn.Bearer{Token: cred.Token}
		}

		if cred.Username != "" || cred.Password != "" {
			return &authn.Basic{
				Username: cred.Username,
				Password: cred.Password,
			}
		}
	}

	// Fall back to anonymous authenticator
	return authn.Anonymous
}
|
||||||
|
|
||||||
|
// modelArtifact represents a parsed OCI model artifact.
type modelArtifact struct {
	Reference      name.Reference  // parsed registry reference the artifact was fetched from
	Manifest       *v1.Manifest    // decoded OCI manifest
	Config         *v1.ConfigFile  // decoded config file
	RawManifest    []byte          // manifest bytes as served by the registry
	RawConfig      []byte          // config bytes as served by the registry
	ManifestDigest string          // digest of the manifest (e.g. "sha256:...")
	GGUFLayers     []v1.Descriptor // layers whose media type marks them as GGUF content
}
|
||||||
|
|
||||||
|
func (c *registryClient) fetchModelArtifact(ctx context.Context, refStr string) (*modelArtifact, error) {
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
opts := c.options
|
||||||
|
opts = append(opts, remote.WithContext(ctx))
|
||||||
|
desc, err := remote.Get(ref, opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isModelArtifact(manifest) {
|
||||||
|
return nil, fmt.Errorf("%w (config media type: %s)", errNotModelArtifact, manifest.Config.MediaType)
|
||||||
|
}
|
||||||
|
|
||||||
|
img, err := desc.Image()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := img.ConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawConfig, err := img.RawConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ggufLayers := extractGGUFLayers(manifest)
|
||||||
|
|
||||||
|
return &modelArtifact{
|
||||||
|
Reference: ref,
|
||||||
|
Manifest: manifest,
|
||||||
|
Config: configFile,
|
||||||
|
RawManifest: desc.Manifest,
|
||||||
|
RawConfig: rawConfig,
|
||||||
|
ManifestDigest: desc.Digest.String(),
|
||||||
|
GGUFLayers: ggufLayers,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isModelArtifact checks if the manifest represents a model artifact.
|
||||||
|
func isModelArtifact(manifest *v1.Manifest) bool {
|
||||||
|
return strings.HasPrefix(string(manifest.Config.MediaType), modelConfigMediaTypePrefix)
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||||
|
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||||
|
var ggufLayers []v1.Descriptor
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
if string(layer.MediaType) == ggufLayerMediaType {
|
||||||
|
ggufLayers = append(ggufLayers, layer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ggufLayers
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *registryClient) fetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||||
|
repo := ref.Context()
|
||||||
|
|
||||||
|
opts := c.options
|
||||||
|
opts = append(opts, remote.WithContext(ctx))
|
||||||
|
layer, err := remote.Layer(repo.Digest(digest.String()), opts...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, err := layer.Compressed()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||||
|
}
|
||||||
|
// this defer is what causes the download to stop
|
||||||
|
// 1. io.ReadFull(reader, data) reads exactly 8MB into the buffer
|
||||||
|
// 2. The function returns with data[:n]
|
||||||
|
// 3. defer reader.Close() executes, closing the HTTP response body
|
||||||
|
// 4. Closing the response body closes the underlying TCP connection
|
||||||
|
// 5. The server receives TCP FIN/RST and stops sending
|
||||||
|
// note: some data is already in flight when we close so we will see > 8mb over the wire
|
||||||
|
// the full image will not download given we terminate the reader early here
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
// Note: this is not some arbitrary number picked out of the blue.
|
||||||
|
// This is based on the specification of header data found here:
|
||||||
|
// https://github.com/ggml-org/ggml/blob/master/docs/gguf.md#file-structure
|
||||||
|
data := make([]byte, maxBytes)
|
||||||
|
n, err := io.ReadFull(reader, data)
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||||
|
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data[:n], nil
|
||||||
|
}
|
||||||
114
syft/source/ocimodelsource/registry_client_test.go
Normal file
114
syft/source/ocimodelsource/registry_client_test.go
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
||||||
|
"github.com/google/go-containerregistry/pkg/v1/types"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestIsModelArtifact verifies that only manifests whose config media type carries
// the Docker AI model prefix are recognized; ordinary container configs and empty
// media types are rejected.
func TestIsModelArtifact(t *testing.T) {
	tests := []struct {
		name     string
		manifest *v1.Manifest
		expected bool
	}{
		{
			name: "valid model artifact",
			manifest: &v1.Manifest{
				Config: v1.Descriptor{
					MediaType: modelConfigMediaTypePrefix + "v1+json",
				},
			},
			expected: true,
		},
		{
			name: "container image",
			manifest: &v1.Manifest{
				Config: v1.Descriptor{
					MediaType: types.DockerConfigJSON,
				},
			},
			expected: false,
		},
		{
			name: "empty media type",
			manifest: &v1.Manifest{
				Config: v1.Descriptor{
					MediaType: "",
				},
			},
			expected: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			result := isModelArtifact(test.manifest)
			assert.Equal(t, test.expected, result)
		})
	}
}
|
||||||
|
|
||||||
|
func TestExtractGGUFLayers(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
manifest *v1.Manifest
|
||||||
|
expected int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "single GGUF layer",
|
||||||
|
manifest: &v1.Manifest{
|
||||||
|
Layers: []v1.Descriptor{
|
||||||
|
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "multiple GGUF layers",
|
||||||
|
manifest: &v1.Manifest{
|
||||||
|
Layers: []v1.Descriptor{
|
||||||
|
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||||
|
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "def"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "mixed layers",
|
||||||
|
manifest: &v1.Manifest{
|
||||||
|
Layers: []v1.Descriptor{
|
||||||
|
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "abc"}},
|
||||||
|
{MediaType: types.DockerLayer, Digest: v1.Hash{Algorithm: "sha256", Hex: "def"}},
|
||||||
|
{MediaType: types.MediaType(ggufLayerMediaType), Digest: v1.Hash{Algorithm: "sha256", Hex: "ghi"}},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "no GGUF layers",
|
||||||
|
manifest: &v1.Manifest{
|
||||||
|
Layers: []v1.Descriptor{
|
||||||
|
{MediaType: types.DockerLayer},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "empty layers",
|
||||||
|
manifest: &v1.Manifest{
|
||||||
|
Layers: []v1.Descriptor{},
|
||||||
|
},
|
||||||
|
expected: 0,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run(test.name, func(t *testing.T) {
|
||||||
|
result := extractGGUFLayers(test.manifest)
|
||||||
|
assert.Len(t, result, test.expected)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
"github.com/anchore/syft/syft/source/directorysource"
|
"github.com/anchore/syft/syft/source/directorysource"
|
||||||
"github.com/anchore/syft/syft/source/filesource"
|
"github.com/anchore/syft/syft/source/filesource"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
"github.com/anchore/syft/syft/source/snapsource"
|
"github.com/anchore/syft/syft/source/snapsource"
|
||||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||||
)
|
)
|
||||||
@ -43,6 +44,14 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
|||||||
// --from docker, registry, etc.
|
// --from docker, registry, etc.
|
||||||
Join(stereoscopeProviders.Select(PullTag)...).
|
Join(stereoscopeProviders.Select(PullTag)...).
|
||||||
|
|
||||||
|
// --from oci-model, registry (for select cases only)
|
||||||
|
// OCI model artifacts with header-only fetching
|
||||||
|
// note: we don't want to use the "pull" tag since it's not actually pulling the full image,
|
||||||
|
// instead we want to match on registry since these models are stored in OCI registries.
|
||||||
|
// This does mean that this must be placed after the pull provider, which is ideal since we don't want to
|
||||||
|
// unnecessarily pull registry headers first if the more common case is the pull providers.
|
||||||
|
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), "registry")).
|
||||||
|
|
||||||
// --from snap (remote only)
|
// --from snap (remote only)
|
||||||
Join(tagProvider(snapsource.NewRemoteSourceProvider(userInput, cfg.Exclude, cfg.DigestAlgorithms, cfg.Alias), SnapTag))
|
Join(tagProvider(snapsource.NewRemoteSourceProvider(userInput, cfg.Exclude, cfg.DigestAlgorithms, cfg.Alias), SnapTag))
|
||||||
}
|
}
|
||||||
|
|||||||
@ -5,7 +5,6 @@ import (
|
|||||||
|
|
||||||
"github.com/bmatcuk/doublestar/v4"
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
"github.com/distribution/reference"
|
"github.com/distribution/reference"
|
||||||
"github.com/opencontainers/go-digest"
|
|
||||||
|
|
||||||
"github.com/anchore/stereoscope/pkg/image"
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
"github.com/anchore/syft/internal/log"
|
"github.com/anchore/syft/internal/log"
|
||||||
@ -36,7 +35,7 @@ type stereoscopeImageSource struct {
|
|||||||
func New(img *image.Image, cfg ImageConfig) source.Source {
|
func New(img *image.Image, cfg ImageConfig) source.Source {
|
||||||
metadata := imageMetadataFromStereoscopeImage(img, cfg.Reference)
|
metadata := imageMetadataFromStereoscopeImage(img, cfg.Reference)
|
||||||
return &stereoscopeImageSource{
|
return &stereoscopeImageSource{
|
||||||
id: deriveIDFromStereoscopeImage(cfg.Alias, metadata),
|
id: internal.DeriveImageID(cfg.Alias, metadata),
|
||||||
config: cfg,
|
config: cfg,
|
||||||
image: img,
|
image: img,
|
||||||
metadata: metadata,
|
metadata: metadata,
|
||||||
@ -163,61 +162,6 @@ func imageMetadataFromStereoscopeImage(img *image.Image, reference string) sourc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// deriveIDFromStereoscopeImage derives an artifact ID from the given image metadata. The order of data precedence is:
|
|
||||||
// 1. prefer a digest of the raw container image manifest
|
|
||||||
// 2. if no manifest digest is available, calculate a chain ID from the image layer metadata
|
|
||||||
// 3. if no layer metadata is available, use the user input string
|
|
||||||
//
|
|
||||||
// in all cases, if an alias is provided, it is additionally considered in the ID calculation. This allows for the
|
|
||||||
// same image to be scanned multiple times with different aliases and be considered logically different.
|
|
||||||
func deriveIDFromStereoscopeImage(alias source.Alias, metadata source.ImageMetadata) artifact.ID {
|
|
||||||
var input string
|
|
||||||
|
|
||||||
if len(metadata.RawManifest) > 0 {
|
|
||||||
input = digest.Canonical.FromBytes(metadata.RawManifest).String()
|
|
||||||
} else {
|
|
||||||
// calculate chain ID for image sources where manifestDigest is not available
|
|
||||||
// https://github.com/opencontainers/image-spec/blob/main/config.md#layer-chainid
|
|
||||||
input = calculateChainID(metadata.Layers)
|
|
||||||
if input == "" {
|
|
||||||
// TODO what happens here if image has no layers?
|
|
||||||
// is this case possible?
|
|
||||||
input = digest.Canonical.FromString(metadata.UserInput).String()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if !alias.IsEmpty() {
|
|
||||||
// if the user provided an alias, we want to consider that in the artifact ID. This way if the user
|
|
||||||
// scans the same item but is considered to be logically different, then ID will express that.
|
|
||||||
aliasStr := fmt.Sprintf(":%s@%s", alias.Name, alias.Version)
|
|
||||||
input = digest.Canonical.FromString(input + aliasStr).String()
|
|
||||||
}
|
|
||||||
|
|
||||||
return internal.ArtifactIDFromDigest(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
func calculateChainID(lm []source.LayerMetadata) string {
|
|
||||||
if len(lm) < 1 {
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
// DiffID(L0) = digest of layer 0
|
|
||||||
// https://github.com/anchore/stereoscope/blob/1b1b744a919964f38d14e1416fb3f25221b761ce/pkg/image/layer_metadata.go#L19-L32
|
|
||||||
chainID := lm[0].Digest
|
|
||||||
id := chain(chainID, lm[1:])
|
|
||||||
|
|
||||||
return id
|
|
||||||
}
|
|
||||||
|
|
||||||
func chain(chainID string, layers []source.LayerMetadata) string {
|
|
||||||
if len(layers) < 1 {
|
|
||||||
return chainID
|
|
||||||
}
|
|
||||||
|
|
||||||
chainID = digest.Canonical.FromString(layers[0].Digest + " " + chainID).String()
|
|
||||||
return chain(chainID, layers[1:])
|
|
||||||
}
|
|
||||||
|
|
||||||
func getImageExclusionFunction(exclusions []string) func(string) bool {
|
func getImageExclusionFunction(exclusions []string) func(string) bool {
|
||||||
if len(exclusions) == 0 {
|
if len(exclusions) == 0 {
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@ -2,8 +2,6 @@ package stereoscopesource
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/sha256"
|
|
||||||
"fmt"
|
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
@ -12,7 +10,6 @@ import (
|
|||||||
|
|
||||||
"github.com/anchore/stereoscope"
|
"github.com/anchore/stereoscope"
|
||||||
"github.com/anchore/stereoscope/pkg/imagetest"
|
"github.com/anchore/stereoscope/pkg/imagetest"
|
||||||
"github.com/anchore/syft/syft/artifact"
|
|
||||||
"github.com/anchore/syft/syft/internal/testutil"
|
"github.com/anchore/syft/syft/internal/testutil"
|
||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
)
|
)
|
||||||
@ -112,146 +109,6 @@ func Test_StereoscopeImage_Exclusions(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_StereoscopeImageSource_ID(t *testing.T) {
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
alias source.Alias
|
|
||||||
metadata source.ImageMetadata
|
|
||||||
want artifact.ID
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
name: "use raw manifest over chain ID or user input",
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
Layers: []source.LayerMetadata{
|
|
||||||
{
|
|
||||||
Digest: "a",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "b",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "c",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
RawManifest: []byte("raw-manifest"),
|
|
||||||
},
|
|
||||||
want: func() artifact.ID {
|
|
||||||
hasher := sha256.New()
|
|
||||||
hasher.Write([]byte("raw-manifest"))
|
|
||||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
|
||||||
}(),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "use chain ID over user input",
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
//UserInput: "user-input",
|
|
||||||
Layers: []source.LayerMetadata{
|
|
||||||
{
|
|
||||||
Digest: "a",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "b",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "c",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
want: func() artifact.ID {
|
|
||||||
metadata := []source.LayerMetadata{
|
|
||||||
{
|
|
||||||
Digest: "a",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "b",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "c",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return artifact.ID(strings.TrimPrefix(calculateChainID(metadata), "sha256:"))
|
|
||||||
}(),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "use user input last",
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
},
|
|
||||||
want: func() artifact.ID {
|
|
||||||
hasher := sha256.New()
|
|
||||||
hasher.Write([]byte("user-input"))
|
|
||||||
return artifact.ID(fmt.Sprintf("%x", hasher.Sum(nil)))
|
|
||||||
}(),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "without alias (first)",
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
Layers: []source.LayerMetadata{
|
|
||||||
{
|
|
||||||
Digest: "a",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "b",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "c",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
RawManifest: []byte("raw-manifest"),
|
|
||||||
},
|
|
||||||
want: "85298926ecd92ed57688f13039017160cd728f04dd0d2d10a10629007106f107",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "always consider alias (first)",
|
|
||||||
alias: source.Alias{
|
|
||||||
Name: "alias",
|
|
||||||
Version: "version",
|
|
||||||
},
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
Layers: []source.LayerMetadata{
|
|
||||||
{
|
|
||||||
Digest: "a",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "b",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Digest: "c",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
RawManifest: []byte("raw-manifest"),
|
|
||||||
},
|
|
||||||
want: "a8717e42449960c1dd4963f2f22bd69c7c105e7e82445be0a65aa1825d62ff0d",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "without alias (last)",
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
},
|
|
||||||
want: "ab0dff627d80b9753193d7280bec8f45e8ec6b4cb0912c6fffcf7cd782d9739e",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "always consider alias (last)",
|
|
||||||
alias: source.Alias{
|
|
||||||
Name: "alias",
|
|
||||||
Version: "version",
|
|
||||||
},
|
|
||||||
metadata: source.ImageMetadata{
|
|
||||||
UserInput: "user-input",
|
|
||||||
},
|
|
||||||
want: "fe86c0eecd5654d3c0c0b2176aa394aef6440347c241aa8d9b628dfdde4287cf",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
assert.Equal(t, tt.want, deriveIDFromStereoscopeImage(tt.alias, tt.metadata))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func Test_Describe(t *testing.T) {
|
func Test_Describe(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user