mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 00:13:15 +01:00
Compare commits
14 Commits
bfe63bb006
...
5853129c07
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5853129c07 | ||
|
|
9c5279cb99 | ||
|
|
f7a19db98b | ||
|
|
13756ec768 | ||
|
|
3326ae44fa | ||
|
|
a08d5b78d9 | ||
|
|
ce74ed0309 | ||
|
|
0ff6a1af58 | ||
|
|
cd4d0ce062 | ||
|
|
a721a854a9 | ||
|
|
c715e01cc2 | ||
|
|
165611d2e4 | ||
|
|
746f00ad68 | ||
|
|
3f117a3eb5 |
2
.gitignore
vendored
2
.gitignore
vendored
@ -73,3 +73,5 @@ cosign.pub
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
|
||||
|
||||
@ -88,6 +88,7 @@ func TestPkgCoverageImage(t *testing.T) {
|
||||
definedPkgs.Remove(string(pkg.TerraformPkg))
|
||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // we have coverage for pear instead
|
||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||
|
||||
var cases []testCase
|
||||
cases = append(cases, commonTestCases...)
|
||||
@ -162,6 +163,7 @@ func TestPkgCoverageDirectory(t *testing.T) {
|
||||
definedPkgs.Remove(string(pkg.UnknownPkg))
|
||||
definedPkgs.Remove(string(pkg.CondaPkg))
|
||||
definedPkgs.Remove(string(pkg.PhpPeclPkg)) // this is covered as pear packages
|
||||
definedPkgs.Remove(string(pkg.ModelPkg))
|
||||
|
||||
// for directory scans we should not expect to see any of the following package types
|
||||
definedPkgs.Remove(string(pkg.KbPkg))
|
||||
|
||||
8
go.mod
8
go.mod
@ -286,6 +286,8 @@ require (
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
)
|
||||
|
||||
require github.com/gpustack/gguf-parser-go v0.22.1
|
||||
|
||||
require (
|
||||
github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect
|
||||
@ -306,6 +308,12 @@ require (
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect
|
||||
github.com/aws/smithy-go v1.22.4 // indirect
|
||||
github.com/hashicorp/aws-sdk-go-base/v2 v2.0.0-beta.65 // indirect
|
||||
github.com/henvic/httpretty v0.1.4 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d // indirect
|
||||
gonum.org/v1/gonum v0.15.1 // indirect
|
||||
)
|
||||
|
||||
retract (
|
||||
|
||||
11
go.sum
11
go.sum
@ -541,6 +541,8 @@ github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj
|
||||
github.com/gookit/color v1.2.5/go.mod h1:AhIE+pS6D4Ql0SQWbBeXPHw7gY0/sjHoA4s/n1KB7xg=
|
||||
github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA=
|
||||
github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1 h1:FRnEDWqT0Rcplr/R9ctCRSN2+3DhVsf6dnR5/i9JA4E=
|
||||
github.com/gpustack/gguf-parser-go v0.22.1/go.mod h1:y4TwTtDqFWTK+xvprOjRUh+dowgU2TKCX37vRKvGiZ0=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo=
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms=
|
||||
@ -590,6 +592,8 @@ github.com/hashicorp/memberlist v0.2.2/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOn
|
||||
github.com/hashicorp/memberlist v0.3.0/go.mod h1:MS2lj3INKhZjWNqd3N0m3J+Jxf3DAOnAH9VT3Sh9MUE=
|
||||
github.com/hashicorp/serf v0.9.5/go.mod h1:UWDWwZeL5cuWDJdl0C6wrvrUwEqtQ4ZKBKKENpqIUyk=
|
||||
github.com/hashicorp/serf v0.9.6/go.mod h1:TXZNMjZQijwlDvp+r0b63xZ45H7JmCmgg4gpTwn9UV4=
|
||||
github.com/henvic/httpretty v0.1.4 h1:Jo7uwIRWVFxkqOnErcoYfH90o3ddQyVrSANeS4cxYmU=
|
||||
github.com/henvic/httpretty v0.1.4/go.mod h1:Dn60sQTZfbt2dYsdUSNsCljyF4AfdqnuJFDLJA1I4AM=
|
||||
github.com/huandu/xstrings v1.5.0 h1:2ag3IFq9ZDANvthTwTiqSSZLjDc+BedvHPAp5tJy2TI=
|
||||
github.com/huandu/xstrings v1.5.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE=
|
||||
github.com/iancoleman/orderedmap v0.0.0-20190318233801-ac98e3ecb4b0/go.mod h1:N0Wam8K1arqPXNWjMo21EXnBPOPp36vB07FNRdD2geA=
|
||||
@ -617,6 +621,7 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
|
||||
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
|
||||
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
|
||||
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
|
||||
@ -722,9 +727,11 @@ github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcY
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA=
|
||||
github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
|
||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
@ -851,6 +858,8 @@ github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af h1:Sp5TG9f7K39yf
|
||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8=
|
||||
github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d h1:3VwvTjiRPA7cqtgOWddEL+JrcijMlXUmj99c/6YyZoY=
|
||||
github.com/smallnest/ringbuffer v0.0.0-20241116012123-461381446e3d/go.mod h1:tAG61zBM1DYRaGIPloumExGvScf08oHuo0kFoOqdbT0=
|
||||
github.com/sorairolake/lzip-go v0.3.8 h1:j5Q2313INdTA80ureWYRhX+1K78mUXfMoPZCw/ivWik=
|
||||
github.com/sorairolake/lzip-go v0.3.8/go.mod h1:JcBqGMV0frlxwrsE9sMWXDjqn3EeVf0/54YPsw66qkU=
|
||||
github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo=
|
||||
@ -1304,6 +1313,8 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU=
|
||||
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
|
||||
gonum.org/v1/gonum v0.15.1 h1:FNy7N6OUZVUaWG9pTiD+jlhdQ3lMP+/LcTpJ6+a8sQ0=
|
||||
gonum.org/v1/gonum v0.15.1/go.mod h1:eZTZuRFrzu5pcyjN5wJhcIhnUdNijYxX1T2IcrOGY0o=
|
||||
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
|
||||
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
|
||||
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
|
||||
|
||||
@ -3,5 +3,5 @@ package internal
|
||||
const (
|
||||
// JSONSchemaVersion is the current schema version output by the JSON encoder
|
||||
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
|
||||
JSONSchemaVersion = "16.0.41"
|
||||
JSONSchemaVersion = "16.0.42"
|
||||
)
|
||||
|
||||
@ -27,6 +27,7 @@ func AllTypes() []any {
|
||||
pkg.ELFBinaryPackageNoteJSONPayload{},
|
||||
pkg.ElixirMixLockEntry{},
|
||||
pkg.ErlangRebarLockEntry{},
|
||||
pkg.GGUFFileHeader{},
|
||||
pkg.GitHubActionsUseStatement{},
|
||||
pkg.GolangBinaryBuildinfoEntry{},
|
||||
pkg.GolangModuleEntry{},
|
||||
|
||||
@ -123,6 +123,7 @@ var jsonTypes = makeJSONTypes(
|
||||
jsonNames(pkg.TerraformLockProviderEntry{}, "terraform-lock-provider-entry"),
|
||||
jsonNames(pkg.DotnetPackagesLockEntry{}, "dotnet-packages-lock-entry"),
|
||||
jsonNames(pkg.CondaMetaPackage{}, "conda-metadata-entry", "CondaPackageMetadata"),
|
||||
jsonNames(pkg.GGUFFileHeader{}, "gguf-file-metadata"),
|
||||
)
|
||||
|
||||
func expandLegacyNameVariants(names ...string) []string {
|
||||
|
||||
@ -3,6 +3,7 @@ package task
|
||||
import (
|
||||
"github.com/anchore/syft/syft/cataloging/pkgcataloging"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/ai"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/alpine"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/arch"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/binary"
|
||||
@ -175,6 +176,7 @@ func DefaultPackageTaskFactories() Factories {
|
||||
newSimplePackageTaskFactory(homebrew.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "homebrew"),
|
||||
newSimplePackageTaskFactory(conda.NewCondaMetaCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.PackageTag, "conda"),
|
||||
newSimplePackageTaskFactory(snap.NewCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "snap"),
|
||||
newSimplePackageTaskFactory(ai.NewGGUFCataloger, pkgcataloging.DirectoryTag, pkgcataloging.ImageTag, "ai", "model", "gguf", "ml"),
|
||||
|
||||
// deprecated catalogers ////////////////////////////////////////
|
||||
// these are catalogers that should not be selectable other than specific inclusion via name or "deprecated" tag (to remain backwards compatible)
|
||||
|
||||
4078
schema/json/schema-16.0.42.json
Normal file
4078
schema/json/schema-16.0.42.json
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,6 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.41/document",
|
||||
"$id": "anchore.io/schema/syft/json/16.0.42/document",
|
||||
"$ref": "#/$defs/Document",
|
||||
"$defs": {
|
||||
"AlpmDbEntry": {
|
||||
@ -1399,6 +1399,70 @@
|
||||
"size"
|
||||
]
|
||||
},
|
||||
"GgufFileMetadata": {
|
||||
"properties": {
|
||||
"modelFormat": {
|
||||
"type": "string",
|
||||
"description": "ModelFormat is always \"gguf\""
|
||||
},
|
||||
"modelName": {
|
||||
"type": "string",
|
||||
"description": "ModelName is the name of the model (from general.name or filename)"
|
||||
},
|
||||
"modelVersion": {
|
||||
"type": "string",
|
||||
"description": "ModelVersion is the version of the model (if available in header, else \"unknown\")"
|
||||
},
|
||||
"fileSize": {
|
||||
"type": "integer",
|
||||
"description": "FileSize is the size of the GGUF file in bytes (best-effort if available from resolver)"
|
||||
},
|
||||
"hash": {
|
||||
"type": "string",
|
||||
"description": "Hash is a content hash of the metadata (for stable global identifiers across remotes)"
|
||||
},
|
||||
"license": {
|
||||
"type": "string",
|
||||
"description": "License is the license identifier (from general.license if present)"
|
||||
},
|
||||
"ggufVersion": {
|
||||
"type": "integer",
|
||||
"description": "GGUFVersion is the GGUF format version (e.g., 3)"
|
||||
},
|
||||
"architecture": {
|
||||
"type": "string",
|
||||
"description": "Architecture is the model architecture (from general.architecture, e.g., \"qwen3moe\", \"llama\")"
|
||||
},
|
||||
"quantization": {
|
||||
"type": "string",
|
||||
"description": "Quantization is the quantization type (e.g., \"IQ4_NL\", \"Q4_K_M\")"
|
||||
},
|
||||
"parameters": {
|
||||
"type": "integer",
|
||||
"description": "Parameters is the number of model parameters (if present in header)"
|
||||
},
|
||||
"tensorCount": {
|
||||
"type": "integer",
|
||||
"description": "TensorCount is the number of tensors in the model"
|
||||
},
|
||||
"header": {
|
||||
"type": "object",
|
||||
"description": "Header contains the remaining key-value pairs from the GGUF header that are not already\nrepresented as typed fields above. This preserves additional metadata fields for reference\n(namespaced with general.*, llama.*, etc.) while avoiding duplication."
|
||||
},
|
||||
"truncatedHeader": {
|
||||
"type": "boolean",
|
||||
"description": "TruncatedHeader indicates if the header was truncated during parsing (for very large headers)"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
"required": [
|
||||
"modelFormat",
|
||||
"modelName",
|
||||
"ggufVersion",
|
||||
"tensorCount"
|
||||
],
|
||||
"description": "GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file."
|
||||
},
|
||||
"GithubActionsUseStatement": {
|
||||
"properties": {
|
||||
"value": {
|
||||
@ -2474,6 +2538,9 @@
|
||||
{
|
||||
"$ref": "#/$defs/ErlangRebarLockEntry"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GgufFileMetadata"
|
||||
},
|
||||
{
|
||||
"$ref": "#/$defs/GithubActionsUseStatement"
|
||||
},
|
||||
|
||||
@ -15,6 +15,7 @@ import (
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/sbom"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||
)
|
||||
|
||||
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
||||
@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
|
||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||
case source.SnapMetadata:
|
||||
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
||||
case *ocimodelsource.OCIModelMetadata:
|
||||
// OCI model artifacts should use image-like catalogers since they provide files to scan
|
||||
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
||||
}
|
||||
|
||||
@ -40,8 +40,11 @@ func EncodeComponent(p pkg.Package, supplier string, locationSorter func(a, b fi
|
||||
}
|
||||
|
||||
componentType := cyclonedx.ComponentTypeLibrary
|
||||
if p.Type == pkg.BinaryPkg {
|
||||
switch p.Type {
|
||||
case pkg.BinaryPkg:
|
||||
componentType = cyclonedx.ComponentTypeApplication
|
||||
case pkg.ModelPkg:
|
||||
componentType = cyclonedx.ComponentTypeMachineLearningModel
|
||||
}
|
||||
|
||||
return cyclonedx.Component{
|
||||
|
||||
@ -62,7 +62,7 @@ func collectPackages(component *cyclonedx.Component, s *sbom.SBOM, idMap map[str
|
||||
switch component.Type {
|
||||
case cyclonedx.ComponentTypeOS:
|
||||
case cyclonedx.ComponentTypeContainer:
|
||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary:
|
||||
case cyclonedx.ComponentTypeApplication, cyclonedx.ComponentTypeFramework, cyclonedx.ComponentTypeLibrary, cyclonedx.ComponentTypeMachineLearningModel:
|
||||
p := decodeComponent(component)
|
||||
idMap[component.BOMRef] = p
|
||||
if component.BOMRef != "" {
|
||||
|
||||
@ -54,6 +54,7 @@ func Test_OriginatorSupplier(t *testing.T) {
|
||||
pkg.OpamPackage{},
|
||||
pkg.YarnLockEntry{},
|
||||
pkg.TerraformLockProviderEntry{},
|
||||
pkg.GGUFFileHeader{},
|
||||
)
|
||||
tests := []struct {
|
||||
name string
|
||||
|
||||
@ -82,6 +82,8 @@ func SourceInfo(p pkg.Package) string {
|
||||
answer = "acquired package info from Homebrew formula"
|
||||
case pkg.TerraformPkg:
|
||||
answer = "acquired package info from Terraform dependency lock file"
|
||||
case pkg.ModelPkg:
|
||||
answer = "acquired package info from AI artifact (e.g. GGUF File"
|
||||
default:
|
||||
answer = "acquired package info from the following paths"
|
||||
}
|
||||
|
||||
@ -351,6 +351,14 @@ func Test_SourceInfo(t *testing.T) {
|
||||
"acquired package info from Terraform dependency lock file",
|
||||
},
|
||||
},
|
||||
{
|
||||
input: pkg.Package{
|
||||
Type: pkg.ModelPkg,
|
||||
},
|
||||
expected: []string{
|
||||
"",
|
||||
},
|
||||
},
|
||||
}
|
||||
var pkgTypes []pkg.Type
|
||||
for _, test := range tests {
|
||||
|
||||
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
16
syft/pkg/cataloger/ai/cataloger.go
Normal file
@ -0,0 +1,16 @@
|
||||
/*
|
||||
Package ai provides concrete Cataloger implementations for AI artifacts and machine learning models,
|
||||
including support for GGUF (GPT-Generated Unified Format) model files.
|
||||
*/
|
||||
package ai
|
||||
|
||||
import (
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
// NewGGUFCataloger returns a new cataloger instance for GGUF model files.
|
||||
func NewGGUFCataloger() pkg.Cataloger {
|
||||
return generic.NewCataloger("gguf-cataloger").
|
||||
WithParserByGlobs(parseGGUFModel, "**/*.gguf")
|
||||
}
|
||||
373
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
373
syft/pkg/cataloger/ai/cataloger_test.go
Normal file
@ -0,0 +1,373 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp/cmpopts"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
|
||||
)
|
||||
|
||||
func TestGGUFCataloger_Globs(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T) string // returns fixture directory
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
name: "finds GGUF files in root",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
createTestGGUFInDir(t, dir, "model1.gguf")
|
||||
createTestGGUFInDir(t, dir, "model2.gguf")
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"model1.gguf",
|
||||
"model2.gguf",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "finds GGUF files in subdirectories",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
modelsDir := filepath.Join(dir, "models")
|
||||
os.MkdirAll(modelsDir, 0755)
|
||||
createTestGGUFInDir(t, modelsDir, "llama.gguf")
|
||||
|
||||
deepDir := filepath.Join(dir, "deep", "nested", "path")
|
||||
os.MkdirAll(deepDir, 0755)
|
||||
createTestGGUFInDir(t, deepDir, "mistral.gguf")
|
||||
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"models/llama.gguf",
|
||||
"deep/nested/path/mistral.gguf",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ignores non-GGUF files",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
createTestGGUFInDir(t, dir, "model.gguf")
|
||||
|
||||
// Create non-GGUF files
|
||||
os.WriteFile(filepath.Join(dir, "readme.txt"), []byte("readme"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "model.bin"), []byte("binary"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||
|
||||
return dir
|
||||
},
|
||||
expected: []string{
|
||||
"model.gguf",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fixtureDir := tt.setup(t)
|
||||
|
||||
pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, fixtureDir).
|
||||
ExpectsResolverContentQueries(tt.expected).
|
||||
TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Integration(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
setup func(t *testing.T) string
|
||||
expectedPackages []pkg.Package
|
||||
expectedRelationships []artifact.Relationship
|
||||
}{
|
||||
{
|
||||
name: "catalog single GGUF file",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "llama3-8b").
|
||||
withStringKV("general.version", "3.0").
|
||||
withStringKV("general.license", "Apache-2.0").
|
||||
withStringKV("general.quantization", "Q4_K_M").
|
||||
withUint64KV("general.parameter_count", 8030000000).
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, "llama3-8b.gguf")
|
||||
os.WriteFile(path, data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "llama3-8b",
|
||||
Version: "3.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(
|
||||
pkg.NewLicenseFromFields("Apache-2.0", "", nil),
|
||||
),
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "llama3-8b",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
Parameters: 0,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog multiple GGUF files",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create first model
|
||||
data1 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "model1").
|
||||
withStringKV("general.version", "1.0").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model1.gguf"), data1, 0644)
|
||||
|
||||
// Create second model
|
||||
data2 := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "mistral").
|
||||
withStringKV("general.name", "model2").
|
||||
withStringKV("general.version", "2.0").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model2.gguf"), data2, 0644)
|
||||
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "model1",
|
||||
Version: "1.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model1",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Unknown",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "model2",
|
||||
Version: "2.0",
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "model2",
|
||||
ModelVersion: "2.0",
|
||||
Architecture: "mistral",
|
||||
Quantization: "Unknown",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
{
|
||||
name: "catalog GGUF in nested directories",
|
||||
setup: func(t *testing.T) string {
|
||||
dir := t.TempDir()
|
||||
nestedDir := filepath.Join(dir, "models", "quantized")
|
||||
os.MkdirAll(nestedDir, 0755)
|
||||
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "qwen").
|
||||
withStringKV("general.name", "qwen-nested").
|
||||
build()
|
||||
|
||||
os.WriteFile(filepath.Join(nestedDir, "qwen.gguf"), data, 0644)
|
||||
return dir
|
||||
},
|
||||
expectedPackages: []pkg.Package{
|
||||
{
|
||||
Name: "qwen-nested",
|
||||
Version: unknownGGUFData,
|
||||
Type: pkg.ModelPkg,
|
||||
Metadata: pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "qwen-nested",
|
||||
ModelVersion: unknownGGUFData,
|
||||
Architecture: "qwen",
|
||||
Quantization: "Unknown",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 0,
|
||||
Header: map[string]interface{}{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedRelationships: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
fixtureDir := tt.setup(t)
|
||||
|
||||
// Use pkgtest to catalog and compare
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, fixtureDir).
|
||||
Expects(tt.expectedPackages, tt.expectedRelationships).
|
||||
IgnoreLocationLayer().
|
||||
IgnorePackageFields("FoundBy", "Locations"). // These are set by the cataloger
|
||||
WithCompareOptions(
|
||||
// Ignore Hash as it's computed dynamically
|
||||
cmpopts.IgnoreFields(pkg.GGUFFileHeader{}, "Hash"),
|
||||
)
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create a valid GGUF
|
||||
validData := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "valid-model").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "valid.gguf"), validData, 0644)
|
||||
|
||||
// Create an invalid GGUF (wrong magic)
|
||||
invalidData := newTestGGUFBuilder().buildInvalidMagic()
|
||||
os.WriteFile(filepath.Join(dir, "invalid.gguf"), invalidData, 0644)
|
||||
|
||||
// Create a truncated GGUF
|
||||
os.WriteFile(filepath.Join(dir, "truncated.gguf"), []byte{0x47}, 0644)
|
||||
|
||||
// Catalog should succeed and only return the valid package
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Should only find the valid model
|
||||
require.Len(t, pkgs, 1)
|
||||
assert.Equal(t, "valid-model", pkgs[0].Name)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_Name(t *testing.T) {
|
||||
cataloger := NewGGUFCataloger()
|
||||
assert.Equal(t, "gguf-cataloger", cataloger.Name())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_EmptyDirectory(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
// Create a subdirectory to ensure glob still runs
|
||||
os.MkdirAll(filepath.Join(dir, "models"), 0755)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, rels []artifact.Relationship) {
|
||||
assert.Empty(t, pkgs)
|
||||
assert.Empty(t, rels)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create GGUF file
|
||||
ggufData := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model.gguf"), ggufData, 0644)
|
||||
|
||||
// Create other file types
|
||||
os.WriteFile(filepath.Join(dir, "README.md"), []byte("# Models"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "config.json"), []byte("{}"), 0644)
|
||||
os.WriteFile(filepath.Join(dir, "weights.bin"), []byte("weights"), 0644)
|
||||
os.MkdirAll(filepath.Join(dir, "subdir"), 0755)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Should only find the GGUF model
|
||||
require.Len(t, pkgs, 1)
|
||||
assert.Equal(t, "test-model", pkgs[0].Name)
|
||||
assert.Equal(t, pkg.ModelPkg, pkgs[0].Type)
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
||||
// Test that the glob pattern is case-sensitive (as expected for **/*.gguf)
|
||||
dir := t.TempDir()
|
||||
|
||||
// Create lowercase .gguf
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "lowercase").
|
||||
build()
|
||||
os.WriteFile(filepath.Join(dir, "model.gguf"), data, 0644)
|
||||
|
||||
// Create uppercase .GGUF (should not match the glob)
|
||||
os.WriteFile(filepath.Join(dir, "MODEL.GGUF"), data, 0644)
|
||||
|
||||
tester := pkgtest.NewCatalogTester().
|
||||
FromDirectory(t, dir).
|
||||
ExpectsAssertion(func(t *testing.T, pkgs []pkg.Package, _ []artifact.Relationship) {
|
||||
// Depending on filesystem case-sensitivity, we may get 1 or 2 packages
|
||||
// On case-insensitive filesystems (macOS), both might match
|
||||
// On case-sensitive filesystems (Linux), only lowercase matches
|
||||
assert.GreaterOrEqual(t, len(pkgs), 1, "should find at least the lowercase file")
|
||||
})
|
||||
|
||||
tester.TestCataloger(t, NewGGUFCataloger())
|
||||
}
|
||||
|
||||
// createTestGGUFInDir creates a minimal test GGUF file in the specified directory
|
||||
func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
||||
t.Helper()
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
|
||||
path := filepath.Join(dir, filename)
|
||||
err := os.WriteFile(path, data, 0644)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
69
syft/pkg/cataloger/ai/package.go
Normal file
69
syft/pkg/cataloger/ai/package.go
Normal file
@ -0,0 +1,69 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func newGGUFPackage(metadata *pkg.GGUFFileHeader, locations ...file.Location) pkg.Package {
|
||||
// Compute hash if not already set
|
||||
if metadata.Hash == "" {
|
||||
metadata.Hash = computeMetadataHash(metadata)
|
||||
}
|
||||
|
||||
p := pkg.Package{
|
||||
Name: metadata.ModelName,
|
||||
Version: metadata.ModelVersion,
|
||||
Locations: file.NewLocationSet(locations...),
|
||||
Type: pkg.ModelPkg,
|
||||
Licenses: pkg.NewLicenseSet(),
|
||||
Metadata: *metadata,
|
||||
// NOTE: PURL is intentionally not set as the package-url spec
|
||||
// has not yet finalized support for ML model packages
|
||||
}
|
||||
|
||||
// Add license to the package if present in metadata
|
||||
if metadata.License != "" {
|
||||
p.Licenses.Add(pkg.NewLicenseFromFields(metadata.License, "", nil))
|
||||
}
|
||||
|
||||
p.SetID()
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
// computeMetadataHash computes a stable hash of the metadata for use as a global identifier
|
||||
func computeMetadataHash(metadata *pkg.GGUFFileHeader) string {
|
||||
// Create a stable representation of the metadata
|
||||
hashData := struct {
|
||||
Format string
|
||||
Name string
|
||||
Version string
|
||||
Architecture string
|
||||
GGUFVersion uint32
|
||||
TensorCount uint64
|
||||
}{
|
||||
Format: metadata.ModelFormat,
|
||||
Name: metadata.ModelName,
|
||||
Version: metadata.ModelVersion,
|
||||
Architecture: metadata.Architecture,
|
||||
GGUFVersion: metadata.GGUFVersion,
|
||||
TensorCount: metadata.TensorCount,
|
||||
}
|
||||
|
||||
// Marshal to JSON for stable hashing
|
||||
jsonBytes, err := json.Marshal(hashData)
|
||||
if err != nil {
|
||||
log.Debugf("failed to marshal metadata for hashing: %v", err)
|
||||
return ""
|
||||
}
|
||||
|
||||
// Compute SHA256 hash
|
||||
hash := sha256.Sum256(jsonBytes)
|
||||
return fmt.Sprintf("%x", hash[:8]) // Use first 8 bytes (16 hex chars)
|
||||
}
|
||||
126
syft/pkg/cataloger/ai/package_test.go
Normal file
126
syft/pkg/cataloger/ai/package_test.go
Normal file
@ -0,0 +1,126 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func TestNewGGUFPackage(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
metadata *pkg.GGUFFileHeader
|
||||
locations []file.Location
|
||||
checkFunc func(t *testing.T, p pkg.Package)
|
||||
}{
|
||||
{
|
||||
name: "complete GGUF package with all fields",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "llama3-8b-instruct",
|
||||
ModelVersion: "3.0",
|
||||
License: "Apache-2.0",
|
||||
Architecture: "llama",
|
||||
Quantization: "Q4_K_M",
|
||||
Parameters: 8030000000,
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 291,
|
||||
Header: map[string]any{},
|
||||
TruncatedHeader: false,
|
||||
},
|
||||
locations: []file.Location{file.NewLocation("/models/llama3-8b.gguf")},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
if d := cmp.Diff("llama3-8b-instruct", p.Name); d != "" {
|
||||
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff("3.0", p.Version); d != "" {
|
||||
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||
assert.Len(t, p.Licenses.ToSlice(), 1)
|
||||
if d := cmp.Diff("Apache-2.0", p.Licenses.ToSlice()[0].Value); d != "" {
|
||||
t.Errorf("License value mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
assert.NotEmpty(t, p.ID())
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "minimal GGUF package",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "simple-model",
|
||||
ModelVersion: "1.0",
|
||||
Architecture: "gpt2",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 50,
|
||||
},
|
||||
locations: []file.Location{file.NewLocation("/models/simple.gguf")},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
if d := cmp.Diff("simple-model", p.Name); d != "" {
|
||||
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff("1.0", p.Version); d != "" {
|
||||
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
assert.Empty(t, p.PURL, "PURL should not be set for model packages")
|
||||
assert.Empty(t, p.Licenses.ToSlice())
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "GGUF package with multiple locations",
|
||||
metadata: &pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: "multi-location-model",
|
||||
ModelVersion: "1.5",
|
||||
Architecture: "llama",
|
||||
GGUFVersion: 3,
|
||||
TensorCount: 150,
|
||||
},
|
||||
locations: []file.Location{
|
||||
file.NewLocation("/models/model1.gguf"),
|
||||
file.NewLocation("/models/model2.gguf"),
|
||||
},
|
||||
checkFunc: func(t *testing.T, p pkg.Package) {
|
||||
assert.Len(t, p.Locations.ToSlice(), 2)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
p := newGGUFPackage(tt.metadata, tt.locations...)
|
||||
|
||||
if d := cmp.Diff(tt.metadata.ModelName, p.Name); d != "" {
|
||||
t.Errorf("Name mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(tt.metadata.ModelVersion, p.Version); d != "" {
|
||||
t.Errorf("Version mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
if d := cmp.Diff(pkg.ModelPkg, p.Type); d != "" {
|
||||
t.Errorf("Type mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
|
||||
// Verify metadata is attached
|
||||
metadata, ok := p.Metadata.(pkg.GGUFFileHeader)
|
||||
require.True(t, ok, "metadata should be GGUFFileHeader")
|
||||
if d := cmp.Diff(*tt.metadata, metadata); d != "" {
|
||||
t.Errorf("Metadata mismatch (-want +got):\n%s", d)
|
||||
}
|
||||
|
||||
if tt.checkFunc != nil {
|
||||
tt.checkFunc(t, p)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
89
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
89
syft/pkg/cataloger/ai/parse_gguf.go
Normal file
@ -0,0 +1,89 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
// GGUF file format constants.
const (
	// ggufMagicNumber is the first four bytes of a GGUF file ("GGUF") decoded as a
	// little-endian uint32.
	ggufMagicNumber = 0x46554747
	// maxHeaderSize (50MB) is the read cap intended for callers that wrap the input
	// in an io.LimitedReader; sized for large tokenizer vocabularies.
	maxHeaderSize = 50 * 1024 * 1024
)

// ggufHeaderReader pulls the leading bytes of a GGUF file out of an io.Reader.
type ggufHeaderReader struct {
	reader io.Reader
}

// readHeader validates the GGUF magic number and returns everything the underlying
// reader yields, starting with the 24-byte fixed prefix
// (magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8)).
//
// The method does not locate the end of the metadata section itself: it keeps
// reading until the reader reports EOF. Callers are expected to wrap the input in an
// io.LimitedReader so that no more than a bounded number of bytes is buffered.
//
// An error is returned when the prefix cannot be read in full, when the magic number
// does not match, or when a read fails for a reason other than EOF.
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
	const prefixLen = 24 // magic, version, tensor count and kv count

	prefix := make([]byte, prefixLen)
	if _, err := io.ReadFull(r.reader, prefix); err != nil {
		return nil, fmt.Errorf("failed to read GGUF header prefix: %w", err)
	}

	if magic := binary.LittleEndian.Uint32(prefix[:4]); magic != ggufMagicNumber {
		return nil, fmt.Errorf("invalid GGUF magic number: 0x%08X", magic)
	}

	// drain whatever remains (up to EOF or the caller-imposed limit)
	rest, err := io.ReadAll(r.reader)
	if err != nil {
		return nil, fmt.Errorf("failed to read GGUF header: %w", err)
	}

	return append(prefix, rest...), nil
}
|
||||
|
||||
// Helper to convert gguf_parser metadata to simpler types
|
||||
func convertGGUFMetadataKVs(kvs gguf_parser.GGUFMetadataKVs) map[string]interface{} {
|
||||
result := make(map[string]interface{})
|
||||
|
||||
// Limit KV pairs to avoid bloat
|
||||
const maxKVPairs = 200
|
||||
count := 0
|
||||
|
||||
for _, kv := range kvs {
|
||||
if count >= maxKVPairs {
|
||||
break
|
||||
}
|
||||
|
||||
// Skip standard fields that are extracted separately
|
||||
switch kv.Key {
|
||||
case "general.architecture", "general.name", "general.license",
|
||||
"general.version", "general.parameter_count", "general.quantization":
|
||||
continue
|
||||
}
|
||||
|
||||
result[kv.Key] = kv.Value
|
||||
count++
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
130
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
130
syft/pkg/cataloger/ai/parse_gguf_model.go
Normal file
@ -0,0 +1,130 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
|
||||
"github.com/anchore/syft/internal"
|
||||
"github.com/anchore/syft/internal/unknown"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
// unknownGGUFData is the placeholder value used when a piece of GGUF metadata
// (such as the model version) cannot be determined.
const unknownGGUFData = "unknown"
|
||||
|
||||
// parseGGUFModel parses a GGUF model file and returns the discovered package.
|
||||
// This implementation only reads the header portion of the file, not the entire model.
|
||||
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
defer internal.CloseAndLogError(reader, reader.Path())
|
||||
|
||||
// Read and validate the GGUF file header using LimitedReader to prevent OOM
|
||||
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
|
||||
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
|
||||
headerReader := &ggufHeaderReader{reader: limitedReader}
|
||||
headerData, err := headerReader.readHeader()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||
}
|
||||
|
||||
// Create a temporary file for the library to parse
|
||||
// The library requires a file path, so we create a temp file
|
||||
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
tempPath := tempFile.Name()
|
||||
defer os.Remove(tempPath)
|
||||
|
||||
// Write the validated header data to temp file
|
||||
if _, err := tempFile.Write(headerData); err != nil {
|
||||
tempFile.Close()
|
||||
return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
|
||||
}
|
||||
tempFile.Close()
|
||||
|
||||
// Parse using gguf-parser-go with options to skip unnecessary data
|
||||
ggufFile, err := gguf_parser.ParseGGUFFile(tempPath,
|
||||
gguf_parser.SkipLargeMetadata(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to parse GGUF file: %w", err)
|
||||
}
|
||||
|
||||
// Extract metadata
|
||||
metadata := ggufFile.Metadata()
|
||||
|
||||
// Convert to syft metadata structure
|
||||
syftMetadata := &pkg.GGUFFileHeader{
|
||||
ModelFormat: "gguf",
|
||||
ModelName: metadata.Name,
|
||||
ModelVersion: extractVersion(ggufFile.Header.MetadataKV),
|
||||
License: metadata.License,
|
||||
Architecture: metadata.Architecture,
|
||||
Quantization: metadata.FileTypeDescriptor,
|
||||
Parameters: uint64(metadata.Parameters),
|
||||
GGUFVersion: uint32(ggufFile.Header.Version),
|
||||
TensorCount: ggufFile.Header.TensorCount,
|
||||
Header: convertGGUFMetadataKVs(ggufFile.Header.MetadataKV),
|
||||
TruncatedHeader: false, // We read the full header
|
||||
Hash: "", // Will be computed in newGGUFPackage
|
||||
}
|
||||
|
||||
// If model name is not in metadata, use filename
|
||||
if syftMetadata.ModelName == "" {
|
||||
syftMetadata.ModelName = extractModelNameFromPath(reader.Path())
|
||||
}
|
||||
|
||||
// If version is still unknown, try to infer from name
|
||||
if syftMetadata.ModelVersion == unknownGGUFData {
|
||||
syftMetadata.ModelVersion = extractVersionFromName(syftMetadata.ModelName)
|
||||
}
|
||||
|
||||
// Create package from metadata
|
||||
p := newGGUFPackage(
|
||||
syftMetadata,
|
||||
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
|
||||
)
|
||||
|
||||
return []pkg.Package{p}, nil, unknown.IfEmptyf([]pkg.Package{p}, "unable to parse GGUF file")
|
||||
}
|
||||
|
||||
// extractVersion attempts to extract version from metadata KV pairs
|
||||
func extractVersion(kvs gguf_parser.GGUFMetadataKVs) string {
|
||||
for _, kv := range kvs {
|
||||
if kv.Key == "general.version" {
|
||||
if v, ok := kv.Value.(string); ok && v != "" {
|
||||
return v
|
||||
}
|
||||
}
|
||||
}
|
||||
return unknownGGUFData
|
||||
}
|
||||
|
||||
// extractVersionFromName tries to extract version from model name
|
||||
func extractVersionFromName(_ string) string {
|
||||
// Look for version patterns like "v1.0", "1.5b", "3.0", etc.
|
||||
// For now, return unknown - this could be enhanced with regex
|
||||
return unknownGGUFData
|
||||
}
|
||||
|
||||
// extractModelNameFromPath returns the final element of path with a trailing
// ".gguf" extension (if any) removed.
func extractModelNameFromPath(path string) string {
	return strings.TrimSuffix(filepath.Base(path), ".gguf")
}
|
||||
|
||||
// integrity check: compile-time assertion that parseGGUFModel satisfies generic.Parser
|
||||
var _ generic.Parser = parseGGUFModel
|
||||
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
@ -0,0 +1,41 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Create a test GGUF file
|
||||
data := newTestGGUFBuilder().
|
||||
withVersion(3).
|
||||
withStringKV("general.architecture", "llama").
|
||||
withStringKV("general.name", "test-model").
|
||||
build()
|
||||
|
||||
// Write to temp file
|
||||
tempFile, err := os.CreateTemp("", "test-*.gguf")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
defer os.Remove(tempFile.Name())
|
||||
|
||||
if _, err := tempFile.Write(data); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
tempFile.Close()
|
||||
|
||||
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
|
||||
|
||||
// Try to parse it
|
||||
fmt.Println("Attempting to parse...")
|
||||
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
|
||||
if err != nil {
|
||||
fmt.Printf("Parse error: %v\n", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
|
||||
}
|
||||
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
127
syft/pkg/cataloger/ai/test_helpers_test.go
Normal file
@ -0,0 +1,127 @@
|
||||
package ai
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
// GGUF constants for the test builder: the file magic and the numeric identifiers
// written in front of each metadata value to announce its type.
const (
	ggufMagic       = 0x46554747 // "GGUF" in little-endian
	ggufTypeUint8   = 0
	ggufTypeInt8    = 1
	ggufTypeUint16  = 2
	ggufTypeInt16   = 3
	ggufTypeUint32  = 4
	ggufTypeInt32   = 5
	ggufTypeFloat32 = 6
	ggufTypeBool    = 7
	ggufTypeString  = 8
	ggufTypeArray   = 9
	ggufTypeUint64  = 10
	ggufTypeInt64   = 11
	ggufTypeFloat64 = 12
)

// testGGUFBuilder helps build GGUF files for testing. Configure it with the with*
// methods and call build to obtain the encoded bytes.
type testGGUFBuilder struct {
	buf         *bytes.Buffer // scratch buffer used while encoding
	version     uint32        // GGUF format version written to the header
	tensorCount uint64        // tensor count written to the header
	kvPairs     []testKVPair  // metadata entries, written in insertion order
}

// testKVPair is one metadata entry: a key, the GGUF type identifier of the value,
// and the value itself (its dynamic type must match valueType).
type testKVPair struct {
	key       string
	valueType uint32
	value     interface{}
}

// newTestGGUFBuilder returns a builder for a version 3 file with no tensors and no
// metadata entries.
func newTestGGUFBuilder() *testGGUFBuilder {
	return &testGGUFBuilder{
		buf:         new(bytes.Buffer),
		version:     3,
		tensorCount: 0,
		kvPairs:     []testKVPair{},
	}
}

// withVersion sets the GGUF version written to the header.
func (b *testGGUFBuilder) withVersion(v uint32) *testGGUFBuilder {
	b.version = v
	return b
}

// withTensorCount sets the tensor count written to the header.
func (b *testGGUFBuilder) withTensorCount(count uint64) *testGGUFBuilder {
	b.tensorCount = count
	return b
}

// withStringKV appends a string-valued metadata entry.
func (b *testGGUFBuilder) withStringKV(key, value string) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeString, value: value})
	return b
}

// withUint64KV appends a uint64-valued metadata entry.
func (b *testGGUFBuilder) withUint64KV(key string, value uint64) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint64, value: value})
	return b
}

// withUint32KV appends a uint32-valued metadata entry.
func (b *testGGUFBuilder) withUint32KV(key string, value uint32) *testGGUFBuilder {
	b.kvPairs = append(b.kvPairs, testKVPair{key: key, valueType: ggufTypeUint32, value: value})
	return b
}

// writeString writes s in GGUF string form: a little-endian uint64 byte length
// followed by the raw bytes.
func (b *testGGUFBuilder) writeString(s string) {
	binary.Write(b.buf, binary.LittleEndian, uint64(len(s)))
	b.buf.WriteString(s)
}

// build encodes the configured header and metadata entries and returns the bytes:
// magic, version, tensor count, entry count, then for each entry its key, value
// type and value (all little-endian).
//
// build may be called repeatedly; each call encodes from scratch and returns an
// independent slice. Entries whose value type has no case below get only their key
// and type written, with no value bytes. A value whose dynamic type does not match
// its valueType causes a panic from the type assertion.
func (b *testGGUFBuilder) build() []byte {
	// start clean so that a second call does not append to the first call's output
	b.buf.Reset()

	// Write magic number "GGUF"
	binary.Write(b.buf, binary.LittleEndian, uint32(ggufMagic))

	// Write version
	binary.Write(b.buf, binary.LittleEndian, b.version)

	// Write tensor count
	binary.Write(b.buf, binary.LittleEndian, b.tensorCount)

	// Write KV count
	binary.Write(b.buf, binary.LittleEndian, uint64(len(b.kvPairs)))

	// Write KV pairs
	for _, kv := range b.kvPairs {
		b.writeString(kv.key)
		binary.Write(b.buf, binary.LittleEndian, kv.valueType)

		// Write value based on type
		switch kv.valueType {
		case ggufTypeString:
			b.writeString(kv.value.(string))
		case ggufTypeUint32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint32))
		case ggufTypeUint64:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint64))
		case ggufTypeUint8:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(uint8))
		case ggufTypeInt32:
			binary.Write(b.buf, binary.LittleEndian, kv.value.(int32))
		case ggufTypeBool:
			var v uint8
			if kv.value.(bool) {
				v = 1
			}
			binary.Write(b.buf, binary.LittleEndian, v)
		}
	}

	// hand back a copy: the internal buffer is reused by later build calls
	return append([]byte(nil), b.buf.Bytes()...)
}

// buildInvalidMagic creates a file with an invalid magic number.
//
// The bogus magic is followed by the remaining fixed header fields (version, tensor
// count, entry count) so the result is a full 24-byte header prefix; a reader that
// first reads the fixed prefix therefore rejects it because of the magic number
// rather than because the input is too short.
func (b *testGGUFBuilder) buildInvalidMagic() []byte {
	buf := new(bytes.Buffer)
	binary.Write(buf, binary.LittleEndian, uint32(0x12345678))
	binary.Write(buf, binary.LittleEndian, b.version)
	binary.Write(buf, binary.LittleEndian, b.tensorCount)
	binary.Write(buf, binary.LittleEndian, uint64(len(b.kvPairs)))
	return buf.Bytes()
}
|
||||
47
syft/pkg/gguf.go
Normal file
47
syft/pkg/gguf.go
Normal file
@ -0,0 +1,47 @@
|
||||
package pkg
|
||||
|
||||
// GGUFFileHeader represents metadata extracted from a GGUF (GPT-Generated Unified Format) model file.
// GGUF is a binary file format used for storing model weights for the GGML library, designed for fast
// loading and saving of models, particularly quantized large language models.
type GGUFFileHeader struct {
	// ModelFormat identifies the file format; always "gguf".
	ModelFormat string `json:"modelFormat" cyclonedx:"modelFormat"`

	// ModelName is the model's name, taken from general.name or, failing that, the file name.
	ModelName string `json:"modelName" cyclonedx:"modelName"`

	// ModelVersion is the model's version from the header, or "unknown" when the header has none.
	ModelVersion string `json:"modelVersion,omitempty" cyclonedx:"modelVersion"`

	// FileSize is the size of the GGUF file in bytes (best-effort, when the resolver can provide it).
	FileSize int64 `json:"fileSize,omitempty" cyclonedx:"fileSize"`

	// Hash is a hash derived from the metadata, used as a stable global identifier across remotes.
	Hash string `json:"hash,omitempty" cyclonedx:"hash"`

	// License is the license identifier from general.license, when present.
	License string `json:"license,omitempty" cyclonedx:"license"`

	// GGUFVersion is the version of the GGUF format itself (e.g., 3).
	GGUFVersion uint32 `json:"ggufVersion" cyclonedx:"ggufVersion"`

	// Architecture is the model architecture from general.architecture (e.g., "qwen3moe", "llama").
	Architecture string `json:"architecture,omitempty" cyclonedx:"architecture"`

	// Quantization names the quantization type (e.g., "IQ4_NL", "Q4_K_M").
	Quantization string `json:"quantization,omitempty" cyclonedx:"quantization"`

	// Parameters is the number of model parameters, when the header provides it.
	Parameters uint64 `json:"parameters,omitempty" cyclonedx:"parameters"`

	// TensorCount is the number of tensors in the model.
	TensorCount uint64 `json:"tensorCount" cyclonedx:"tensorCount"`

	// Header holds the remaining header key-value pairs (namespaced general.*, llama.*, etc.)
	// that are not already exposed through the typed fields above, so additional metadata is
	// preserved without duplication.
	Header map[string]interface{} `json:"header,omitempty" cyclonedx:"header"`

	// TruncatedHeader reports whether the header was cut short during parsing (very large headers).
	TruncatedHeader bool `json:"truncatedHeader,omitempty" cyclonedx:"truncatedHeader"`
}
|
||||
@ -50,6 +50,7 @@ const (
|
||||
TerraformPkg Type = "terraform"
|
||||
WordpressPluginPkg Type = "wordpress-plugin"
|
||||
HomebrewPkg Type = "homebrew"
|
||||
ModelPkg Type = "model"
|
||||
)
|
||||
|
||||
// AllPkgs represents all supported package types
|
||||
@ -94,6 +95,7 @@ var AllPkgs = []Type{
|
||||
TerraformPkg,
|
||||
WordpressPluginPkg,
|
||||
HomebrewPkg,
|
||||
ModelPkg,
|
||||
}
|
||||
|
||||
// PackageURLType returns the PURL package type for the current package.
|
||||
|
||||
@ -155,6 +155,7 @@ func TestTypeFromPURL(t *testing.T) {
|
||||
expectedTypes.Remove(string(HomebrewPkg))
|
||||
expectedTypes.Remove(string(TerraformPkg))
|
||||
expectedTypes.Remove(string(GraalVMNativeImagePkg))
|
||||
expectedTypes.Remove(string(ModelPkg)) // no valid purl for ai artifacts currently
|
||||
expectedTypes.Remove(string(PhpPeclPkg)) // we should always consider this a pear package
|
||||
|
||||
for _, test := range tests {
|
||||
|
||||
39
syft/source/ocimodelsource/metadata.go
Normal file
39
syft/source/ocimodelsource/metadata.go
Normal file
@ -0,0 +1,39 @@
|
||||
package ocimodelsource
|
||||
|
||||
import "github.com/anchore/syft/syft/source"
|
||||
|
||||
// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
|
||||
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
|
||||
type OCIModelMetadata struct {
|
||||
// Core OCI artifact metadata (mirrors ImageMetadata)
|
||||
UserInput string `json:"userInput"`
|
||||
ID string `json:"artifactID"`
|
||||
ManifestDigest string `json:"manifestDigest"`
|
||||
MediaType string `json:"mediaType"`
|
||||
Tags []string `json:"tags"`
|
||||
Size int64 `json:"artifactSize"`
|
||||
Layers []source.LayerMetadata `json:"layers"`
|
||||
RawManifest []byte `json:"manifest"`
|
||||
RawConfig []byte `json:"config"`
|
||||
RepoDigests []string `json:"repoDigests"`
|
||||
Architecture string `json:"architecture"`
|
||||
Variant string `json:"architectureVariant,omitempty"`
|
||||
OS string `json:"os"`
|
||||
Labels map[string]string `json:"labels,omitempty"`
|
||||
|
||||
// OCI-specific metadata
|
||||
Annotations map[string]string `json:"annotations,omitempty"`
|
||||
|
||||
// Model-specific metadata
|
||||
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
|
||||
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
|
||||
}
|
||||
|
||||
// GGUFLayerInfo describes one GGUF layer of an OCI model artifact, including how
// much of the layer's blob was actually downloaded.
type GGUFLayerInfo struct {
	// Digest is the layer's content digest.
	Digest string `json:"digest"`
	// Size is the full blob size in the registry.
	Size int64 `json:"size"`
	// MediaType is the layer media type; should be "application/vnd.docker.ai.gguf.v3".
	MediaType string `json:"mediaType"`
	// Annotations are the layer's OCI annotations.
	Annotations map[string]string `json:"annotations,omitempty"`
	// FetchedBytes is how many bytes were actually fetched via range-GET.
	FetchedBytes int64 `json:"fetchedBytes"`
}
|
||||
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,260 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
	"context"
	"fmt"
	"os"
	"sync"

	"github.com/opencontainers/go-digest"

	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
	"github.com/anchore/syft/syft/source/internal"
)
|
||||
|
||||
// compile-time assertion that *ociModelSource implements source.Source
var _ source.Source = (*ociModelSource)(nil)
|
||||
|
||||
// Config holds the configuration for an OCI model artifact source.
|
||||
type Config struct {
|
||||
Reference string
|
||||
Platform string
|
||||
Alias source.Alias
|
||||
Client *RegistryClient
|
||||
Metadata *OCIModelMetadata
|
||||
TempFiles map[string]string // Virtual path -> temp file path
|
||||
}
|
||||
|
||||
// ociModelSource implements the source.Source interface for OCI model artifacts.
|
||||
type ociModelSource struct {
|
||||
id artifact.ID
|
||||
config Config
|
||||
resolver *ociModelResolver
|
||||
mutex *sync.Mutex
|
||||
}
|
||||
|
||||
// NewFromArtifact creates a new OCI model source from a fetched model artifact.
|
||||
func NewFromArtifact(artifact *ModelArtifact, client *RegistryClient, alias source.Alias) (source.Source, error) {
|
||||
// Build metadata
|
||||
metadata := buildMetadata(artifact)
|
||||
|
||||
// Fetch GGUF layer headers via range-GET
|
||||
tempFiles := make(map[string]string)
|
||||
ggufLayers := make([]GGUFLayerInfo, 0, len(artifact.GGUFLayers))
|
||||
|
||||
for idx, layer := range artifact.GGUFLayers {
|
||||
log.WithFields("digest", layer.Digest, "size", layer.Size).Debug("fetching GGUF layer header")
|
||||
|
||||
// Fetch header via range-GET
|
||||
headerData, err := client.FetchBlobRange(context.Background(), artifact.Reference, layer.Digest, MaxHeaderBytes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||
}
|
||||
|
||||
// Extract virtual path from annotations
|
||||
virtualPath := extractVirtualPath(idx, extractAnnotations(layer.Annotations))
|
||||
|
||||
// Create temp file
|
||||
tempPath, err := createTempFileFromData(headerData, virtualPath)
|
||||
if err != nil {
|
||||
// Clean up any previously created temp files
|
||||
for _, path := range tempFiles {
|
||||
_ = removeFile(path)
|
||||
}
|
||||
return nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
|
||||
tempFiles[virtualPath] = tempPath
|
||||
|
||||
// Add to GGUF layers metadata
|
||||
ggufLayers = append(ggufLayers, GGUFLayerInfo{
|
||||
Digest: layer.Digest.String(),
|
||||
Size: layer.Size,
|
||||
MediaType: string(layer.MediaType),
|
||||
Annotations: extractAnnotations(layer.Annotations),
|
||||
FetchedBytes: int64(len(headerData)),
|
||||
})
|
||||
|
||||
log.WithFields("virtualPath", virtualPath, "tempPath", tempPath, "bytes", len(headerData)).Debug("created temp file for GGUF header")
|
||||
}
|
||||
|
||||
// Update metadata with GGUF layers
|
||||
metadata.GGUFLayers = ggufLayers
|
||||
metadata.ModelFormat = "gguf"
|
||||
|
||||
// Build config
|
||||
config := Config{
|
||||
Reference: artifact.Reference.String(),
|
||||
Alias: alias,
|
||||
Client: client,
|
||||
Metadata: metadata,
|
||||
TempFiles: tempFiles,
|
||||
}
|
||||
|
||||
// Derive artifact ID
|
||||
id := deriveIDFromArtifact(config)
|
||||
|
||||
return &ociModelSource{
|
||||
id: id,
|
||||
config: config,
|
||||
mutex: &sync.Mutex{},
|
||||
}, nil
|
||||
}
|
||||
|
||||
// buildMetadata constructs OCIModelMetadata from a ModelArtifact.
|
||||
func buildMetadata(artifact *ModelArtifact) *OCIModelMetadata {
|
||||
// Extract layers
|
||||
layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
|
||||
for i, layer := range artifact.Manifest.Layers {
|
||||
layers[i] = source.LayerMetadata{
|
||||
MediaType: string(layer.MediaType),
|
||||
Digest: layer.Digest.String(),
|
||||
Size: layer.Size,
|
||||
}
|
||||
}
|
||||
|
||||
// Extract tags
|
||||
var tags []string
|
||||
if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
|
||||
if tag := tagged.TagStr(); tag != "" {
|
||||
tags = []string{tag}
|
||||
}
|
||||
}
|
||||
|
||||
// Extract repo digests
|
||||
var repoDigests []string
|
||||
if artifact.ManifestDigest != "" {
|
||||
repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
|
||||
}
|
||||
|
||||
// Build metadata
|
||||
return &OCIModelMetadata{
|
||||
UserInput: artifact.Reference.String(),
|
||||
ID: artifact.ManifestDigest,
|
||||
ManifestDigest: artifact.ManifestDigest,
|
||||
MediaType: string(artifact.Manifest.MediaType),
|
||||
Tags: tags,
|
||||
Size: calculateTotalSize(layers),
|
||||
Layers: layers,
|
||||
RawManifest: artifact.RawManifest,
|
||||
RawConfig: artifact.RawConfig,
|
||||
RepoDigests: repoDigests,
|
||||
Architecture: artifact.Config.Architecture,
|
||||
Variant: artifact.Config.Variant,
|
||||
OS: artifact.Config.OS,
|
||||
Labels: artifact.Config.Config.Labels,
|
||||
Annotations: extractManifestAnnotations(artifact.Manifest),
|
||||
}
|
||||
}
|
||||
|
||||
// extractAnnotations returns the given annotations map unchanged, substituting a
// fresh empty map when it is nil so callers always receive a non-nil map.
func extractAnnotations(annotations map[string]string) map[string]string {
	if annotations != nil {
		return annotations
	}
	return map[string]string{}
}
|
||||
|
||||
// extractManifestAnnotations returns the result of manifest.GetAnnotations() when
// the given value provides that method, and an empty non-nil map otherwise.
func extractManifestAnnotations(manifest interface{}) map[string]string {
	type annotated interface{ GetAnnotations() map[string]string }

	provider, ok := manifest.(annotated)
	if !ok {
		return map[string]string{}
	}
	return provider.GetAnnotations()
}
|
||||
|
||||
// calculateTotalSize sums up the size of all layers.
|
||||
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||
var total int64
|
||||
for _, layer := range layers {
|
||||
total += layer.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// deriveIDFromArtifact generates an artifact ID from the config.
|
||||
func deriveIDFromArtifact(cfg Config) artifact.ID {
|
||||
var info string
|
||||
|
||||
if !cfg.Alias.IsEmpty() {
|
||||
// Use alias for stable artifact ID
|
||||
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
|
||||
} else if cfg.Metadata.ManifestDigest != "" {
|
||||
// Use manifest digest
|
||||
info = cfg.Metadata.ManifestDigest
|
||||
} else {
|
||||
// Fall back to reference
|
||||
log.Warn("no explicit name/version or manifest digest, deriving artifact ID from reference")
|
||||
info = cfg.Reference
|
||||
}
|
||||
|
||||
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String())
|
||||
}
|
||||
|
||||
// ID returns the artifact ID.
|
||||
func (s *ociModelSource) ID() artifact.ID {
|
||||
return s.id
|
||||
}
|
||||
|
||||
// Describe returns a description of the source.
|
||||
func (s *ociModelSource) Describe() source.Description {
|
||||
name := s.config.Reference
|
||||
version := ""
|
||||
supplier := ""
|
||||
|
||||
if !s.config.Alias.IsEmpty() {
|
||||
a := s.config.Alias
|
||||
if a.Name != "" {
|
||||
name = a.Name
|
||||
}
|
||||
if a.Version != "" {
|
||||
version = a.Version
|
||||
}
|
||||
if a.Supplier != "" {
|
||||
supplier = a.Supplier
|
||||
}
|
||||
}
|
||||
|
||||
return source.Description{
|
||||
ID: string(s.id),
|
||||
Name: name,
|
||||
Version: version,
|
||||
Supplier: supplier,
|
||||
Metadata: s.config.Metadata,
|
||||
}
|
||||
}
|
||||
|
||||
// FileResolver returns a file resolver for accessing GGUF header files.
|
||||
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
if s.resolver == nil {
|
||||
s.resolver = newOCIModelResolver(s.config.TempFiles)
|
||||
}
|
||||
|
||||
return s.resolver, nil
|
||||
}
|
||||
|
||||
// Close cleans up temporary files.
|
||||
func (s *ociModelSource) Close() error {
|
||||
s.mutex.Lock()
|
||||
defer s.mutex.Unlock()
|
||||
|
||||
if s.resolver != nil {
|
||||
if err := s.resolver.cleanup(); err != nil {
|
||||
log.WithFields("error", err).Warn("failed to cleanup temp files")
|
||||
return err
|
||||
}
|
||||
s.resolver = nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// removeFile is currently a stub: it ignores path and always reports success.
// NOTE(review): the name suggests it should delete the file at path; no
// removal or logging happens yet.
func removeFile(path string) error {
	_ = path // unused until the real implementation lands
	return nil
}
|
||||
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,76 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/anchore/stereoscope/pkg/image"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/source"
|
||||
)
|
||||
|
||||
// NewSourceProvider creates a new OCI model artifact source provider.
|
||||
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
|
||||
return &ociModelSourceProvider{
|
||||
reference: reference,
|
||||
registryOpts: registryOpts,
|
||||
alias: alias,
|
||||
}
|
||||
}
|
||||
|
||||
type ociModelSourceProvider struct {
|
||||
reference string
|
||||
registryOpts *image.RegistryOptions
|
||||
alias source.Alias
|
||||
}
|
||||
|
||||
func (p *ociModelSourceProvider) Name() string {
|
||||
return "oci-model-artifact"
|
||||
}
|
||||
|
||||
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
|
||||
// Create registry client
|
||||
client, err := NewRegistryClient(p.registryOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create registry client: %w", err)
|
||||
}
|
||||
|
||||
// Check if this is a model artifact (lightweight check)
|
||||
log.WithFields("reference", p.reference).Debug("checking if reference is a model artifact")
|
||||
|
||||
isModel, err := client.IsModelArtifactReference(ctx, p.reference)
|
||||
if err != nil {
|
||||
// Log the error but don't fail - let other providers try
|
||||
log.WithFields("reference", p.reference, "error", err).Debug("failed to check if reference is a model artifact")
|
||||
return nil, fmt.Errorf("not an OCI model artifact: %w", err)
|
||||
}
|
||||
|
||||
if !isModel {
|
||||
log.WithFields("reference", p.reference).Debug("reference is not a model artifact")
|
||||
return nil, fmt.Errorf("not an OCI model artifact")
|
||||
}
|
||||
|
||||
log.WithFields("reference", p.reference).Info("detected OCI model artifact, fetching headers")
|
||||
|
||||
// Fetch the full model artifact with metadata
|
||||
artifact, err := client.FetchModelArtifact(ctx, p.reference)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch model artifact: %w", err)
|
||||
}
|
||||
|
||||
// Check if there are any GGUF layers
|
||||
if len(artifact.GGUFLayers) == 0 {
|
||||
log.WithFields("reference", p.reference).Warn("model artifact has no GGUF layers")
|
||||
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||
}
|
||||
|
||||
log.WithFields("reference", p.reference, "ggufLayers", len(artifact.GGUFLayers)).Info("found GGUF layers in model artifact")
|
||||
|
||||
// Create the source
|
||||
src, err := NewFromArtifact(artifact, client, p.alias)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create OCI model source: %w", err)
|
||||
}
|
||||
|
||||
return src, nil
|
||||
}
|
||||
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
@ -0,0 +1,53 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestExtractVirtualPath(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
layerIndex int
|
||||
annotations map[string]string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "with title annotation",
|
||||
layerIndex: 0,
|
||||
annotations: map[string]string{"org.opencontainers.image.title": "model.gguf"},
|
||||
expected: "/model.gguf",
|
||||
},
|
||||
{
|
||||
name: "without title annotation",
|
||||
layerIndex: 1,
|
||||
annotations: map[string]string{},
|
||||
expected: "/model-layer-1.gguf",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractVirtualPath(tt.layerIndex, tt.annotations)
|
||||
assert.Equal(t, tt.expected, result)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCalculateTotalSize(t *testing.T) {
|
||||
// This is imported from syft/source
|
||||
// Just a simple test to ensure it works
|
||||
layers := []struct {
|
||||
MediaType string
|
||||
Digest string
|
||||
Size int64
|
||||
}{
|
||||
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:abc", 100},
|
||||
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:def", 200},
|
||||
}
|
||||
|
||||
// We'd need to convert to source.LayerMetadata to test this properly
|
||||
// For now, just ensure the package compiles
|
||||
assert.NotNil(t, layers)
|
||||
}
|
||||
227
syft/source/ocimodelsource/registry_client.go
Normal file
227
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,227 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"

	"github.com/google/go-containerregistry/pkg/authn"
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/remote"

	"github.com/anchore/stereoscope/pkg/image"
)
|
||||
|
||||
// Media types used by Docker's OCI packaging of AI models, and the header
// fetch limit.
// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
const (
	// ModelConfigMediaType is the config media type that marks a manifest as a
	// model artifact.
	ModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json"

	// GGUFLayerMediaType is the media type of layers that carry GGUF model files.
	GGUFLayerMediaType = "application/vnd.docker.ai.gguf.v3"

	// MaxHeaderBytes is the upper bound on bytes fetched when reading a GGUF
	// header (10 MB).
	MaxHeaderBytes = 10 * 1024 * 1024
)
|
||||
|
||||
// RegistryClient handles OCI registry interactions for model artifacts.
|
||||
type RegistryClient struct {
|
||||
options []remote.Option
|
||||
}
|
||||
|
||||
// NewRegistryClient creates a new registry client with authentication from RegistryOptions.
|
||||
func NewRegistryClient(registryOpts *image.RegistryOptions) (*RegistryClient, error) {
|
||||
opts, err := buildRemoteOptions(registryOpts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to build remote options: %w", err)
|
||||
}
|
||||
|
||||
return &RegistryClient{
|
||||
options: opts,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
|
||||
func buildRemoteOptions(registryOpts *image.RegistryOptions) ([]remote.Option, error) {
|
||||
var opts []remote.Option
|
||||
|
||||
if registryOpts == nil {
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
// Build authenticator
|
||||
authenticator := buildAuthenticator(registryOpts)
|
||||
opts = append(opts, remote.WithAuth(authenticator))
|
||||
|
||||
// Handle TLS settings
|
||||
if registryOpts.InsecureSkipTLSVerify {
|
||||
transport := remote.DefaultTransport.(*http.Transport).Clone()
|
||||
transport.TLSClientConfig.InsecureSkipVerify = true
|
||||
opts = append(opts, remote.WithTransport(transport))
|
||||
}
|
||||
|
||||
// Handle insecure HTTP
|
||||
if registryOpts.InsecureUseHTTP {
|
||||
opts = append(opts, remote.WithTransport(http.DefaultTransport))
|
||||
}
|
||||
|
||||
return opts, nil
|
||||
}
|
||||
|
||||
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
|
||||
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
|
||||
// If credentials are provided, use them
|
||||
if len(registryOpts.Credentials) > 0 {
|
||||
// Use the first credential set (we could enhance this to match by authority)
|
||||
cred := registryOpts.Credentials[0]
|
||||
|
||||
if cred.Token != "" {
|
||||
return &authn.Bearer{Token: cred.Token}
|
||||
}
|
||||
|
||||
if cred.Username != "" || cred.Password != "" {
|
||||
return &authn.Basic{
|
||||
Username: cred.Username,
|
||||
Password: cred.Password,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fall back to anonymous authenticator
|
||||
return authn.Anonymous
|
||||
}
|
||||
|
||||
// ModelArtifact represents a parsed OCI model artifact.
|
||||
type ModelArtifact struct {
|
||||
Reference name.Reference
|
||||
Manifest *v1.Manifest
|
||||
Config *v1.ConfigFile
|
||||
RawManifest []byte
|
||||
RawConfig []byte
|
||||
ManifestDigest string
|
||||
GGUFLayers []v1.Descriptor
|
||||
}
|
||||
|
||||
// FetchModelArtifact fetches and parses an OCI model artifact from the registry.
|
||||
func (c *RegistryClient) FetchModelArtifact(ctx context.Context, refStr string) (*ModelArtifact, error) {
|
||||
// Parse reference
|
||||
ref, err := name.ParseReference(refStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||
}
|
||||
|
||||
// Fetch descriptor
|
||||
desc, err := remote.Get(ref, c.options...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||
}
|
||||
|
||||
// Parse manifest
|
||||
manifest := &v1.Manifest{}
|
||||
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||
}
|
||||
|
||||
// Check if this is a model artifact
|
||||
if !isModelArtifact(manifest) {
|
||||
return nil, fmt.Errorf("not a model artifact (config media type: %s)", manifest.Config.MediaType)
|
||||
}
|
||||
|
||||
// Fetch config
|
||||
img, err := desc.Image()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||
}
|
||||
|
||||
configFile, err := img.ConfigFile()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||
}
|
||||
|
||||
rawConfig, err := img.RawConfigFile()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||
}
|
||||
|
||||
// Extract GGUF layers
|
||||
ggufLayers := extractGGUFLayers(manifest)
|
||||
|
||||
return &ModelArtifact{
|
||||
Reference: ref,
|
||||
Manifest: manifest,
|
||||
Config: configFile,
|
||||
RawManifest: desc.Manifest,
|
||||
RawConfig: rawConfig,
|
||||
ManifestDigest: desc.Digest.String(),
|
||||
GGUFLayers: ggufLayers,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// isModelArtifact checks if the manifest represents a model artifact.
|
||||
func isModelArtifact(manifest *v1.Manifest) bool {
|
||||
return manifest.Config.MediaType == ModelConfigMediaType
|
||||
}
|
||||
|
||||
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||
var ggufLayers []v1.Descriptor
|
||||
for _, layer := range manifest.Layers {
|
||||
if string(layer.MediaType) == GGUFLayerMediaType {
|
||||
ggufLayers = append(ggufLayers, layer)
|
||||
}
|
||||
}
|
||||
return ggufLayers
|
||||
}
|
||||
|
||||
// FetchBlobRange fetches a byte range from a blob in the registry.
|
||||
// This is used to fetch only the GGUF header without downloading the entire multi-GB file.
|
||||
func (c *RegistryClient) FetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||
// Use the remote package's Layer fetching with our options
|
||||
// Then read only the first maxBytes
|
||||
repo := ref.Context()
|
||||
|
||||
// Fetch the layer (blob) using remote.Layer
|
||||
layer, err := remote.Layer(repo.Digest(digest.String()), c.options...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||
}
|
||||
|
||||
// Get the compressed reader
|
||||
reader, err := layer.Compressed()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
// Read up to maxBytes
|
||||
data := make([]byte, maxBytes)
|
||||
n, err := io.ReadFull(reader, data)
|
||||
if err != nil && err != io.ErrUnexpectedEOF {
|
||||
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||
}
|
||||
|
||||
return data[:n], nil
|
||||
}
|
||||
|
||||
// IsModelArtifactReference checks if a reference points to a model artifact.
|
||||
// This is a lightweight check that only fetches the manifest.
|
||||
func (c *RegistryClient) IsModelArtifactReference(ctx context.Context, refStr string) (bool, error) {
|
||||
ref, err := name.ParseReference(refStr)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||
}
|
||||
|
||||
desc, err := remote.Get(ref, c.options...)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||
}
|
||||
|
||||
manifest := &v1.Manifest{}
|
||||
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||
return false, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||
}
|
||||
|
||||
return isModelArtifact(manifest), nil
|
||||
}
|
||||
211
syft/source/ocimodelsource/resolver.go
Normal file
211
syft/source/ocimodelsource/resolver.go
Normal file
@ -0,0 +1,211 @@
|
||||
package ocimodelsource
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
stereofile "github.com/anchore/stereoscope/pkg/file"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
)
|
||||
|
||||
var _ file.Resolver = (*ociModelResolver)(nil)
|
||||
|
||||
// ociModelResolver is a minimal file.Resolver implementation that provides access to
|
||||
// GGUF header data fetched from OCI model artifacts via range-GET requests.
|
||||
type ociModelResolver struct {
|
||||
tempFiles map[string]string // maps virtual path -> temporary file path
|
||||
locations []file.Location
|
||||
}
|
||||
|
||||
// newOCIModelResolver creates a new resolver with the given temporary files.
|
||||
func newOCIModelResolver(tempFiles map[string]string) *ociModelResolver {
|
||||
// Create locations for all temp files
|
||||
locations := make([]file.Location, 0, len(tempFiles))
|
||||
for virtualPath, tempPath := range tempFiles {
|
||||
// Use NewVirtualLocation: realPath is tempPath, accessPath is virtualPath
|
||||
locations = append(locations, file.NewVirtualLocation(tempPath, virtualPath))
|
||||
}
|
||||
|
||||
return &ociModelResolver{
|
||||
tempFiles: tempFiles,
|
||||
locations: locations,
|
||||
}
|
||||
}
|
||||
|
||||
// FileContentsByLocation returns the contents of the file at the given location.
|
||||
func (r *ociModelResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||
// Get the real path (temp file) from the location
|
||||
realPath := location.RealPath
|
||||
|
||||
// Check if this is one of our managed files
|
||||
found := false
|
||||
for _, tempPath := range r.tempFiles {
|
||||
if tempPath == realPath {
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if !found {
|
||||
return nil, fmt.Errorf("location not found in resolver: %s", location.RealPath)
|
||||
}
|
||||
|
||||
// Open and return the temp file
|
||||
f, err := os.Open(realPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open temp file: %w", err)
|
||||
}
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||
func (r *ociModelResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||
realPath := location.RealPath
|
||||
|
||||
// Stat the temp file
|
||||
info, err := os.Stat(realPath)
|
||||
if err != nil {
|
||||
return file.Metadata{}, fmt.Errorf("failed to stat temp file: %w", err)
|
||||
}
|
||||
|
||||
// Return basic metadata
|
||||
return file.Metadata{
|
||||
Path: location.AccessPath, // Use AccessPath for virtual path
|
||||
Type: stereofile.TypeRegular,
|
||||
FileInfo: info,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// HasPath checks if the given path exists in the resolver.
|
||||
func (r *ociModelResolver) HasPath(path string) bool {
|
||||
_, exists := r.tempFiles[path]
|
||||
return exists
|
||||
}
|
||||
|
||||
// FilesByPath returns locations for files matching the given paths.
|
||||
func (r *ociModelResolver) FilesByPath(paths ...string) ([]file.Location, error) {
|
||||
var results []file.Location
|
||||
|
||||
for _, path := range paths {
|
||||
for virtualPath, tempPath := range r.tempFiles {
|
||||
if virtualPath == path {
|
||||
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||
func (r *ociModelResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||
var results []file.Location
|
||||
|
||||
for _, pattern := range patterns {
|
||||
for virtualPath, tempPath := range r.tempFiles {
|
||||
// Match against the virtual path
|
||||
matched, err := doublestar.Match(pattern, virtualPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to match pattern %q: %w", pattern, err)
|
||||
}
|
||||
|
||||
if matched {
|
||||
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return results, nil
|
||||
}
|
||||
|
||||
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||
func (r *ociModelResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||
// This is not applicable for OCI model artifacts.
|
||||
func (r *ociModelResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||
return nil
|
||||
}
|
||||
|
||||
// AllLocations returns all file locations in the resolver.
|
||||
func (r *ociModelResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||
ch := make(chan file.Location)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
for _, loc := range r.locations {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case ch <- loc:
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
// cleanup removes all temporary files managed by this resolver.
|
||||
func (r *ociModelResolver) cleanup() error {
|
||||
var errs []error
|
||||
|
||||
for virtualPath, tempPath := range r.tempFiles {
|
||||
if err := os.Remove(tempPath); err != nil {
|
||||
errs = append(errs, fmt.Errorf("failed to remove temp file for %s: %w", virtualPath, err))
|
||||
}
|
||||
}
|
||||
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("cleanup errors: %v", errs)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractVirtualPath returns the virtual path under which a GGUF layer is
// exposed.
//
// When the "org.opencontainers.image.title" annotation is present and
// non-empty, the result is that title rooted at "/"; leading slashes in the
// title are dropped so the result never starts with "//". Otherwise (missing
// annotation, nil map, empty title, or a title made only of slashes) the
// result is "/model-layer-<layerIndex>.gguf".
func extractVirtualPath(layerIndex int, annotations map[string]string) string {
	title := strings.TrimLeft(annotations["org.opencontainers.image.title"], "/")
	if title != "" {
		return "/" + title
	}

	// no usable title: fall back to a generic, index-based name
	return fmt.Sprintf("/model-layer-%d.gguf", layerIndex)
}
|
||||
|
||||
// createTempFileFromData writes data to a new temporary file in the default
// temp directory and returns the file's path.
//
// The temp file name is derived from the base name of virtualPath: the stem
// becomes the prefix and the extension is kept as the suffix (for example
// "/model.gguf" yields "model-<random>.gguf"). A virtualPath whose base name is
// just the path separator uses the stem "model" instead, since a separator is
// not allowed in a temp file pattern.
//
// The file is closed before returning. If writing or closing fails, the
// partially written file is removed and an error is returned.
func createTempFileFromData(data []byte, virtualPath string) (string, error) {
	filename := filepath.Base(virtualPath)
	if filename == string(filepath.Separator) {
		filename = "model"
	}
	ext := filepath.Ext(filename)
	prefix := strings.TrimSuffix(filename, ext) + "-"

	tempFile, err := os.CreateTemp("", prefix+"*"+ext)
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()

	if _, err := tempFile.Write(data); err != nil {
		// close before removing: an open file cannot be deleted on every platform
		_ = tempFile.Close()
		_ = os.Remove(tempPath)
		return "", fmt.Errorf("failed to write to temp file: %w", err)
	}

	// a failed close can mean the data never reached disk, so it must not be ignored
	if err := tempFile.Close(); err != nil {
		_ = os.Remove(tempPath)
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}

	return tempPath, nil
}
|
||||
@ -7,15 +7,17 @@ import (
|
||||
"github.com/anchore/syft/syft/source"
|
||||
"github.com/anchore/syft/syft/source/directorysource"
|
||||
"github.com/anchore/syft/syft/source/filesource"
|
||||
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||
"github.com/anchore/syft/syft/source/snapsource"
|
||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||
)
|
||||
|
||||
const (
|
||||
FileTag = stereoscope.FileTag
|
||||
DirTag = stereoscope.DirTag
|
||||
PullTag = stereoscope.PullTag
|
||||
SnapTag = "snap"
|
||||
FileTag = stereoscope.FileTag
|
||||
DirTag = stereoscope.DirTag
|
||||
PullTag = stereoscope.PullTag
|
||||
SnapTag = "snap"
|
||||
OCIModelTag = "oci-model"
|
||||
)
|
||||
|
||||
// All returns all the configured source providers known to syft
|
||||
@ -40,6 +42,9 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
||||
|
||||
// 3. try remote sources after everything else...
|
||||
|
||||
// --from oci-model (model artifacts with header-only fetching)
|
||||
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), OCIModelTag)).
|
||||
|
||||
// --from docker, registry, etc.
|
||||
Join(stereoscopeProviders.Select(PullTag)...).
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user