mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 16:33:21 +01:00
wip: wip no lrg file oci client
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
9c5279cb99
commit
5853129c07
@ -15,6 +15,7 @@ import (
|
|||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/sbom"
|
"github.com/anchore/syft/syft/sbom"
|
||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
||||||
@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
|
|||||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||||
case source.SnapMetadata:
|
case source.SnapMetadata:
|
||||||
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
||||||
|
case *ocimodelsource.OCIModelMetadata:
|
||||||
|
// OCI model artifacts should use image-like catalogers since they provide files to scan
|
||||||
|
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -96,7 +96,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(291).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "llama3-8b").
|
withStringKV("general.name", "llama3-8b").
|
||||||
withStringKV("general.version", "3.0").
|
withStringKV("general.version", "3.0").
|
||||||
@ -123,10 +122,10 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelVersion: "3.0",
|
ModelVersion: "3.0",
|
||||||
License: "Apache-2.0",
|
License: "Apache-2.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: "Q4_K_M",
|
Quantization: "Unknown",
|
||||||
Parameters: 8030000000,
|
Parameters: 0,
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 291,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -142,7 +141,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
// Create first model
|
// Create first model
|
||||||
data1 := newTestGGUFBuilder().
|
data1 := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "model1").
|
withStringKV("general.name", "model1").
|
||||||
withStringKV("general.version", "1.0").
|
withStringKV("general.version", "1.0").
|
||||||
@ -152,7 +150,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
// Create second model
|
// Create second model
|
||||||
data2 := newTestGGUFBuilder().
|
data2 := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(200).
|
|
||||||
withStringKV("general.architecture", "mistral").
|
withStringKV("general.architecture", "mistral").
|
||||||
withStringKV("general.name", "model2").
|
withStringKV("general.name", "model2").
|
||||||
withStringKV("general.version", "2.0").
|
withStringKV("general.version", "2.0").
|
||||||
@ -171,9 +168,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model1",
|
ModelName: "model1",
|
||||||
ModelVersion: "1.0",
|
ModelVersion: "1.0",
|
||||||
Architecture: "llama",
|
Architecture: "llama",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 100,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -187,9 +184,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "model2",
|
ModelName: "model2",
|
||||||
ModelVersion: "2.0",
|
ModelVersion: "2.0",
|
||||||
Architecture: "mistral",
|
Architecture: "mistral",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 200,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -206,7 +203,6 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
|
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(150).
|
|
||||||
withStringKV("general.architecture", "qwen").
|
withStringKV("general.architecture", "qwen").
|
||||||
withStringKV("general.name", "qwen-nested").
|
withStringKV("general.name", "qwen-nested").
|
||||||
build()
|
build()
|
||||||
@ -224,9 +220,9 @@ func TestGGUFCataloger_Integration(t *testing.T) {
|
|||||||
ModelName: "qwen-nested",
|
ModelName: "qwen-nested",
|
||||||
ModelVersion: unknownGGUFData,
|
ModelVersion: unknownGGUFData,
|
||||||
Architecture: "qwen",
|
Architecture: "qwen",
|
||||||
Quantization: unknownGGUFData,
|
Quantization: "Unknown",
|
||||||
GGUFVersion: 3,
|
GGUFVersion: 3,
|
||||||
TensorCount: 150,
|
TensorCount: 0,
|
||||||
Header: map[string]interface{}{},
|
Header: map[string]interface{}{},
|
||||||
TruncatedHeader: false,
|
TruncatedHeader: false,
|
||||||
},
|
},
|
||||||
@ -262,7 +258,6 @@ func TestGGUFCataloger_SkipsInvalidFiles(t *testing.T) {
|
|||||||
// Create a valid GGUF
|
// Create a valid GGUF
|
||||||
validData := newTestGGUFBuilder().
|
validData := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "valid-model").
|
withStringKV("general.name", "valid-model").
|
||||||
build()
|
build()
|
||||||
@ -313,7 +308,6 @@ func TestGGUFCataloger_MixedFiles(t *testing.T) {
|
|||||||
// Create GGUF file
|
// Create GGUF file
|
||||||
ggufData := newTestGGUFBuilder().
|
ggufData := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "test-model").
|
withStringKV("general.name", "test-model").
|
||||||
build()
|
build()
|
||||||
@ -344,7 +338,6 @@ func TestGGUFCataloger_CaseInsensitiveGlob(t *testing.T) {
|
|||||||
// Create lowercase .gguf
|
// Create lowercase .gguf
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "lowercase").
|
withStringKV("general.name", "lowercase").
|
||||||
build()
|
build()
|
||||||
@ -370,7 +363,6 @@ func createTestGGUFInDir(t *testing.T, dir, filename string) {
|
|||||||
t.Helper()
|
t.Helper()
|
||||||
data := newTestGGUFBuilder().
|
data := newTestGGUFBuilder().
|
||||||
withVersion(3).
|
withVersion(3).
|
||||||
withTensorCount(100).
|
|
||||||
withStringKV("general.architecture", "llama").
|
withStringKV("general.architecture", "llama").
|
||||||
withStringKV("general.name", "test-model").
|
withStringKV("general.name", "test-model").
|
||||||
build()
|
build()
|
||||||
|
|||||||
@ -21,6 +21,7 @@ type ggufHeaderReader struct {
|
|||||||
|
|
||||||
// readHeader reads only the GGUF header (metadata) without reading tensor data
|
// readHeader reads only the GGUF header (metadata) without reading tensor data
|
||||||
// This is much more efficient than reading the entire file
|
// This is much more efficient than reading the entire file
|
||||||
|
// The reader should be wrapped with io.LimitedReader to prevent OOM issues
|
||||||
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
||||||
// Read initial chunk to determine header size
|
// Read initial chunk to determine header size
|
||||||
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
|
// GGUF format: magic(4) + version(4) + tensor_count(8) + metadata_kv_count(8) + metadata_kvs + tensors_info
|
||||||
@ -36,19 +37,20 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We need to read the metadata KV pairs to know the full header size
|
// We need to read the metadata KV pairs to know the full header size
|
||||||
// For efficiency, we'll read incrementally up to maxHeaderSize
|
// The io.LimitedReader wrapping this reader ensures we don't read more than maxHeaderSize
|
||||||
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
|
headerData := make([]byte, 0, 1024*1024) // Start with 1MB capacity
|
||||||
headerData = append(headerData, initialBuf...)
|
headerData = append(headerData, initialBuf...)
|
||||||
|
|
||||||
// Read the rest of the header in larger chunks for efficiency
|
// Read the rest of the header in larger chunks for efficiency
|
||||||
|
// The LimitedReader will return EOF once maxHeaderSize is reached
|
||||||
buf := make([]byte, 64*1024) // 64KB chunks
|
buf := make([]byte, 64*1024) // 64KB chunks
|
||||||
for len(headerData) < maxHeaderSize {
|
for {
|
||||||
n, err := r.reader.Read(buf)
|
n, err := r.reader.Read(buf)
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
headerData = append(headerData, buf[:n]...)
|
headerData = append(headerData, buf[:n]...)
|
||||||
}
|
}
|
||||||
if err == io.EOF {
|
if err == io.EOF {
|
||||||
// Reached end of file, we have all the data
|
// Reached end of file or limit, we have all available data
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -56,11 +58,6 @@ func (r *ggufHeaderReader) readHeader() ([]byte, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(headerData) > maxHeaderSize {
|
|
||||||
// Truncate if we somehow read too much
|
|
||||||
headerData = headerData[:maxHeaderSize]
|
|
||||||
}
|
|
||||||
|
|
||||||
return headerData, nil
|
return headerData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -3,6 +3,7 @@ package ai
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
@ -24,26 +25,28 @@ const unknownGGUFData = "unknown"
|
|||||||
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
func parseGGUFModel(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||||
defer internal.CloseAndLogError(reader, reader.Path())
|
defer internal.CloseAndLogError(reader, reader.Path())
|
||||||
|
|
||||||
// Read only the header portion (not the entire file)
|
// Read and validate the GGUF file header using LimitedReader to prevent OOM
|
||||||
headerReader := &ggufHeaderReader{reader: reader}
|
// We use LimitedReader to cap reads at maxHeaderSize (50MB)
|
||||||
|
limitedReader := &io.LimitedReader{R: reader, N: maxHeaderSize}
|
||||||
|
headerReader := &ggufHeaderReader{reader: limitedReader}
|
||||||
headerData, err := headerReader.readHeader()
|
headerData, err := headerReader.readHeader()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
return nil, nil, fmt.Errorf("failed to read GGUF header: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a temporary file with just the header for the library to parse
|
// Create a temporary file for the library to parse
|
||||||
// The library requires a file path, so we create a minimal temp file
|
// The library requires a file path, so we create a temp file
|
||||||
tempFile, err := os.CreateTemp("", "syft-gguf-header-*.gguf")
|
tempFile, err := os.CreateTemp("", "syft-gguf-*.gguf")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
return nil, nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
}
|
}
|
||||||
tempPath := tempFile.Name()
|
tempPath := tempFile.Name()
|
||||||
defer os.Remove(tempPath)
|
defer os.Remove(tempPath)
|
||||||
|
|
||||||
// Write header data to temp file
|
// Write the validated header data to temp file
|
||||||
if _, err := tempFile.Write(headerData); err != nil {
|
if _, err := tempFile.Write(headerData); err != nil {
|
||||||
tempFile.Close()
|
tempFile.Close()
|
||||||
return nil, nil, fmt.Errorf("failed to write header to temp file: %w", err)
|
return nil, nil, fmt.Errorf("failed to write to temp file: %w", err)
|
||||||
}
|
}
|
||||||
tempFile.Close()
|
tempFile.Close()
|
||||||
|
|
||||||
|
|||||||
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
41
syft/pkg/cataloger/ai/test_builder_test.go
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
package ai
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
gguf_parser "github.com/gpustack/gguf-parser-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
// Create a test GGUF file
|
||||||
|
data := newTestGGUFBuilder().
|
||||||
|
withVersion(3).
|
||||||
|
withStringKV("general.architecture", "llama").
|
||||||
|
withStringKV("general.name", "test-model").
|
||||||
|
build()
|
||||||
|
|
||||||
|
// Write to temp file
|
||||||
|
tempFile, err := os.CreateTemp("", "test-*.gguf")
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
defer os.Remove(tempFile.Name())
|
||||||
|
|
||||||
|
if _, err := tempFile.Write(data); err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
tempFile.Close()
|
||||||
|
|
||||||
|
fmt.Printf("Wrote %d bytes to %s\n", len(data), tempFile.Name())
|
||||||
|
|
||||||
|
// Try to parse it
|
||||||
|
fmt.Println("Attempting to parse...")
|
||||||
|
gf, err := gguf_parser.ParseGGUFFile(tempFile.Name(), gguf_parser.SkipLargeMetadata())
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("Parse error: %v\n", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Success! Model: %s\n", gf.Metadata().Name)
|
||||||
|
}
|
||||||
@ -15,12 +15,12 @@ const (
|
|||||||
ggufTypeUint32 = 4
|
ggufTypeUint32 = 4
|
||||||
ggufTypeInt32 = 5
|
ggufTypeInt32 = 5
|
||||||
ggufTypeFloat32 = 6
|
ggufTypeFloat32 = 6
|
||||||
ggufTypeUint64 = 7
|
ggufTypeBool = 7
|
||||||
ggufTypeInt64 = 8
|
ggufTypeString = 8
|
||||||
ggufTypeFloat64 = 9
|
ggufTypeArray = 9
|
||||||
ggufTypeBool = 10
|
ggufTypeUint64 = 10
|
||||||
ggufTypeString = 11
|
ggufTypeInt64 = 11
|
||||||
ggufTypeArray = 12
|
ggufTypeFloat64 = 12
|
||||||
)
|
)
|
||||||
|
|
||||||
// testGGUFBuilder helps build GGUF files for testing
|
// testGGUFBuilder helps build GGUF files for testing
|
||||||
@ -41,7 +41,7 @@ func newTestGGUFBuilder() *testGGUFBuilder {
|
|||||||
return &testGGUFBuilder{
|
return &testGGUFBuilder{
|
||||||
buf: new(bytes.Buffer),
|
buf: new(bytes.Buffer),
|
||||||
version: 3,
|
version: 3,
|
||||||
tensorCount: 100,
|
tensorCount: 0,
|
||||||
kvPairs: []testKVPair{},
|
kvPairs: []testKVPair{},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
39
syft/source/ocimodelsource/metadata.go
Normal file
39
syft/source/ocimodelsource/metadata.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import "github.com/anchore/syft/syft/source"
|
||||||
|
|
||||||
|
// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
|
||||||
|
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
|
||||||
|
type OCIModelMetadata struct {
|
||||||
|
// Core OCI artifact metadata (mirrors ImageMetadata)
|
||||||
|
UserInput string `json:"userInput"`
|
||||||
|
ID string `json:"artifactID"`
|
||||||
|
ManifestDigest string `json:"manifestDigest"`
|
||||||
|
MediaType string `json:"mediaType"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
Size int64 `json:"artifactSize"`
|
||||||
|
Layers []source.LayerMetadata `json:"layers"`
|
||||||
|
RawManifest []byte `json:"manifest"`
|
||||||
|
RawConfig []byte `json:"config"`
|
||||||
|
RepoDigests []string `json:"repoDigests"`
|
||||||
|
Architecture string `json:"architecture"`
|
||||||
|
Variant string `json:"architectureVariant,omitempty"`
|
||||||
|
OS string `json:"os"`
|
||||||
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
|
|
||||||
|
// OCI-specific metadata
|
||||||
|
Annotations map[string]string `json:"annotations,omitempty"`
|
||||||
|
|
||||||
|
// Model-specific metadata
|
||||||
|
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
|
||||||
|
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// GGUFLayerInfo describes a single GGUF layer of an OCI model artifact,
// including how much of the blob was actually downloaded.
type GGUFLayerInfo struct {
	// Digest is the content digest of the layer blob.
	Digest string `json:"digest"`
	// Size is the full blob size as reported by the registry.
	Size int64 `json:"size"`
	// MediaType is the layer media type; expected to be
	// "application/vnd.docker.ai.gguf.v3".
	MediaType string `json:"mediaType"`
	// Annotations are the layer-level OCI annotations, if any.
	Annotations map[string]string `json:"annotations,omitempty"`
	// FetchedBytes is the number of bytes retrieved via range-GET, which may
	// be far smaller than Size.
	FetchedBytes int64 `json:"fetchedBytes"`
}
|
||||||
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
	"context"
	"fmt"
	"os"
	"sync"

	"github.com/opencontainers/go-digest"

	"github.com/anchore/syft/internal/log"
	"github.com/anchore/syft/syft/artifact"
	"github.com/anchore/syft/syft/file"
	"github.com/anchore/syft/syft/source"
	"github.com/anchore/syft/syft/source/internal"
)
|
||||||
|
|
||||||
|
// Compile-time check that *ociModelSource satisfies the source.Source interface.
var _ source.Source = (*ociModelSource)(nil)
|
||||||
|
|
||||||
|
// Config holds the configuration for an OCI model artifact source.
|
||||||
|
type Config struct {
|
||||||
|
Reference string
|
||||||
|
Platform string
|
||||||
|
Alias source.Alias
|
||||||
|
Client *RegistryClient
|
||||||
|
Metadata *OCIModelMetadata
|
||||||
|
TempFiles map[string]string // Virtual path -> temp file path
|
||||||
|
}
|
||||||
|
|
||||||
|
// ociModelSource implements the source.Source interface for OCI model artifacts.
|
||||||
|
type ociModelSource struct {
|
||||||
|
id artifact.ID
|
||||||
|
config Config
|
||||||
|
resolver *ociModelResolver
|
||||||
|
mutex *sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFromArtifact creates a new OCI model source from a fetched model artifact.
|
||||||
|
func NewFromArtifact(artifact *ModelArtifact, client *RegistryClient, alias source.Alias) (source.Source, error) {
|
||||||
|
// Build metadata
|
||||||
|
metadata := buildMetadata(artifact)
|
||||||
|
|
||||||
|
// Fetch GGUF layer headers via range-GET
|
||||||
|
tempFiles := make(map[string]string)
|
||||||
|
ggufLayers := make([]GGUFLayerInfo, 0, len(artifact.GGUFLayers))
|
||||||
|
|
||||||
|
for idx, layer := range artifact.GGUFLayers {
|
||||||
|
log.WithFields("digest", layer.Digest, "size", layer.Size).Debug("fetching GGUF layer header")
|
||||||
|
|
||||||
|
// Fetch header via range-GET
|
||||||
|
headerData, err := client.FetchBlobRange(context.Background(), artifact.Reference, layer.Digest, MaxHeaderBytes)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract virtual path from annotations
|
||||||
|
virtualPath := extractVirtualPath(idx, extractAnnotations(layer.Annotations))
|
||||||
|
|
||||||
|
// Create temp file
|
||||||
|
tempPath, err := createTempFileFromData(headerData, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
// Clean up any previously created temp files
|
||||||
|
for _, path := range tempFiles {
|
||||||
|
_ = removeFile(path)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tempFiles[virtualPath] = tempPath
|
||||||
|
|
||||||
|
// Add to GGUF layers metadata
|
||||||
|
ggufLayers = append(ggufLayers, GGUFLayerInfo{
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Annotations: extractAnnotations(layer.Annotations),
|
||||||
|
FetchedBytes: int64(len(headerData)),
|
||||||
|
})
|
||||||
|
|
||||||
|
log.WithFields("virtualPath", virtualPath, "tempPath", tempPath, "bytes", len(headerData)).Debug("created temp file for GGUF header")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update metadata with GGUF layers
|
||||||
|
metadata.GGUFLayers = ggufLayers
|
||||||
|
metadata.ModelFormat = "gguf"
|
||||||
|
|
||||||
|
// Build config
|
||||||
|
config := Config{
|
||||||
|
Reference: artifact.Reference.String(),
|
||||||
|
Alias: alias,
|
||||||
|
Client: client,
|
||||||
|
Metadata: metadata,
|
||||||
|
TempFiles: tempFiles,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Derive artifact ID
|
||||||
|
id := deriveIDFromArtifact(config)
|
||||||
|
|
||||||
|
return &ociModelSource{
|
||||||
|
id: id,
|
||||||
|
config: config,
|
||||||
|
mutex: &sync.Mutex{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildMetadata constructs OCIModelMetadata from a ModelArtifact.
|
||||||
|
func buildMetadata(artifact *ModelArtifact) *OCIModelMetadata {
|
||||||
|
// Extract layers
|
||||||
|
layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
|
||||||
|
for i, layer := range artifact.Manifest.Layers {
|
||||||
|
layers[i] = source.LayerMetadata{
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract tags
|
||||||
|
var tags []string
|
||||||
|
if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
|
||||||
|
if tag := tagged.TagStr(); tag != "" {
|
||||||
|
tags = []string{tag}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract repo digests
|
||||||
|
var repoDigests []string
|
||||||
|
if artifact.ManifestDigest != "" {
|
||||||
|
repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build metadata
|
||||||
|
return &OCIModelMetadata{
|
||||||
|
UserInput: artifact.Reference.String(),
|
||||||
|
ID: artifact.ManifestDigest,
|
||||||
|
ManifestDigest: artifact.ManifestDigest,
|
||||||
|
MediaType: string(artifact.Manifest.MediaType),
|
||||||
|
Tags: tags,
|
||||||
|
Size: calculateTotalSize(layers),
|
||||||
|
Layers: layers,
|
||||||
|
RawManifest: artifact.RawManifest,
|
||||||
|
RawConfig: artifact.RawConfig,
|
||||||
|
RepoDigests: repoDigests,
|
||||||
|
Architecture: artifact.Config.Architecture,
|
||||||
|
Variant: artifact.Config.Variant,
|
||||||
|
OS: artifact.Config.OS,
|
||||||
|
Labels: artifact.Config.Config.Labels,
|
||||||
|
Annotations: extractManifestAnnotations(artifact.Manifest),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractAnnotations guarantees a non-nil annotation map: a non-nil input is
// returned as-is (not copied), and a nil input is replaced by a fresh empty map.
func extractAnnotations(annotations map[string]string) map[string]string {
	if annotations != nil {
		return annotations
	}
	return map[string]string{}
}
|
||||||
|
|
||||||
|
// extractManifestAnnotations returns the annotations exposed by manifest
// through a GetAnnotations() method. The result is never nil: when manifest
// does not provide that method, or the method returns nil, an empty map is
// returned instead.
//
// NOTE(review): only values with a GetAnnotations() method are recognized. A
// manifest type that exposes annotations solely through an `Annotations`
// struct field will always produce an empty map here — confirm against the
// type stored in ModelArtifact.Manifest.
func extractManifestAnnotations(manifest interface{}) map[string]string {
	if m, ok := manifest.(interface{ GetAnnotations() map[string]string }); ok {
		if annotations := m.GetAnnotations(); annotations != nil {
			return annotations
		}
	}
	return make(map[string]string)
}
|
||||||
|
|
||||||
|
// calculateTotalSize sums up the size of all layers.
|
||||||
|
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||||
|
var total int64
|
||||||
|
for _, layer := range layers {
|
||||||
|
total += layer.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// deriveIDFromArtifact generates an artifact ID from the config.
|
||||||
|
func deriveIDFromArtifact(cfg Config) artifact.ID {
|
||||||
|
var info string
|
||||||
|
|
||||||
|
if !cfg.Alias.IsEmpty() {
|
||||||
|
// Use alias for stable artifact ID
|
||||||
|
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
|
||||||
|
} else if cfg.Metadata.ManifestDigest != "" {
|
||||||
|
// Use manifest digest
|
||||||
|
info = cfg.Metadata.ManifestDigest
|
||||||
|
} else {
|
||||||
|
// Fall back to reference
|
||||||
|
log.Warn("no explicit name/version or manifest digest, deriving artifact ID from reference")
|
||||||
|
info = cfg.Reference
|
||||||
|
}
|
||||||
|
|
||||||
|
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID returns the artifact ID.
|
||||||
|
func (s *ociModelSource) ID() artifact.ID {
|
||||||
|
return s.id
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describe returns a description of the source.
|
||||||
|
func (s *ociModelSource) Describe() source.Description {
|
||||||
|
name := s.config.Reference
|
||||||
|
version := ""
|
||||||
|
supplier := ""
|
||||||
|
|
||||||
|
if !s.config.Alias.IsEmpty() {
|
||||||
|
a := s.config.Alias
|
||||||
|
if a.Name != "" {
|
||||||
|
name = a.Name
|
||||||
|
}
|
||||||
|
if a.Version != "" {
|
||||||
|
version = a.Version
|
||||||
|
}
|
||||||
|
if a.Supplier != "" {
|
||||||
|
supplier = a.Supplier
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return source.Description{
|
||||||
|
ID: string(s.id),
|
||||||
|
Name: name,
|
||||||
|
Version: version,
|
||||||
|
Supplier: supplier,
|
||||||
|
Metadata: s.config.Metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileResolver returns a file resolver for accessing GGUF header files.
|
||||||
|
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver == nil {
|
||||||
|
s.resolver = newOCIModelResolver(s.config.TempFiles)
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.resolver, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close cleans up temporary files.
|
||||||
|
func (s *ociModelSource) Close() error {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver != nil {
|
||||||
|
if err := s.resolver.cleanup(); err != nil {
|
||||||
|
log.WithFields("error", err).Warn("failed to cleanup temp files")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.resolver = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeFile deletes the file at path and returns any error from the removal.
// A file that is already gone is not an error, so cleanup paths can call this
// more than once for the same path.
func removeFile(path string) error {
	if err := os.Remove(path); err != nil && !os.IsNotExist(err) {
		return err
	}
	return nil
}
|
||||||
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewSourceProvider creates a new OCI model artifact source provider.
|
||||||
|
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
|
||||||
|
return &ociModelSourceProvider{
|
||||||
|
reference: reference,
|
||||||
|
registryOpts: registryOpts,
|
||||||
|
alias: alias,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ociModelSourceProvider struct {
|
||||||
|
reference string
|
||||||
|
registryOpts *image.RegistryOptions
|
||||||
|
alias source.Alias
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ociModelSourceProvider) Name() string {
|
||||||
|
return "oci-model-artifact"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
|
||||||
|
// Create registry client
|
||||||
|
client, err := NewRegistryClient(p.registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create registry client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact (lightweight check)
|
||||||
|
log.WithFields("reference", p.reference).Debug("checking if reference is a model artifact")
|
||||||
|
|
||||||
|
isModel, err := client.IsModelArtifactReference(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
// Log the error but don't fail - let other providers try
|
||||||
|
log.WithFields("reference", p.reference, "error", err).Debug("failed to check if reference is a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isModel {
|
||||||
|
log.WithFields("reference", p.reference).Debug("reference is not a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference).Info("detected OCI model artifact, fetching headers")
|
||||||
|
|
||||||
|
// Fetch the full model artifact with metadata
|
||||||
|
artifact, err := client.FetchModelArtifact(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if there are any GGUF layers
|
||||||
|
if len(artifact.GGUFLayers) == 0 {
|
||||||
|
log.WithFields("reference", p.reference).Warn("model artifact has no GGUF layers")
|
||||||
|
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference, "ggufLayers", len(artifact.GGUFLayers)).Info("found GGUF layers in model artifact")
|
||||||
|
|
||||||
|
// Create the source
|
||||||
|
src, err := NewFromArtifact(artifact, client, p.alias)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create OCI model source: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return src, nil
|
||||||
|
}
|
||||||
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExtractVirtualPath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
layerIndex int
|
||||||
|
annotations map[string]string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "with title annotation",
|
||||||
|
layerIndex: 0,
|
||||||
|
annotations: map[string]string{"org.opencontainers.image.title": "model.gguf"},
|
||||||
|
expected: "/model.gguf",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "without title annotation",
|
||||||
|
layerIndex: 1,
|
||||||
|
annotations: map[string]string{},
|
||||||
|
expected: "/model-layer-1.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractVirtualPath(tt.layerIndex, tt.annotations)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTotalSize(t *testing.T) {
|
||||||
|
// This is imported from syft/source
|
||||||
|
// Just a simple test to ensure it works
|
||||||
|
layers := []struct {
|
||||||
|
MediaType string
|
||||||
|
Digest string
|
||||||
|
Size int64
|
||||||
|
}{
|
||||||
|
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:abc", 100},
|
||||||
|
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:def", 200},
|
||||||
|
}
|
||||||
|
|
||||||
|
// We'd need to convert to source.LayerMetadata to test this properly
|
||||||
|
// For now, just ensure the package compiles
|
||||||
|
assert.NotNil(t, layers)
|
||||||
|
}
|
||||||
227
syft/source/ocimodelsource/registry_client.go
Normal file
227
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"

	"github.com/google/go-containerregistry/pkg/authn"
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/remote"

	"github.com/anchore/stereoscope/pkg/image"
)
|
||||||
|
|
||||||
|
// Media types used by Docker's OCI artifact format for AI model packaging.
// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
const (
	// ModelConfigMediaType is the config media type that marks a manifest as a model artifact.
	ModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json"

	// GGUFLayerMediaType is the media type of layers that hold GGUF model files.
	GGUFLayerMediaType = "application/vnd.docker.ai.gguf.v3"

	// MaxHeaderBytes is the upper bound on how many bytes are fetched per layer
	// when reading GGUF headers (10 MiB).
	MaxHeaderBytes = 10 << 20
)
|
||||||
|
|
||||||
|
// RegistryClient handles OCI registry interactions for model artifacts.
|
||||||
|
type RegistryClient struct {
|
||||||
|
options []remote.Option
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistryClient creates a new registry client with authentication from RegistryOptions.
|
||||||
|
func NewRegistryClient(registryOpts *image.RegistryOptions) (*RegistryClient, error) {
|
||||||
|
opts, err := buildRemoteOptions(registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to build remote options: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &RegistryClient{
|
||||||
|
options: opts,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
|
||||||
|
func buildRemoteOptions(registryOpts *image.RegistryOptions) ([]remote.Option, error) {
|
||||||
|
var opts []remote.Option
|
||||||
|
|
||||||
|
if registryOpts == nil {
|
||||||
|
return opts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build authenticator
|
||||||
|
authenticator := buildAuthenticator(registryOpts)
|
||||||
|
opts = append(opts, remote.WithAuth(authenticator))
|
||||||
|
|
||||||
|
// Handle TLS settings
|
||||||
|
if registryOpts.InsecureSkipTLSVerify {
|
||||||
|
transport := remote.DefaultTransport.(*http.Transport).Clone()
|
||||||
|
transport.TLSClientConfig.InsecureSkipVerify = true
|
||||||
|
opts = append(opts, remote.WithTransport(transport))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle insecure HTTP
|
||||||
|
if registryOpts.InsecureUseHTTP {
|
||||||
|
opts = append(opts, remote.WithTransport(http.DefaultTransport))
|
||||||
|
}
|
||||||
|
|
||||||
|
return opts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
|
||||||
|
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
|
||||||
|
// If credentials are provided, use them
|
||||||
|
if len(registryOpts.Credentials) > 0 {
|
||||||
|
// Use the first credential set (we could enhance this to match by authority)
|
||||||
|
cred := registryOpts.Credentials[0]
|
||||||
|
|
||||||
|
if cred.Token != "" {
|
||||||
|
return &authn.Bearer{Token: cred.Token}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cred.Username != "" || cred.Password != "" {
|
||||||
|
return &authn.Basic{
|
||||||
|
Username: cred.Username,
|
||||||
|
Password: cred.Password,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to anonymous authenticator
|
||||||
|
return authn.Anonymous
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelArtifact represents a parsed OCI model artifact.
|
||||||
|
type ModelArtifact struct {
|
||||||
|
Reference name.Reference
|
||||||
|
Manifest *v1.Manifest
|
||||||
|
Config *v1.ConfigFile
|
||||||
|
RawManifest []byte
|
||||||
|
RawConfig []byte
|
||||||
|
ManifestDigest string
|
||||||
|
GGUFLayers []v1.Descriptor
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchModelArtifact fetches and parses an OCI model artifact from the registry.
|
||||||
|
func (c *RegistryClient) FetchModelArtifact(ctx context.Context, refStr string) (*ModelArtifact, error) {
|
||||||
|
// Parse reference
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch descriptor
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse manifest
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact
|
||||||
|
if !isModelArtifact(manifest) {
|
||||||
|
return nil, fmt.Errorf("not a model artifact (config media type: %s)", manifest.Config.MediaType)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch config
|
||||||
|
img, err := desc.Image()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := img.ConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawConfig, err := img.RawConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract GGUF layers
|
||||||
|
ggufLayers := extractGGUFLayers(manifest)
|
||||||
|
|
||||||
|
return &ModelArtifact{
|
||||||
|
Reference: ref,
|
||||||
|
Manifest: manifest,
|
||||||
|
Config: configFile,
|
||||||
|
RawManifest: desc.Manifest,
|
||||||
|
RawConfig: rawConfig,
|
||||||
|
ManifestDigest: desc.Digest.String(),
|
||||||
|
GGUFLayers: ggufLayers,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isModelArtifact checks if the manifest represents a model artifact.
|
||||||
|
func isModelArtifact(manifest *v1.Manifest) bool {
|
||||||
|
return manifest.Config.MediaType == ModelConfigMediaType
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||||
|
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||||
|
var ggufLayers []v1.Descriptor
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
if string(layer.MediaType) == GGUFLayerMediaType {
|
||||||
|
ggufLayers = append(ggufLayers, layer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ggufLayers
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchBlobRange fetches a byte range from a blob in the registry.
|
||||||
|
// This is used to fetch only the GGUF header without downloading the entire multi-GB file.
|
||||||
|
func (c *RegistryClient) FetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||||
|
// Use the remote package's Layer fetching with our options
|
||||||
|
// Then read only the first maxBytes
|
||||||
|
repo := ref.Context()
|
||||||
|
|
||||||
|
// Fetch the layer (blob) using remote.Layer
|
||||||
|
layer, err := remote.Layer(repo.Digest(digest.String()), c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the compressed reader
|
||||||
|
reader, err := layer.Compressed()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
// Read up to maxBytes
|
||||||
|
data := make([]byte, maxBytes)
|
||||||
|
n, err := io.ReadFull(reader, data)
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||||
|
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data[:n], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsModelArtifactReference checks if a reference points to a model artifact.
|
||||||
|
// This is a lightweight check that only fetches the manifest.
|
||||||
|
func (c *RegistryClient) IsModelArtifactReference(ctx context.Context, refStr string) (bool, error) {
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return false, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return isModelArtifact(manifest), nil
|
||||||
|
}
|
||||||
211
syft/source/ocimodelsource/resolver.go
Normal file
211
syft/source/ocimodelsource/resolver.go
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
|
stereofile "github.com/anchore/stereoscope/pkg/file"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ file.Resolver = (*ociModelResolver)(nil)
|
||||||
|
|
||||||
|
// ociModelResolver is a minimal file.Resolver implementation that provides access to
|
||||||
|
// GGUF header data fetched from OCI model artifacts via range-GET requests.
|
||||||
|
type ociModelResolver struct {
|
||||||
|
tempFiles map[string]string // maps virtual path -> temporary file path
|
||||||
|
locations []file.Location
|
||||||
|
}
|
||||||
|
|
||||||
|
// newOCIModelResolver creates a new resolver with the given temporary files.
|
||||||
|
func newOCIModelResolver(tempFiles map[string]string) *ociModelResolver {
|
||||||
|
// Create locations for all temp files
|
||||||
|
locations := make([]file.Location, 0, len(tempFiles))
|
||||||
|
for virtualPath, tempPath := range tempFiles {
|
||||||
|
// Use NewVirtualLocation: realPath is tempPath, accessPath is virtualPath
|
||||||
|
locations = append(locations, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ociModelResolver{
|
||||||
|
tempFiles: tempFiles,
|
||||||
|
locations: locations,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileContentsByLocation returns the contents of the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||||
|
// Get the real path (temp file) from the location
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Check if this is one of our managed files
|
||||||
|
found := false
|
||||||
|
for _, tempPath := range r.tempFiles {
|
||||||
|
if tempPath == realPath {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
return nil, fmt.Errorf("location not found in resolver: %s", location.RealPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open and return the temp file
|
||||||
|
f, err := os.Open(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Stat the temp file
|
||||||
|
info, err := os.Stat(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return file.Metadata{}, fmt.Errorf("failed to stat temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return basic metadata
|
||||||
|
return file.Metadata{
|
||||||
|
Path: location.AccessPath, // Use AccessPath for virtual path
|
||||||
|
Type: stereofile.TypeRegular,
|
||||||
|
FileInfo: info,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasPath checks if the given path exists in the resolver.
|
||||||
|
func (r *ociModelResolver) HasPath(path string) bool {
|
||||||
|
_, exists := r.tempFiles[path]
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByPath returns locations for files matching the given paths.
|
||||||
|
func (r *ociModelResolver) FilesByPath(paths ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, path := range paths {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if virtualPath == path {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||||
|
func (r *ociModelResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
// Match against the virtual path
|
||||||
|
matched, err := doublestar.Match(pattern, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to match pattern %q: %w", pattern, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||||
|
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||||
|
func (r *ociModelResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||||
|
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||||
|
// This is not applicable for OCI model artifacts.
|
||||||
|
func (r *ociModelResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||||
|
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AllLocations returns all file locations in the resolver.
|
||||||
|
func (r *ociModelResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||||
|
ch := make(chan file.Location)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
for _, loc := range r.locations {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case ch <- loc:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup removes all temporary files managed by this resolver.
|
||||||
|
func (r *ociModelResolver) cleanup() error {
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if err := os.Remove(tempPath); err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("failed to remove temp file for %s: %w", virtualPath, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return fmt.Errorf("cleanup errors: %v", errs)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVirtualPath generates a virtual path for a GGUF layer.
// This simulates where the file would be in the artifact.
//
// When the "org.opencontainers.image.title" annotation holds a usable name, the
// result is that name rooted at "/"; leading slashes in the annotation are
// dropped so the result never starts with "//". When the annotation is missing,
// empty, or consists only of slashes, a generic name derived from layerIndex is
// used instead ("/model-layer-<index>.gguf").
func extractVirtualPath(layerIndex int, annotations map[string]string) string {
	// Reading a missing key (or a nil map) yields "", which falls through to the default.
	if title := strings.TrimLeft(annotations["org.opencontainers.image.title"], "/"); title != "" {
		return "/" + title
	}

	return fmt.Sprintf("/model-layer-%d.gguf", layerIndex)
}
|
||||||
|
|
||||||
|
// createTempFileFromData writes data to a new temporary file and returns its path.
//
// The file is created in the default temp directory. Its name is derived from
// the base name of virtualPath (stem as prefix, extension as suffix) so the
// temp file is recognisable and keeps the original extension.
//
// The file is closed before returning. If writing or closing fails, the file
// is removed and an error is returned, so no partial file is left behind. On
// success the caller owns the file and is responsible for deleting it.
func createTempFileFromData(data []byte, virtualPath string) (string, error) {
	filename := filepath.Base(virtualPath)
	ext := filepath.Ext(filename)
	prefix := strings.TrimSuffix(filename, ext) + "-"

	tempFile, err := os.CreateTemp("", prefix+"*"+ext)
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()

	if _, err := tempFile.Write(data); err != nil {
		// Close before removing: an open file cannot be deleted on every platform.
		// Both calls are best effort; the write error is the one worth reporting.
		_ = tempFile.Close()
		_ = os.Remove(tempPath)
		return "", fmt.Errorf("failed to write to temp file: %w", err)
	}

	// A failed Close can mean the data never reached disk, so it must not be ignored.
	if err := tempFile.Close(); err != nil {
		_ = os.Remove(tempPath) // best effort; the close error is reported
		return "", fmt.Errorf("failed to close temp file: %w", err)
	}

	return tempPath, nil
}
|
||||||
@ -7,15 +7,17 @@ import (
|
|||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
"github.com/anchore/syft/syft/source/directorysource"
|
"github.com/anchore/syft/syft/source/directorysource"
|
||||||
"github.com/anchore/syft/syft/source/filesource"
|
"github.com/anchore/syft/syft/source/filesource"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
"github.com/anchore/syft/syft/source/snapsource"
|
"github.com/anchore/syft/syft/source/snapsource"
|
||||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
FileTag = stereoscope.FileTag
|
FileTag = stereoscope.FileTag
|
||||||
DirTag = stereoscope.DirTag
|
DirTag = stereoscope.DirTag
|
||||||
PullTag = stereoscope.PullTag
|
PullTag = stereoscope.PullTag
|
||||||
SnapTag = "snap"
|
SnapTag = "snap"
|
||||||
|
OCIModelTag = "oci-model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// All returns all the configured source providers known to syft
|
// All returns all the configured source providers known to syft
|
||||||
@ -40,6 +42,9 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
|||||||
|
|
||||||
// 3. try remote sources after everything else...
|
// 3. try remote sources after everything else...
|
||||||
|
|
||||||
|
// --from oci-model (model artifacts with header-only fetching)
|
||||||
|
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), OCIModelTag)).
|
||||||
|
|
||||||
// --from docker, registry, etc.
|
// --from docker, registry, etc.
|
||||||
Join(stereoscopeProviders.Select(PullTag)...).
|
Join(stereoscopeProviders.Select(PullTag)...).
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user