mirror of
https://github.com/anchore/syft.git
synced 2025-11-17 08:23:15 +01:00
wip: wip no lrg file oci client
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
7ed34c81f8
commit
efcfecb2db
@ -15,6 +15,7 @@ import (
|
|||||||
"github.com/anchore/syft/syft/file"
|
"github.com/anchore/syft/syft/file"
|
||||||
"github.com/anchore/syft/syft/sbom"
|
"github.com/anchore/syft/syft/sbom"
|
||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
)
|
)
|
||||||
|
|
||||||
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
// CreateSBOMConfig specifies all parameters needed for creating an SBOM.
|
||||||
@ -483,6 +484,9 @@ func findDefaultTags(src source.Description) ([]string, error) {
|
|||||||
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.DirectoryTag, filecataloging.FileTag}, nil
|
||||||
case source.SnapMetadata:
|
case source.SnapMetadata:
|
||||||
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
return []string{pkgcataloging.InstalledTag, filecataloging.FileTag}, nil
|
||||||
|
case *ocimodelsource.OCIModelMetadata:
|
||||||
|
// OCI model artifacts should use image-like catalogers since they provide files to scan
|
||||||
|
return []string{pkgcataloging.ImageTag, filecataloging.FileTag}, nil
|
||||||
default:
|
default:
|
||||||
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
return nil, fmt.Errorf("unable to determine default cataloger tag for source type=%T", m)
|
||||||
}
|
}
|
||||||
|
|||||||
39
syft/source/ocimodelsource/metadata.go
Normal file
39
syft/source/ocimodelsource/metadata.go
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import "github.com/anchore/syft/syft/source"
|
||||||
|
|
||||||
|
// OCIModelMetadata represents all static metadata that defines what an OCI model artifact is.
|
||||||
|
// This is similar to ImageMetadata but includes model-specific fields and OCI artifact annotations.
|
||||||
|
type OCIModelMetadata struct {
|
||||||
|
// Core OCI artifact metadata (mirrors ImageMetadata)
|
||||||
|
UserInput string `json:"userInput"`
|
||||||
|
ID string `json:"artifactID"`
|
||||||
|
ManifestDigest string `json:"manifestDigest"`
|
||||||
|
MediaType string `json:"mediaType"`
|
||||||
|
Tags []string `json:"tags"`
|
||||||
|
Size int64 `json:"artifactSize"`
|
||||||
|
Layers []source.LayerMetadata `json:"layers"`
|
||||||
|
RawManifest []byte `json:"manifest"`
|
||||||
|
RawConfig []byte `json:"config"`
|
||||||
|
RepoDigests []string `json:"repoDigests"`
|
||||||
|
Architecture string `json:"architecture"`
|
||||||
|
Variant string `json:"architectureVariant,omitempty"`
|
||||||
|
OS string `json:"os"`
|
||||||
|
Labels map[string]string `json:"labels,omitempty"`
|
||||||
|
|
||||||
|
// OCI-specific metadata
|
||||||
|
Annotations map[string]string `json:"annotations,omitempty"`
|
||||||
|
|
||||||
|
// Model-specific metadata
|
||||||
|
ModelFormat string `json:"modelFormat,omitempty"` // e.g., "gguf"
|
||||||
|
GGUFLayers []GGUFLayerInfo `json:"ggufLayers,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// GGUFLayerInfo describes a single GGUF layer found in an OCI model artifact.
type GGUFLayerInfo struct {
	// Digest is the layer digest in string form.
	Digest string `json:"digest"`
	// Size is the full blob size in the registry.
	Size int64 `json:"size"`
	// MediaType should be "application/vnd.docker.ai.gguf.v3".
	MediaType string `json:"mediaType"`
	// Annotations are the annotations attached to the layer descriptor.
	Annotations map[string]string `json:"annotations,omitempty"`
	// FetchedBytes is how many bytes of the blob were actually fetched.
	FetchedBytes int64 `json:"fetchedBytes"`
}
|
||||||
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
260
syft/source/ocimodelsource/oci_model_source.go
Normal file
@ -0,0 +1,260 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
|
||||||
|
"github.com/opencontainers/go-digest"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/artifact"
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
"github.com/anchore/syft/syft/source/internal"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ source.Source = (*ociModelSource)(nil)
|
||||||
|
|
||||||
|
// Config holds the configuration for an OCI model artifact source.
|
||||||
|
type Config struct {
|
||||||
|
Reference string
|
||||||
|
Platform string
|
||||||
|
Alias source.Alias
|
||||||
|
Client *RegistryClient
|
||||||
|
Metadata *OCIModelMetadata
|
||||||
|
TempFiles map[string]string // Virtual path -> temp file path
|
||||||
|
}
|
||||||
|
|
||||||
|
// ociModelSource implements the source.Source interface for OCI model artifacts.
|
||||||
|
type ociModelSource struct {
|
||||||
|
id artifact.ID
|
||||||
|
config Config
|
||||||
|
resolver *ociModelResolver
|
||||||
|
mutex *sync.Mutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFromArtifact creates a new OCI model source from a fetched model artifact.
|
||||||
|
func NewFromArtifact(artifact *ModelArtifact, client *RegistryClient, alias source.Alias) (source.Source, error) {
|
||||||
|
// Build metadata
|
||||||
|
metadata := buildMetadata(artifact)
|
||||||
|
|
||||||
|
// Fetch GGUF layer headers via range-GET
|
||||||
|
tempFiles := make(map[string]string)
|
||||||
|
ggufLayers := make([]GGUFLayerInfo, 0, len(artifact.GGUFLayers))
|
||||||
|
|
||||||
|
for idx, layer := range artifact.GGUFLayers {
|
||||||
|
log.WithFields("digest", layer.Digest, "size", layer.Size).Debug("fetching GGUF layer header")
|
||||||
|
|
||||||
|
// Fetch header via range-GET
|
||||||
|
headerData, err := client.FetchBlobRange(context.Background(), artifact.Reference, layer.Digest, MaxHeaderBytes)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch GGUF layer header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract virtual path from annotations
|
||||||
|
virtualPath := extractVirtualPath(idx, extractAnnotations(layer.Annotations))
|
||||||
|
|
||||||
|
// Create temp file
|
||||||
|
tempPath, err := createTempFileFromData(headerData, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
// Clean up any previously created temp files
|
||||||
|
for _, path := range tempFiles {
|
||||||
|
_ = removeFile(path)
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
tempFiles[virtualPath] = tempPath
|
||||||
|
|
||||||
|
// Add to GGUF layers metadata
|
||||||
|
ggufLayers = append(ggufLayers, GGUFLayerInfo{
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Annotations: extractAnnotations(layer.Annotations),
|
||||||
|
FetchedBytes: int64(len(headerData)),
|
||||||
|
})
|
||||||
|
|
||||||
|
log.WithFields("virtualPath", virtualPath, "tempPath", tempPath, "bytes", len(headerData)).Debug("created temp file for GGUF header")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update metadata with GGUF layers
|
||||||
|
metadata.GGUFLayers = ggufLayers
|
||||||
|
metadata.ModelFormat = "gguf"
|
||||||
|
|
||||||
|
// Build config
|
||||||
|
config := Config{
|
||||||
|
Reference: artifact.Reference.String(),
|
||||||
|
Alias: alias,
|
||||||
|
Client: client,
|
||||||
|
Metadata: metadata,
|
||||||
|
TempFiles: tempFiles,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Derive artifact ID
|
||||||
|
id := deriveIDFromArtifact(config)
|
||||||
|
|
||||||
|
return &ociModelSource{
|
||||||
|
id: id,
|
||||||
|
config: config,
|
||||||
|
mutex: &sync.Mutex{},
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildMetadata constructs OCIModelMetadata from a ModelArtifact.
|
||||||
|
func buildMetadata(artifact *ModelArtifact) *OCIModelMetadata {
|
||||||
|
// Extract layers
|
||||||
|
layers := make([]source.LayerMetadata, len(artifact.Manifest.Layers))
|
||||||
|
for i, layer := range artifact.Manifest.Layers {
|
||||||
|
layers[i] = source.LayerMetadata{
|
||||||
|
MediaType: string(layer.MediaType),
|
||||||
|
Digest: layer.Digest.String(),
|
||||||
|
Size: layer.Size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract tags
|
||||||
|
var tags []string
|
||||||
|
if tagged, ok := artifact.Reference.(interface{ TagStr() string }); ok {
|
||||||
|
if tag := tagged.TagStr(); tag != "" {
|
||||||
|
tags = []string{tag}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract repo digests
|
||||||
|
var repoDigests []string
|
||||||
|
if artifact.ManifestDigest != "" {
|
||||||
|
repoDigests = []string{artifact.Reference.Context().String() + "@" + artifact.ManifestDigest}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build metadata
|
||||||
|
return &OCIModelMetadata{
|
||||||
|
UserInput: artifact.Reference.String(),
|
||||||
|
ID: artifact.ManifestDigest,
|
||||||
|
ManifestDigest: artifact.ManifestDigest,
|
||||||
|
MediaType: string(artifact.Manifest.MediaType),
|
||||||
|
Tags: tags,
|
||||||
|
Size: calculateTotalSize(layers),
|
||||||
|
Layers: layers,
|
||||||
|
RawManifest: artifact.RawManifest,
|
||||||
|
RawConfig: artifact.RawConfig,
|
||||||
|
RepoDigests: repoDigests,
|
||||||
|
Architecture: artifact.Config.Architecture,
|
||||||
|
Variant: artifact.Config.Variant,
|
||||||
|
OS: artifact.Config.OS,
|
||||||
|
Labels: artifact.Config.Config.Labels,
|
||||||
|
Annotations: extractManifestAnnotations(artifact.Manifest),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractAnnotations guarantees a non-nil annotation map: a nil input yields a
// fresh empty map, while any other input is handed back as-is (the same map,
// not a copy).
func extractAnnotations(annotations map[string]string) map[string]string {
	if annotations != nil {
		return annotations
	}
	return map[string]string{}
}
|
||||||
|
|
||||||
|
// extractManifestAnnotations returns the annotations of manifest when it
// offers a GetAnnotations() map[string]string method, and an empty map
// otherwise. The result is never nil, even when GetAnnotations itself returns
// nil, so callers can index or range over it without a guard.
//
// NOTE(review): go-containerregistry's v1.Manifest appears to expose
// annotations as a struct field rather than through GetAnnotations, in which
// case passing a *v1.Manifest always takes the fallback path — confirm.
func extractManifestAnnotations(manifest interface{}) map[string]string {
	provider, ok := manifest.(interface{ GetAnnotations() map[string]string })
	if !ok {
		return make(map[string]string)
	}
	if annotations := provider.GetAnnotations(); annotations != nil {
		return annotations
	}
	return make(map[string]string)
}
|
||||||
|
|
||||||
|
// calculateTotalSize sums up the size of all layers.
|
||||||
|
func calculateTotalSize(layers []source.LayerMetadata) int64 {
|
||||||
|
var total int64
|
||||||
|
for _, layer := range layers {
|
||||||
|
total += layer.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// deriveIDFromArtifact generates an artifact ID from the config.
|
||||||
|
func deriveIDFromArtifact(cfg Config) artifact.ID {
|
||||||
|
var info string
|
||||||
|
|
||||||
|
if !cfg.Alias.IsEmpty() {
|
||||||
|
// Use alias for stable artifact ID
|
||||||
|
info = fmt.Sprintf("%s@%s", cfg.Alias.Name, cfg.Alias.Version)
|
||||||
|
} else if cfg.Metadata.ManifestDigest != "" {
|
||||||
|
// Use manifest digest
|
||||||
|
info = cfg.Metadata.ManifestDigest
|
||||||
|
} else {
|
||||||
|
// Fall back to reference
|
||||||
|
log.Warn("no explicit name/version or manifest digest, deriving artifact ID from reference")
|
||||||
|
info = cfg.Reference
|
||||||
|
}
|
||||||
|
|
||||||
|
return internal.ArtifactIDFromDigest(digest.SHA256.FromString(info).String())
|
||||||
|
}
|
||||||
|
|
||||||
|
// ID returns the artifact ID.
|
||||||
|
func (s *ociModelSource) ID() artifact.ID {
|
||||||
|
return s.id
|
||||||
|
}
|
||||||
|
|
||||||
|
// Describe returns a description of the source.
|
||||||
|
func (s *ociModelSource) Describe() source.Description {
|
||||||
|
name := s.config.Reference
|
||||||
|
version := ""
|
||||||
|
supplier := ""
|
||||||
|
|
||||||
|
if !s.config.Alias.IsEmpty() {
|
||||||
|
a := s.config.Alias
|
||||||
|
if a.Name != "" {
|
||||||
|
name = a.Name
|
||||||
|
}
|
||||||
|
if a.Version != "" {
|
||||||
|
version = a.Version
|
||||||
|
}
|
||||||
|
if a.Supplier != "" {
|
||||||
|
supplier = a.Supplier
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return source.Description{
|
||||||
|
ID: string(s.id),
|
||||||
|
Name: name,
|
||||||
|
Version: version,
|
||||||
|
Supplier: supplier,
|
||||||
|
Metadata: s.config.Metadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileResolver returns a file resolver for accessing GGUF header files.
|
||||||
|
func (s *ociModelSource) FileResolver(_ source.Scope) (file.Resolver, error) {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver == nil {
|
||||||
|
s.resolver = newOCIModelResolver(s.config.TempFiles)
|
||||||
|
}
|
||||||
|
|
||||||
|
return s.resolver, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close cleans up temporary files.
|
||||||
|
func (s *ociModelSource) Close() error {
|
||||||
|
s.mutex.Lock()
|
||||||
|
defer s.mutex.Unlock()
|
||||||
|
|
||||||
|
if s.resolver != nil {
|
||||||
|
if err := s.resolver.cleanup(); err != nil {
|
||||||
|
log.WithFields("error", err).Warn("failed to cleanup temp files")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
s.resolver = nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// removeFile is intended to delete the file at path. It is currently a
// placeholder: nothing is removed and nil is always returned.
func removeFile(path string) error {
	_ = path // unused until the real implementation lands
	return nil
}
|
||||||
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
76
syft/source/ocimodelsource/oci_model_source_provider.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/anchore/stereoscope/pkg/image"
|
||||||
|
"github.com/anchore/syft/internal/log"
|
||||||
|
"github.com/anchore/syft/syft/source"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NewSourceProvider creates a new OCI model artifact source provider.
|
||||||
|
func NewSourceProvider(reference string, registryOpts *image.RegistryOptions, alias source.Alias) source.Provider {
|
||||||
|
return &ociModelSourceProvider{
|
||||||
|
reference: reference,
|
||||||
|
registryOpts: registryOpts,
|
||||||
|
alias: alias,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
type ociModelSourceProvider struct {
|
||||||
|
reference string
|
||||||
|
registryOpts *image.RegistryOptions
|
||||||
|
alias source.Alias
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ociModelSourceProvider) Name() string {
|
||||||
|
return "oci-model-artifact"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (p *ociModelSourceProvider) Provide(ctx context.Context) (source.Source, error) {
|
||||||
|
// Create registry client
|
||||||
|
client, err := NewRegistryClient(p.registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create registry client: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact (lightweight check)
|
||||||
|
log.WithFields("reference", p.reference).Debug("checking if reference is a model artifact")
|
||||||
|
|
||||||
|
isModel, err := client.IsModelArtifactReference(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
// Log the error but don't fail - let other providers try
|
||||||
|
log.WithFields("reference", p.reference, "error", err).Debug("failed to check if reference is a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !isModel {
|
||||||
|
log.WithFields("reference", p.reference).Debug("reference is not a model artifact")
|
||||||
|
return nil, fmt.Errorf("not an OCI model artifact")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference).Info("detected OCI model artifact, fetching headers")
|
||||||
|
|
||||||
|
// Fetch the full model artifact with metadata
|
||||||
|
artifact, err := client.FetchModelArtifact(ctx, p.reference)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch model artifact: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if there are any GGUF layers
|
||||||
|
if len(artifact.GGUFLayers) == 0 {
|
||||||
|
log.WithFields("reference", p.reference).Warn("model artifact has no GGUF layers")
|
||||||
|
return nil, fmt.Errorf("model artifact has no GGUF layers")
|
||||||
|
}
|
||||||
|
|
||||||
|
log.WithFields("reference", p.reference, "ggufLayers", len(artifact.GGUFLayers)).Info("found GGUF layers in model artifact")
|
||||||
|
|
||||||
|
// Create the source
|
||||||
|
src, err := NewFromArtifact(artifact, client, p.alias)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to create OCI model source: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return src, nil
|
||||||
|
}
|
||||||
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
53
syft/source/ocimodelsource/oci_model_source_test.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestExtractVirtualPath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
layerIndex int
|
||||||
|
annotations map[string]string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "with title annotation",
|
||||||
|
layerIndex: 0,
|
||||||
|
annotations: map[string]string{"org.opencontainers.image.title": "model.gguf"},
|
||||||
|
expected: "/model.gguf",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "without title annotation",
|
||||||
|
layerIndex: 1,
|
||||||
|
annotations: map[string]string{},
|
||||||
|
expected: "/model-layer-1.gguf",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
result := extractVirtualPath(tt.layerIndex, tt.annotations)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTotalSize(t *testing.T) {
|
||||||
|
// This is imported from syft/source
|
||||||
|
// Just a simple test to ensure it works
|
||||||
|
layers := []struct {
|
||||||
|
MediaType string
|
||||||
|
Digest string
|
||||||
|
Size int64
|
||||||
|
}{
|
||||||
|
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:abc", 100},
|
||||||
|
{"application/vnd.docker.image.rootfs.diff.tar.gzip", "sha256:def", 200},
|
||||||
|
}
|
||||||
|
|
||||||
|
// We'd need to convert to source.LayerMetadata to test this properly
|
||||||
|
// For now, just ensure the package compiles
|
||||||
|
assert.NotNil(t, layers)
|
||||||
|
}
|
||||||
227
syft/source/ocimodelsource/registry_client.go
Normal file
227
syft/source/ocimodelsource/registry_client.go
Normal file
@ -0,0 +1,227 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
	"context"
	"crypto/tls"
	"encoding/json"
	"fmt"
	"io"
	"net/http"

	"github.com/google/go-containerregistry/pkg/authn"
	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/remote"

	"github.com/anchore/stereoscope/pkg/image"
)
|
||||||
|
|
||||||
|
// Media types follow Docker's OCI artifact format for AI model packaging.
// Reference: https://www.docker.com/blog/oci-artifacts-for-ai-model-packaging/
const (
	// ModelConfigMediaType is the config media type that marks a manifest as a model artifact.
	ModelConfigMediaType = "application/vnd.docker.ai.model.config.v0.1+json"
	// GGUFLayerMediaType is the media type of layers holding GGUF data.
	GGUFLayerMediaType = "application/vnd.docker.ai.gguf.v3"

	// MaxHeaderBytes caps how many bytes are fetched per GGUF layer (10 MB).
	MaxHeaderBytes = 10 << 20
)
|
||||||
|
|
||||||
|
// RegistryClient handles OCI registry interactions for model artifacts.
|
||||||
|
type RegistryClient struct {
|
||||||
|
options []remote.Option
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistryClient creates a new registry client with authentication from RegistryOptions.
|
||||||
|
func NewRegistryClient(registryOpts *image.RegistryOptions) (*RegistryClient, error) {
|
||||||
|
opts, err := buildRemoteOptions(registryOpts)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to build remote options: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &RegistryClient{
|
||||||
|
options: opts,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildRemoteOptions converts stereoscope RegistryOptions to go-containerregistry remote.Options.
|
||||||
|
func buildRemoteOptions(registryOpts *image.RegistryOptions) ([]remote.Option, error) {
|
||||||
|
var opts []remote.Option
|
||||||
|
|
||||||
|
if registryOpts == nil {
|
||||||
|
return opts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build authenticator
|
||||||
|
authenticator := buildAuthenticator(registryOpts)
|
||||||
|
opts = append(opts, remote.WithAuth(authenticator))
|
||||||
|
|
||||||
|
// Handle TLS settings
|
||||||
|
if registryOpts.InsecureSkipTLSVerify {
|
||||||
|
transport := remote.DefaultTransport.(*http.Transport).Clone()
|
||||||
|
transport.TLSClientConfig.InsecureSkipVerify = true
|
||||||
|
opts = append(opts, remote.WithTransport(transport))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle insecure HTTP
|
||||||
|
if registryOpts.InsecureUseHTTP {
|
||||||
|
opts = append(opts, remote.WithTransport(http.DefaultTransport))
|
||||||
|
}
|
||||||
|
|
||||||
|
return opts, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildAuthenticator creates an authn.Authenticator from RegistryOptions.
|
||||||
|
func buildAuthenticator(registryOpts *image.RegistryOptions) authn.Authenticator {
|
||||||
|
// If credentials are provided, use them
|
||||||
|
if len(registryOpts.Credentials) > 0 {
|
||||||
|
// Use the first credential set (we could enhance this to match by authority)
|
||||||
|
cred := registryOpts.Credentials[0]
|
||||||
|
|
||||||
|
if cred.Token != "" {
|
||||||
|
return &authn.Bearer{Token: cred.Token}
|
||||||
|
}
|
||||||
|
|
||||||
|
if cred.Username != "" || cred.Password != "" {
|
||||||
|
return &authn.Basic{
|
||||||
|
Username: cred.Username,
|
||||||
|
Password: cred.Password,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to anonymous authenticator
|
||||||
|
return authn.Anonymous
|
||||||
|
}
|
||||||
|
|
||||||
|
// ModelArtifact represents a parsed OCI model artifact.
|
||||||
|
type ModelArtifact struct {
|
||||||
|
Reference name.Reference
|
||||||
|
Manifest *v1.Manifest
|
||||||
|
Config *v1.ConfigFile
|
||||||
|
RawManifest []byte
|
||||||
|
RawConfig []byte
|
||||||
|
ManifestDigest string
|
||||||
|
GGUFLayers []v1.Descriptor
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchModelArtifact fetches and parses an OCI model artifact from the registry.
|
||||||
|
func (c *RegistryClient) FetchModelArtifact(ctx context.Context, refStr string) (*ModelArtifact, error) {
|
||||||
|
// Parse reference
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch descriptor
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parse manifest
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if this is a model artifact
|
||||||
|
if !isModelArtifact(manifest) {
|
||||||
|
return nil, fmt.Errorf("not a model artifact (config media type: %s)", manifest.Config.MediaType)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch config
|
||||||
|
img, err := desc.Image()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get image: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := img.ConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
rawConfig, err := img.RawConfigFile()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get raw config: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract GGUF layers
|
||||||
|
ggufLayers := extractGGUFLayers(manifest)
|
||||||
|
|
||||||
|
return &ModelArtifact{
|
||||||
|
Reference: ref,
|
||||||
|
Manifest: manifest,
|
||||||
|
Config: configFile,
|
||||||
|
RawManifest: desc.Manifest,
|
||||||
|
RawConfig: rawConfig,
|
||||||
|
ManifestDigest: desc.Digest.String(),
|
||||||
|
GGUFLayers: ggufLayers,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// isModelArtifact checks if the manifest represents a model artifact.
|
||||||
|
func isModelArtifact(manifest *v1.Manifest) bool {
|
||||||
|
return manifest.Config.MediaType == ModelConfigMediaType
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractGGUFLayers extracts GGUF layer descriptors from the manifest.
|
||||||
|
func extractGGUFLayers(manifest *v1.Manifest) []v1.Descriptor {
|
||||||
|
var ggufLayers []v1.Descriptor
|
||||||
|
for _, layer := range manifest.Layers {
|
||||||
|
if string(layer.MediaType) == GGUFLayerMediaType {
|
||||||
|
ggufLayers = append(ggufLayers, layer)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ggufLayers
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchBlobRange fetches a byte range from a blob in the registry.
|
||||||
|
// This is used to fetch only the GGUF header without downloading the entire multi-GB file.
|
||||||
|
func (c *RegistryClient) FetchBlobRange(ctx context.Context, ref name.Reference, digest v1.Hash, maxBytes int64) ([]byte, error) {
|
||||||
|
// Use the remote package's Layer fetching with our options
|
||||||
|
// Then read only the first maxBytes
|
||||||
|
repo := ref.Context()
|
||||||
|
|
||||||
|
// Fetch the layer (blob) using remote.Layer
|
||||||
|
layer, err := remote.Layer(repo.Digest(digest.String()), c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to fetch layer: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the compressed reader
|
||||||
|
reader, err := layer.Compressed()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get layer reader: %w", err)
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
|
||||||
|
// Read up to maxBytes
|
||||||
|
data := make([]byte, maxBytes)
|
||||||
|
n, err := io.ReadFull(reader, data)
|
||||||
|
if err != nil && err != io.ErrUnexpectedEOF {
|
||||||
|
// ErrUnexpectedEOF is okay - it means the file is smaller than maxBytes
|
||||||
|
return nil, fmt.Errorf("failed to read layer data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return data[:n], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsModelArtifactReference checks if a reference points to a model artifact.
|
||||||
|
// This is a lightweight check that only fetches the manifest.
|
||||||
|
func (c *RegistryClient) IsModelArtifactReference(ctx context.Context, refStr string) (bool, error) {
|
||||||
|
ref, err := name.ParseReference(refStr)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to parse reference %q: %w", refStr, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
desc, err := remote.Get(ref, c.options...)
|
||||||
|
if err != nil {
|
||||||
|
return false, fmt.Errorf("failed to fetch descriptor: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
manifest := &v1.Manifest{}
|
||||||
|
if err := json.Unmarshal(desc.Manifest, manifest); err != nil {
|
||||||
|
return false, fmt.Errorf("failed to unmarshal manifest: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return isModelArtifact(manifest), nil
|
||||||
|
}
|
||||||
211
syft/source/ocimodelsource/resolver.go
Normal file
211
syft/source/ocimodelsource/resolver.go
Normal file
@ -0,0 +1,211 @@
|
|||||||
|
package ocimodelsource
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/bmatcuk/doublestar/v4"
|
||||||
|
stereofile "github.com/anchore/stereoscope/pkg/file"
|
||||||
|
|
||||||
|
"github.com/anchore/syft/syft/file"
|
||||||
|
)
|
||||||
|
|
||||||
|
var _ file.Resolver = (*ociModelResolver)(nil)
|
||||||
|
|
||||||
|
// ociModelResolver is a minimal file.Resolver implementation that provides access to
|
||||||
|
// GGUF header data fetched from OCI model artifacts via range-GET requests.
|
||||||
|
type ociModelResolver struct {
|
||||||
|
tempFiles map[string]string // maps virtual path -> temporary file path
|
||||||
|
locations []file.Location
|
||||||
|
}
|
||||||
|
|
||||||
|
// newOCIModelResolver creates a new resolver with the given temporary files.
|
||||||
|
func newOCIModelResolver(tempFiles map[string]string) *ociModelResolver {
|
||||||
|
// Create locations for all temp files
|
||||||
|
locations := make([]file.Location, 0, len(tempFiles))
|
||||||
|
for virtualPath, tempPath := range tempFiles {
|
||||||
|
// Use NewVirtualLocation: realPath is tempPath, accessPath is virtualPath
|
||||||
|
locations = append(locations, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
|
||||||
|
return &ociModelResolver{
|
||||||
|
tempFiles: tempFiles,
|
||||||
|
locations: locations,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileContentsByLocation returns the contents of the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileContentsByLocation(location file.Location) (io.ReadCloser, error) {
|
||||||
|
// Get the real path (temp file) from the location
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Check if this is one of our managed files
|
||||||
|
found := false
|
||||||
|
for _, tempPath := range r.tempFiles {
|
||||||
|
if tempPath == realPath {
|
||||||
|
found = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !found {
|
||||||
|
return nil, fmt.Errorf("location not found in resolver: %s", location.RealPath)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open and return the temp file
|
||||||
|
f, err := os.Open(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to open temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileMetadataByLocation returns metadata for the file at the given location.
|
||||||
|
func (r *ociModelResolver) FileMetadataByLocation(location file.Location) (file.Metadata, error) {
|
||||||
|
realPath := location.RealPath
|
||||||
|
|
||||||
|
// Stat the temp file
|
||||||
|
info, err := os.Stat(realPath)
|
||||||
|
if err != nil {
|
||||||
|
return file.Metadata{}, fmt.Errorf("failed to stat temp file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return basic metadata
|
||||||
|
return file.Metadata{
|
||||||
|
Path: location.AccessPath, // Use AccessPath for virtual path
|
||||||
|
Type: stereofile.TypeRegular,
|
||||||
|
FileInfo: info,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HasPath checks if the given path exists in the resolver.
|
||||||
|
func (r *ociModelResolver) HasPath(path string) bool {
|
||||||
|
_, exists := r.tempFiles[path]
|
||||||
|
return exists
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByPath returns locations for files matching the given paths.
|
||||||
|
func (r *ociModelResolver) FilesByPath(paths ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, path := range paths {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if virtualPath == path {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByGlob returns locations for files matching the given glob patterns.
|
||||||
|
func (r *ociModelResolver) FilesByGlob(patterns ...string) ([]file.Location, error) {
|
||||||
|
var results []file.Location
|
||||||
|
|
||||||
|
for _, pattern := range patterns {
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
// Match against the virtual path
|
||||||
|
matched, err := doublestar.Match(pattern, virtualPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to match pattern %q: %w", pattern, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if matched {
|
||||||
|
results = append(results, file.NewVirtualLocation(tempPath, virtualPath))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FilesByMIMEType returns locations for files with the given MIME types.
|
||||||
|
// This is not implemented for OCI model artifacts as we don't have MIME type detection.
|
||||||
|
func (r *ociModelResolver) FilesByMIMEType(types ...string) ([]file.Location, error) {
|
||||||
|
// Not implemented - OCI model artifacts don't have MIME type detection
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// RelativeFileByPath returns a file at the given path relative to the reference location.
|
||||||
|
// This is not applicable for OCI model artifacts.
|
||||||
|
func (r *ociModelResolver) RelativeFileByPath(_ file.Location, path string) *file.Location {
|
||||||
|
// Not implemented - no layer hierarchy in OCI model artifacts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// AllLocations returns all file locations in the resolver.
|
||||||
|
func (r *ociModelResolver) AllLocations(ctx context.Context) <-chan file.Location {
|
||||||
|
ch := make(chan file.Location)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(ch)
|
||||||
|
|
||||||
|
for _, loc := range r.locations {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case ch <- loc:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return ch
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanup removes all temporary files managed by this resolver.
|
||||||
|
func (r *ociModelResolver) cleanup() error {
|
||||||
|
var errs []error
|
||||||
|
|
||||||
|
for virtualPath, tempPath := range r.tempFiles {
|
||||||
|
if err := os.Remove(tempPath); err != nil {
|
||||||
|
errs = append(errs, fmt.Errorf("failed to remove temp file for %s: %w", virtualPath, err))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(errs) > 0 {
|
||||||
|
return fmt.Errorf("cleanup errors: %v", errs)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractVirtualPath generates a virtual path for a GGUF layer.
// This simulates where the file would be in the artifact.
//
// When the "org.opencontainers.image.title" annotation holds a usable name, the
// result is that name rooted at "/". Leading slashes in the annotation are
// dropped so the result never starts with "//". If the annotation is missing,
// empty, or consists only of slashes, the path falls back to
// "/model-layer-<layerIndex>.gguf".
func extractVirtualPath(layerIndex int, annotations map[string]string) string {
	// Reading a missing key (or a nil map) yields "", which is handled the same
	// way as an explicitly empty title.
	if name := strings.TrimLeft(annotations["org.opencontainers.image.title"], "/"); name != "" {
		return "/" + name
	}

	// Fall back to generic name based on index
	return fmt.Sprintf("/model-layer-%d.gguf", layerIndex)
}
|
||||||
|
|
||||||
|
// createTempFileFromData writes data to a new temporary file and returns the
// file's path.
//
// The temp file name is derived from the base name of virtualPath: the name
// without its extension becomes the prefix and the extension is kept as the
// suffix, with a random component in between. The file is created in the
// default temp directory.
//
// The file is closed before returning. If writing or closing fails, the
// partially written file is removed (best effort) and an error is returned
// with an empty path.
func createTempFileFromData(data []byte, virtualPath string) (string, error) {
	// Extract filename from virtual path for better temp file naming
	filename := filepath.Base(virtualPath)
	ext := filepath.Ext(filename)
	prefix := strings.TrimSuffix(filename, ext) + "-"

	tempFile, err := os.CreateTemp("", prefix+"*"+ext)
	if err != nil {
		return "", fmt.Errorf("failed to create temp file: %w", err)
	}
	tempPath := tempFile.Name()

	// Always close before deciding the outcome: a close error can mean the data
	// never reached disk, and an open handle can block removal on some platforms.
	_, writeErr := tempFile.Write(data)
	closeErr := tempFile.Close()

	if writeErr != nil {
		_ = os.Remove(tempPath) // best effort; the write error is the one worth reporting
		return "", fmt.Errorf("failed to write to temp file: %w", writeErr)
	}
	if closeErr != nil {
		_ = os.Remove(tempPath) // best effort; the close error is the one worth reporting
		return "", fmt.Errorf("failed to close temp file: %w", closeErr)
	}

	return tempPath, nil
}
|
||||||
@ -7,6 +7,7 @@ import (
|
|||||||
"github.com/anchore/syft/syft/source"
|
"github.com/anchore/syft/syft/source"
|
||||||
"github.com/anchore/syft/syft/source/directorysource"
|
"github.com/anchore/syft/syft/source/directorysource"
|
||||||
"github.com/anchore/syft/syft/source/filesource"
|
"github.com/anchore/syft/syft/source/filesource"
|
||||||
|
"github.com/anchore/syft/syft/source/ocimodelsource"
|
||||||
"github.com/anchore/syft/syft/source/snapsource"
|
"github.com/anchore/syft/syft/source/snapsource"
|
||||||
"github.com/anchore/syft/syft/source/stereoscopesource"
|
"github.com/anchore/syft/syft/source/stereoscopesource"
|
||||||
)
|
)
|
||||||
@ -16,6 +17,7 @@ const (
|
|||||||
DirTag = stereoscope.DirTag
|
DirTag = stereoscope.DirTag
|
||||||
PullTag = stereoscope.PullTag
|
PullTag = stereoscope.PullTag
|
||||||
SnapTag = "snap"
|
SnapTag = "snap"
|
||||||
|
OCIModelTag = "oci-model"
|
||||||
)
|
)
|
||||||
|
|
||||||
// All returns all the configured source providers known to syft
|
// All returns all the configured source providers known to syft
|
||||||
@ -40,6 +42,9 @@ func All(userInput string, cfg *Config) []collections.TaggedValue[source.Provide
|
|||||||
|
|
||||||
// 3. try remote sources after everything else...
|
// 3. try remote sources after everything else...
|
||||||
|
|
||||||
|
// --from oci-model (model artifacts with header-only fetching)
|
||||||
|
Join(tagProvider(ocimodelsource.NewSourceProvider(userInput, cfg.RegistryOptions, cfg.Alias), OCIModelTag)).
|
||||||
|
|
||||||
// --from docker, registry, etc.
|
// --from docker, registry, etc.
|
||||||
Join(stereoscopeProviders.Select(PullTag)...).
|
Join(stereoscopeProviders.Select(PullTag)...).
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user