syft/syft/pkg/cataloger/ai/processor.go
Christopher Phillips 9644340981
fix: license frontmatter
Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
2026-06-22 11:46:23 -04:00

713 lines
24 KiB
Go

package ai
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"path"
"sort"
"strings"
"github.com/cespare/xxhash/v2"
"gopkg.in/yaml.v3"
"github.com/anchore/syft/internal"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/licenses"
)
// ociGroupKey is the sentinel grouping key for every safetensors package that
// originated from an OCI model artifact. The ContainerImageModel resolver gives
// each layer the virtual RealPath "/" regardless of layer media type, so all
// safetensors packages from a single OCI scan collapse into one group.
//
// safeTensorsGroupKey returns this value whenever a package's primary
// location has RealPath "/"; the enrichment and naming steps compare the group
// key against it to choose the OCI code path over the directory-scan one.
const ociGroupKey = "@oci@"
// ggufMergeProcessor collapses the GGUF packages produced by one scan into the
// package(s) that actually carry a model name.
//
// Packages are split into named ones and nameless ones. Nameless packages are
// never returned; when their metadata is a pkg.GGUFFileHeader the header is
// kept (with MetadataKeyValuesHash cleared) as a candidate "part".
//
//   - An incoming error or an empty package list is passed through untouched.
//   - With no named package at all, no packages are returned.
//   - With exactly one named package, the collected nameless headers are stored
//     in the Parts field of its GGUFFileHeader metadata.
//   - With several named packages nothing is merged: headers arrive in no
//     guaranteed order, so a nameless header cannot be attributed to a specific
//     named package.
//
// Relationships are always returned unchanged.
func ggufMergeProcessor(pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
	if err != nil || len(pkgs) == 0 {
		return pkgs, rels, err
	}

	var (
		named []pkg.Package
		parts []pkg.GGUFFileHeader
	)
	for i := range pkgs {
		candidate := pkgs[i]
		if candidate.Name != "" {
			named = append(named, candidate)
			continue
		}
		hdr, isGGUF := candidate.Metadata.(pkg.GGUFFileHeader)
		if !isGGUF {
			continue
		}
		// a part must not carry its own key/value hash
		hdr.MetadataKeyValuesHash = ""
		parts = append(parts, hdr)
	}

	switch {
	case len(named) == 0:
		// nothing nameable was found: report no packages
		return nil, rels, err
	case len(named) == 1 && len(parts) > 0:
		// a single named package unambiguously owns every nameless header
		if hdr, isGGUF := named[0].Metadata.(pkg.GGUFFileHeader); isGGUF {
			hdr.Parts = parts
			named[0].Metadata = hdr
		}
	}
	return named, rels, err
}
// safeTensorsMergeProcessor turns the nameless, per-file safetensors packages
// emitted by the parsers into one named package per model. It owns naming,
// license resolution, config.json mining, cross-shard rollup and the
// attachment of supporting evidence.
//
// Steps:
//
//  1. split off any package whose metadata is not pkg.SafeTensorsModelInfo;
//     those are passed through unchanged and listed first in the result;
//  2. group the safetensors packages (see groupSafeTensorsPackages) by parent
//     directory, or by the OCI sentinel key;
//  3. for each group, in sorted key order so the output is deterministic:
//     merge the members, enrich the merged package through the resolver, and
//     pick a name;
//  4. drop any group for which no name could be determined — there is no
//     opaque fallback name.
//
// An incoming error or an empty package list is returned untouched, as is the
// input when it contains no safetensors packages. Relationships are returned
// unchanged.
func safeTensorsMergeProcessor(ctx context.Context, resolver file.Resolver, pkgs []pkg.Package, rels []artifact.Relationship, err error) ([]pkg.Package, []artifact.Relationship, error) {
	if err != nil || len(pkgs) == 0 {
		return pkgs, rels, err
	}

	// Only SafeTensorsModelInfo packages take part in the merge; anything else
	// is carried through as-is.
	var safeTensors, passthrough []pkg.Package
	for _, candidate := range pkgs {
		switch candidate.Metadata.(type) {
		case pkg.SafeTensorsModelInfo:
			safeTensors = append(safeTensors, candidate)
		default:
			passthrough = append(passthrough, candidate)
		}
	}
	if len(safeTensors) == 0 {
		return pkgs, rels, err
	}

	byGroup := groupSafeTensorsPackages(safeTensors)

	// Map iteration order is random; walk the groups by sorted key instead.
	groupKeys := make([]string, 0, len(byGroup))
	for groupKey := range byGroup {
		groupKeys = append(groupKeys, groupKey)
	}
	sort.Strings(groupKeys)

	result := passthrough
	for _, groupKey := range groupKeys {
		model := mergeSafeTensorsGroup(byGroup[groupKey])
		declaredName := enrichSafeTensorsGroup(ctx, resolver, groupKey, &model)
		model.Name = pickSafeTensorsName(model, groupKey, declaredName)
		if model.Name == "" {
			// unnameable groups are dropped by design
			continue
		}
		model.SetID()
		result = append(result, model)
	}
	return result, rels, nil
}
// groupSafeTensorsPackages buckets packages under the key computed by
// safeTensorsGroupKey: the parent directory of the primary-evidence location,
// or the OCI sentinel for locations at the virtual "/" path. Packages for
// which no key can be computed (no locations) are left out of the result.
// Within a bucket the input order is preserved.
func groupSafeTensorsPackages(pkgs []pkg.Package) map[string][]pkg.Package {
	buckets := make(map[string][]pkg.Package)
	for i := range pkgs {
		if groupKey := safeTensorsGroupKey(pkgs[i]); groupKey != "" {
			buckets[groupKey] = append(buckets[groupKey], pkgs[i])
		}
	}
	return buckets
}
// safeTensorsGroupKey returns the grouping key for a single package:
//
//   - "" when the package has no location at all;
//   - ociGroupKey when the chosen location's RealPath is exactly "/";
//   - otherwise the directory portion of that RealPath.
func safeTensorsGroupKey(p pkg.Package) string {
	primary := primaryEvidenceLocation(p)
	switch {
	case primary == nil:
		return ""
	case primary.RealPath == "/":
		return ociGroupKey
	default:
		return path.Dir(primary.RealPath)
	}
}
// primaryEvidenceLocation returns a pointer to the first of the package's
// locations annotated as primary evidence. When no location carries that
// annotation the first location is returned instead; a package without
// locations yields nil. The pointer refers to an element of the slice
// returned by ToSlice, not to the package's own storage.
func primaryEvidenceLocation(p pkg.Package) *file.Location {
	candidates := p.Locations.ToSlice()
	if len(candidates) == 0 {
		return nil
	}
	for idx := range candidates {
		annotations := candidates[idx].Annotations
		if annotations == nil {
			continue
		}
		if annotations[pkg.EvidenceAnnotationKey] == pkg.PrimaryEvidenceAnnotation {
			return &candidates[idx]
		}
	}
	// no explicit primary evidence: fall back to the first location
	return &candidates[0]
}
// mergeSafeTensorsGroup reduces the members of one group to a single,
// still-nameless package of type pkg.ModelPkg whose locations are the union of
// every member's locations.
//
// Members fall into two buckets (see bucketSafeTensorsMembers):
//
//   - "aggregate" members have no MetadataHash and carry producer-declared
//     totals;
//   - "shard" members carry a content-derived MetadataHash and per-shard
//     values.
//
// Aggregates are applied first and therefore win for every field they set.
// Shards fill in whatever is still empty, contribute UserMetadata and the
// rollup MetadataHash, and — when there is more than one — are recorded in
// Parts, sorted by MetadataHash.
//
// Defaults: a TensorCount still zero after the aggregates becomes the sum over
// the shards; a ShardCount still zero becomes the number of shards, or 1 when
// the group has none.
func mergeSafeTensorsGroup(members []pkg.Package) pkg.Package {
	aggregates, shards := bucketSafeTensorsMembers(members)

	info := pkg.SafeTensorsModelInfo{Format: "safetensors"}
	mergeAggregatesInto(&info, aggregates)
	tensorSum, shardHashes := mergeShardsInto(&info, shards)

	if info.TensorCount == 0 {
		info.TensorCount = tensorSum
	}
	if info.ShardCount == 0 {
		info.ShardCount = 1
		if n := len(shards); n > 0 {
			info.ShardCount = n
		}
	}
	info.MetadataHash = rollupHash(shardHashes)

	// A per-shard breakdown adds nothing for a single shard: the merged view
	// already exposes all of its fields.
	if len(shards) > 1 {
		breakdown := make([]pkg.SafeTensorsModelInfo, len(shards))
		copy(breakdown, shards)
		sort.Slice(breakdown, func(a, b int) bool {
			return breakdown[a].MetadataHash < breakdown[b].MetadataHash
		})
		info.Parts = breakdown
	}

	return pkg.Package{
		Locations: unionLocations(members),
		Type:      pkg.ModelPkg,
		Metadata:  info,
	}
}
// mergeAggregatesInto copies aggregate-declared values into merged. Every
// field is "first non-empty wins": a value already present on merged (zero
// counts and empty strings count as absent) is never overwritten, so the
// order of aggregates decides ties.
func mergeAggregatesInto(merged *pkg.SafeTensorsModelInfo, aggregates []pkg.SafeTensorsModelInfo) {
	for i := range aggregates {
		agg := &aggregates[i]

		if merged.TensorCount == 0 {
			merged.TensorCount = agg.TensorCount
		}
		if merged.ShardCount == 0 {
			merged.ShardCount = agg.ShardCount
		}

		// string-valued fields all follow the same fill-if-empty rule
		stringFields := []struct {
			dst *string
			src string
		}{
			{&merged.Parameters, agg.Parameters},
			{&merged.TotalSize, agg.TotalSize},
			{&merged.Architecture, agg.Architecture},
			{&merged.Quantization, agg.Quantization},
			{&merged.TorchDtype, agg.TorchDtype},
			{&merged.TransformersVersion, agg.TransformersVersion},
		}
		for _, f := range stringFields {
			firstNonEmpty(f.dst, f.src)
		}
	}
}
// mergeShardsInto folds per-shard header metadata into merged.
//
// For each shard, in order: Quantization, Parameters, Architecture,
// TorchDtype and TransformersVersion are filled in only where merged is still
// empty; UserMetadata entries are appended unless an entry with the same Key
// was already taken from an earlier shard.
//
// It returns the sum of the shards' TensorCount values and the non-empty
// per-shard MetadataHash values in shard order, for use by rollupHash.
func mergeShardsInto(merged *pkg.SafeTensorsModelInfo, shards []pkg.SafeTensorsModelInfo) (shardTensorTotal uint64, hashes []string) {
	seenKeys := make(map[string]struct{})
	for i := range shards {
		shard := &shards[i]
		shardTensorTotal += shard.TensorCount

		firstNonEmpty(&merged.Quantization, shard.Quantization)
		firstNonEmpty(&merged.Parameters, shard.Parameters)
		firstNonEmpty(&merged.Architecture, shard.Architecture)
		firstNonEmpty(&merged.TorchDtype, shard.TorchDtype)
		firstNonEmpty(&merged.TransformersVersion, shard.TransformersVersion)

		// first shard to mention a key wins
		for _, entry := range shard.UserMetadata {
			if _, dup := seenKeys[entry.Key]; dup {
				continue
			}
			seenKeys[entry.Key] = struct{}{}
			merged.UserMetadata = append(merged.UserMetadata, entry)
		}

		if shard.MetadataHash == "" {
			continue
		}
		hashes = append(hashes, shard.MetadataHash)
	}
	return shardTensorTotal, hashes
}
// firstNonEmpty stores v in *dst unless *dst already holds a non-empty
// string. dst must not be nil.
func firstNonEmpty(dst *string, v string) {
	if len(*dst) > 0 {
		return
	}
	*dst = v
}
// unionLocations returns a new location set holding every location of every
// member.
func unionLocations(members []pkg.Package) file.LocationSet {
	combined := file.NewLocationSet()
	for i := range members {
		locs := members[i].Locations.ToSlice()
		for j := range locs {
			combined.Add(locs[j])
		}
	}
	return combined
}
// bucketSafeTensorsMembers classifies the group's members by their
// pkg.SafeTensorsModelInfo metadata: an empty MetadataHash marks an aggregate,
// a non-empty one marks a shard. Members with any other metadata type are
// ignored. Input order is preserved inside each bucket.
func bucketSafeTensorsMembers(members []pkg.Package) (aggregates, shards []pkg.SafeTensorsModelInfo) {
	for i := range members {
		info, isSafeTensors := members[i].Metadata.(pkg.SafeTensorsModelInfo)
		switch {
		case !isSafeTensors:
			// not ours; skip
		case info.MetadataHash == "":
			aggregates = append(aggregates, info)
		default:
			shards = append(shards, info)
		}
	}
	return aggregates, shards
}
// rollupHash derives one hash from a set of per-member hashes, independent of
// the order they are supplied in:
//
//   - no hashes: "";
//   - exactly one hash: that hash, unchanged;
//   - otherwise: the xxhash (16 lowercase hex digits, zero padded) of the
//     sorted hashes joined with "|".
//
// The input slice is not modified.
func rollupHash(hashes []string) string {
	switch len(hashes) {
	case 0:
		return ""
	case 1:
		return hashes[0]
	}

	ordered := make([]string, len(hashes))
	copy(ordered, hashes)
	sort.Strings(ordered)

	joined := strings.Join(ordered, "|")
	return fmt.Sprintf("%016x", xxhash.Sum64String(joined))
}
// enrichSafeTensorsGroup consults the resolver for the files that accompany a
// group and folds what it finds into merged:
//
//   - the metadata is updated by the OCI or directory enrichment routine,
//     chosen by whether groupKey is the OCI sentinel;
//   - if any licenses were found they replace merged.Licenses;
//   - every consulted file's location is added to merged.Locations, annotated
//     as supporting evidence.
//
// merged.Metadata must hold a pkg.SafeTensorsModelInfo (the type assertion is
// unchecked). The returned value is the raw model name/path discovered by the
// enrichment routine, or "" if none was found; the caller reduces it to its
// base name.
func enrichSafeTensorsGroup(ctx context.Context, resolver file.Resolver, groupKey string, merged *pkg.Package) (nameOrPath string) {
	info := merged.Metadata.(pkg.SafeTensorsModelInfo)

	var declared []pkg.License
	var evidence []file.Location
	switch groupKey {
	case ociGroupKey:
		nameOrPath, declared, evidence = enrichSafeTensorsOCI(ctx, resolver, &info)
	default:
		nameOrPath, declared, evidence = enrichSafeTensorsDir(ctx, resolver, groupKey, &info)
	}

	// info is a copy; write the enriched value back
	merged.Metadata = info

	if len(declared) > 0 {
		merged.Licenses = pkg.NewLicenseSet(declared...)
	}
	for i := range evidence {
		annotated := evidence[i].WithAnnotation(pkg.EvidenceAnnotationKey, pkg.SupportingEvidenceAnnotation)
		merged.Locations.Add(annotated)
	}
	return nameOrPath
}
// enrichSafeTensorsDir is the directory-scan enrichment: it looks for
// "config.json" and "README.md" directly inside dir.
//
//   - config.json, when readable, is applied to md and supplies nameOrPath
//     from its _name_or_path field;
//   - README.md, when it has parseable frontmatter, supplies the licenses from
//     its license field and — only if config.json gave no name — nameOrPath
//     from the first base_model entry.
//
// The location of each file that was successfully read is returned in
// supporting, config.json first.
func enrichSafeTensorsDir(ctx context.Context, resolver file.Resolver, dir string, md *pkg.SafeTensorsModelInfo) (nameOrPath string, lics []pkg.License, supporting []file.Location) {
	configLoc, config := readDirHFConfig(resolver, path.Join(dir, "config.json"))
	if config != nil {
		applyHFConfig(md, config)
		nameOrPath = config.NameOrPath
		supporting = append(supporting, *configLoc)
	}

	readmeLoc, card := readDirReadmeFrontmatter(resolver, path.Join(dir, "README.md"))
	if card == nil {
		return nameOrPath, lics, supporting
	}
	if card.License != "" {
		lics = pkg.NewLicensesFromValuesWithContext(ctx, card.License)
	}
	// the model card is only a fallback for the name
	if nameOrPath == "" && len(card.BaseModel) > 0 {
		nameOrPath = card.BaseModel[0]
	}
	supporting = append(supporting, *readmeLoc)

	return nameOrPath, lics, supporting
}
// enrichSafeTensorsOCI is the OCI-artifact enrichment. It requires the
// resolver to implement file.OCIMediaTypeResolver and returns nothing when it
// does not.
//
// Every layer listed under dockerAIModelFileMediaType is content-sniffed by
// classifyOCIModelFileLayer (README frontmatter vs. HF config.json); layers
// that classify successfully are recorded as supporting evidence.
//
// Name precedence: config.json _name_or_path, then README base_model. This is
// decided after all layers were read, so layer order does not matter between
// the two kinds.
//
// License precedence: a README license wins. Otherwise the layers listed under
// dockerAILicenseMediaType are handed to identifyLicenseLayers and all of them
// are recorded as supporting evidence.
//
// Failures to list layers are logged at debug level and treated as "no
// layers".
func enrichSafeTensorsOCI(ctx context.Context, resolver file.Resolver, md *pkg.SafeTensorsModelInfo) (nameOrPath string, lics []pkg.License, supporting []file.Location) {
	byMediaType, isOCI := resolver.(file.OCIMediaTypeResolver)
	if !isOCI {
		return "", nil, nil
	}

	layers, err := byMediaType.FilesByMediaType(dockerAIModelFileMediaType)
	if err != nil {
		log.Debugf("failed to list docker AI model-file layers: %v", err)
	}

	// Candidates are gathered per kind so precedence is applied afterwards.
	var fromConfig, fromReadme, readmeLicense string
	for idx := range layers {
		if !classifyOCIModelFileLayer(resolver, layers[idx], md, &fromConfig, &fromReadme, &readmeLicense) {
			continue
		}
		supporting = append(supporting, layers[idx])
	}

	nameOrPath = fromReadme
	if fromConfig != "" {
		nameOrPath = fromConfig
	}

	if readmeLicense != "" {
		return nameOrPath, pkg.NewLicensesFromValuesWithContext(ctx, readmeLicense), supporting
	}

	// No README license: fall back to dedicated license layers.
	licenseLayers, err := byMediaType.FilesByMediaType(dockerAILicenseMediaType)
	if err != nil {
		log.Debugf("failed to list docker AI license layers: %v", err)
	}
	if len(licenseLayers) == 0 {
		return nameOrPath, lics, supporting
	}
	lics = identifyLicenseLayers(ctx, resolver, licenseLayers)
	supporting = append(supporting, licenseLayers...)
	return nameOrPath, lics, supporting
}
// identifyLicenseLayers converts license-layer locations into pkg.License
// values in two passes:
//
//  1. each layer is checked for an spdx-id in its YAML frontmatter; a hit
//     becomes a license tagged with that layer's location;
//  2. the layers without such an id are handed, together, to the shared
//     license scanner.
//
// Frontmatter-derived licenses come first in the result, followed by the
// scanner's findings.
func identifyLicenseLayers(ctx context.Context, resolver file.Resolver, locs []file.Location) []pkg.License {
	var (
		declared []pkg.License
		needScan []file.Location
	)
	for idx := range locs {
		// per-iteration copy so the pointer handed out below is not shared
		layer := locs[idx]
		spdxID := readLicenseSPDXIDFromFrontmatter(resolver, layer)
		if spdxID == "" {
			needScan = append(needScan, layer)
			continue
		}
		declared = append(declared, pkg.NewLicenseFromFieldsWithContext(ctx, spdxID, "", &layer))
	}

	if len(needScan) == 0 {
		return declared
	}
	return append(declared, licenses.FindAtLocations(ctx, resolver, needScan...)...)
}
// readLicenseSPDXIDFromFrontmatter reads at most the first 64 KiB of the file
// at loc and returns the spdx-id found in its YAML frontmatter. It returns ""
// when the file cannot be opened or read, or when parseLicenseFrontmatter
// finds no id. The cap bounds memory use for unexpectedly large layers.
func readLicenseSPDXIDFromFrontmatter(resolver file.Resolver, loc file.Location) string {
	const maxFrontmatterBytes = 64 * 1024

	contents, err := resolver.FileContentsByLocation(loc)
	if err != nil {
		return ""
	}
	defer internal.CloseAndLogError(contents, loc.RealPath)

	head, err := io.ReadAll(io.LimitReader(contents, maxFrontmatterBytes))
	if err != nil {
		return ""
	}
	return parseLicenseFrontmatter(head)
}
// classifyOCIModelFileLayer reads up to 4 MiB of a model-file layer and
// classifies it by its leading bytes (after skipping leading whitespace and a
// UTF-8 byte-order mark):
//
//   - "---" → README-style YAML frontmatter. The first license seen is stored
//     in *license and the first base_model entry in *readmeName; values
//     already set by an earlier layer are kept.
//   - "{"   → Hugging Face config.json. Its fields are applied to md via
//     applyHFConfig and the first non-empty _name_or_path is stored in
//     *configName.
//
// It returns true when the layer was recognised and parsed, meaning the
// caller should record it as supporting evidence. Unreadable, unrecognised or
// unparseable layers return false and leave the out-params untouched.
//
// configName, readmeName and license must be non-nil.
func classifyOCIModelFileLayer(resolver file.Resolver, loc file.Location, md *pkg.SafeTensorsModelInfo, configName, readmeName, license *string) bool {
	rc, err := resolver.FileContentsByLocation(loc)
	if err != nil {
		return false
	}
	defer internal.CloseAndLogError(rc, loc.RealPath)

	buf, err := io.ReadAll(io.LimitReader(rc, 4*1024*1024))
	if err != nil {
		return false
	}

	trimmed := trimLeadingWhitespace(buf)
	switch {
	case hasPrefix(trimmed, "---"):
		// parseFrontmatter does its own BOM/whitespace trimming, so it is
		// given the untouched buffer.
		fm := parseFrontmatter(buf)
		if fm == nil {
			return false
		}
		if *license == "" {
			*license = fm.License
		}
		if *readmeName == "" && len(fm.BaseModel) > 0 {
			*readmeName = fm.BaseModel[0]
		}
		return true
	case hasPrefix(trimmed, "{"):
		// Decode the trimmed view: encoding/json rejects a leading byte-order
		// mark, so decoding buf would fail for exactly the BOM-prefixed files
		// the sniffing above goes out of its way to recognise.
		var cfg hfConfig
		if err := json.Unmarshal(trimmed, &cfg); err != nil {
			return false
		}
		applyHFConfig(md, &cfg)
		if *configName == "" && cfg.NameOrPath != "" {
			*configName = cfg.NameOrPath
		}
		return true
	}
	return false
}
// applyHFConfig copies the surfaced config.json fields onto md, filling only
// fields that are still empty so that values set earlier always win:
// Architecture from the first entry of Architectures (if any), TorchDtype and
// TransformersVersion from their namesakes.
func applyHFConfig(md *pkg.SafeTensorsModelInfo, cfg *hfConfig) {
	if len(cfg.Architectures) > 0 {
		firstNonEmpty(&md.Architecture, cfg.Architectures[0])
	}
	firstNonEmpty(&md.TorchDtype, cfg.TorchDtype)
	firstNonEmpty(&md.TransformersVersion, cfg.TransformersVersion)
}
// pickSafeTensorsName implements the naming precedence chain:
//
//  1. the base name of nameOrPath (so "org/Model" → "Model");
//  2. OCI manifest title (deferred to a follow-up; reserved here);
//  3. "<Architecture>-<Parameters>", only when both are populated;
//  4. the base name of the group's directory (dir-scan only — the OCI
//     sentinel key is not a path).
//
// A base name of "." or "/" is never used as a name. path.Base produces those
// for inputs such as "", ".", "./" and "/", all of which can appear in a
// config.json _name_or_path; such a value falls through to the next rule
// rather than naming the package "." or "/".
//
// Returns "" to signal that the merge processor should drop the group rather
// than invent a name.
func pickSafeTensorsName(merged pkg.Package, groupKey, nameOrPath string) string {
	md, _ := merged.Metadata.(pkg.SafeTensorsModelInfo)

	// usableBase returns the last path element, or "" when it is not a
	// meaningful name.
	usableBase := func(p string) string {
		switch base := path.Base(p); base {
		case ".", "/":
			return ""
		default:
			return base
		}
	}

	if name := usableBase(nameOrPath); name != "" {
		return name
	}
	// 2. OCI manifest title — follow-up.
	if md.Architecture != "" && md.Parameters != "" {
		return md.Architecture + "-" + md.Parameters
	}
	if groupKey != ociGroupKey {
		return usableBase(groupKey)
	}
	return ""
}
// --- Relocated enrichment helpers ----------------------------------------
//
// These types and functions used to live in the parser files; they moved here
// when the parsers shrank to "just decode the safetensors-specific format" and
// every resolver-backed read centralized in the merge processor.
// hfConfig is a minimal projection of Hugging Face config.json fields. Only
// the keys listed here are decoded.
type hfConfig struct {
// Architectures is the "architectures" list; applyHFConfig uses only its
// first entry.
Architectures []string `json:"architectures"`
// TorchDtype is the "torch_dtype" value, copied verbatim into the metadata.
TorchDtype string `json:"torch_dtype"`
// TransformersVersion is the "transformers_version" value, copied verbatim
// into the metadata.
TransformersVersion string `json:"transformers_version"`
// NameOrPath is the "_name_or_path" value; it is the preferred source for
// the package name (reduced to its base name by pickSafeTensorsName).
NameOrPath string `json:"_name_or_path"`
}
// readmeFrontmatter holds the subset of YAML frontmatter fields we extract.
// Values of this type are built by parseFrontmatter.
type readmeFrontmatter struct {
// License is the frontmatter "license" value, passed on as-is to license
// construction.
License string `yaml:"license"`
// BaseModel is the frontmatter "base_model" value normalised to a list;
// callers use only its first entry, as a fallback model name.
BaseModel []string `yaml:"base_model"`
}
// readDirHFConfig resolves p (expected to be a Hugging Face config.json),
// decodes the first matching file and returns its location together with the
// decoded config. It returns (nil, nil) when the path does not resolve, the
// file cannot be opened, or the JSON cannot be decoded (the decode failure is
// logged at debug level).
//
// At most 4 MiB of the file is read — the same bound the OCI path applies to
// config layers — so a huge or hostile config.json in the scanned tree cannot
// make the decoder buffer an unbounded value. A file larger than the cap is
// seen truncated, fails to decode, and is treated as absent.
func readDirHFConfig(resolver file.Resolver, p string) (*file.Location, *hfConfig) {
	const maxConfigBytes = 4 * 1024 * 1024

	locations, err := resolver.FilesByPath(p)
	if err != nil || len(locations) == 0 {
		return nil, nil
	}

	rc, err := resolver.FileContentsByLocation(locations[0])
	if err != nil {
		return nil, nil
	}
	defer internal.CloseAndLogError(rc, p)

	var cfg hfConfig
	if err := json.NewDecoder(io.LimitReader(rc, maxConfigBytes)).Decode(&cfg); err != nil {
		log.Debugf("failed to decode %s: %v", p, err)
		return nil, nil
	}
	return &locations[0], &cfg
}
// readDirReadmeFrontmatter resolves p (expected to be a model card README),
// reads at most the first 1 MiB of the first matching file and parses its
// YAML frontmatter. It returns the file's location with the parsed
// frontmatter, or (nil, nil) when the path does not resolve, the file cannot
// be opened or read, or parseFrontmatter returns nil.
func readDirReadmeFrontmatter(resolver file.Resolver, p string) (*file.Location, *readmeFrontmatter) {
	const maxReadmeBytes = 1024 * 1024

	matches, err := resolver.FilesByPath(p)
	if err != nil || len(matches) == 0 {
		return nil, nil
	}
	readme := &matches[0]

	contents, err := resolver.FileContentsByLocation(*readme)
	if err != nil {
		return nil, nil
	}
	defer internal.CloseAndLogError(contents, p)

	head, err := io.ReadAll(io.LimitReader(contents, maxReadmeBytes))
	if err != nil {
		return nil, nil
	}

	if card := parseFrontmatter(head); card != nil {
		return readme, card
	}
	return nil, nil
}
// extractFrontmatterBlock returns the YAML bytes between the opening and the
// closing "---" delimiter of a file, or nil when no closed frontmatter block
// exists. Shared by every YAML-frontmatter parser the cataloger needs.
//
// Leading byte-order marks and whitespace are skipped before the opening
// delimiter is looked for, and the rest of the opening delimiter's line is
// discarded. The returned slice aliases buf and excludes the newline that
// precedes the closing delimiter.
//
// A block that is closed but empty ("---\n---") yields an empty, non-nil
// slice, so callers can tell "frontmatter present but empty" apart from "no
// frontmatter".
func extractFrontmatterBlock(buf []byte) []byte {
	delim := []byte("---")

	trimmed := bytes.TrimLeft(buf, "\xef\xbb\xbf \t\r\n")
	if !bytes.HasPrefix(trimmed, delim) {
		return nil
	}

	// Skip the remainder of the opening line. Without a newline there is no
	// second line that could hold a closing delimiter.
	rest := trimmed[len(delim):]
	nl := bytes.IndexByte(rest, '\n')
	if nl < 0 {
		return nil
	}
	rest = rest[nl+1:]

	// Closing delimiter directly on the next line: closed, empty block. The
	// "\n---" search below cannot see it because the preceding newline has
	// already been consumed with the opening line.
	if bytes.HasPrefix(rest, delim) {
		if tail := rest[len(delim):]; len(tail) == 0 || tail[0] == '\n' || tail[0] == '\r' {
			return rest[:0]
		}
	}

	end := bytes.Index(rest, []byte("\n---"))
	if end < 0 {
		return nil
	}
	return rest[:end]
}
// parseFrontmatter decodes a Hugging Face model card YAML frontmatter block
// and returns its license and base_model fields. It returns nil when buf has
// no frontmatter block or the block is not valid YAML (logged at debug level).
//
// base_model is decoded through a yaml.Node so that both shapes seen in the
// wild are accepted without failing the whole block:
//
//   - a scalar ("org/model") becomes a one-element list;
//   - a sequence is decoded element by element.
//
// An explicit YAML null ("base_model: null", "base_model: ~") or an empty
// value leaves BaseModel empty. A null is still a scalar node whose Value is
// the literal text "null" or "~", so it has to be excluded by tag; otherwise
// that text would be taken as a model name.
func parseFrontmatter(buf []byte) *readmeFrontmatter {
	block := extractFrontmatterBlock(buf)
	if block == nil {
		return nil
	}

	var raw struct {
		License   string    `yaml:"license"`
		BaseModel yaml.Node `yaml:"base_model"`
	}
	if err := yaml.Unmarshal(block, &raw); err != nil {
		log.Debugf("failed to parse README frontmatter: %v", err)
		return nil
	}

	fm := readmeFrontmatter{License: raw.License}
	switch raw.BaseModel.Kind {
	case yaml.ScalarNode:
		if raw.BaseModel.ShortTag() != "!!null" && raw.BaseModel.Value != "" {
			fm.BaseModel = []string{raw.BaseModel.Value}
		}
	case yaml.SequenceNode:
		// Best effort: whatever was decoded is kept even if some entries
		// could not be, and the rest of the frontmatter stays usable.
		if err := raw.BaseModel.Decode(&fm.BaseModel); err != nil {
			log.Debugf("failed to decode README frontmatter base_model: %v", err)
		}
	}
	return &fm
}
// licenseFrontmatter holds the fields we lift from a choosealicense.com-style
// YAML frontmatter block at the top of a license file (the LICENSE blobs Docker
// Model Runner publishes for AI artifacts use this shape).
type licenseFrontmatter struct {
// SPDXID is the frontmatter "spdx-id" value; parseLicenseFrontmatter returns
// it unchanged, and it is empty when the key is absent.
SPDXID string `yaml:"spdx-id"`
}
// parseLicenseFrontmatter returns the producer-declared SPDX identifier from a
// choosealicense.com-style YAML frontmatter block, or "" if the buffer has no
// frontmatter or no spdx-id field — caller should fall back to a full license
// scan in that case.
func parseLicenseFrontmatter(buf []byte) string {
block := extractFrontmatterBlock(buf)
if block == nil {
return ""
}
var fm licenseFrontmatter
if err := yaml.Unmarshal(block, &fm); err != nil {
log.Debugf("failed to parse license frontmatter: %v", err)
return ""
}
return fm.SPDXID
}
// hasPrefix reports whether b begins with the bytes of s. An empty s is a
// prefix of everything, including a nil b.
func hasPrefix(b []byte, s string) bool {
	if len(b) < len(s) {
		return false
	}
	head := b[:len(s)]
	return string(head) == s
}
// trimLeadingWhitespace returns b without its leading run of spaces, tabs,
// carriage returns, newlines and UTF-8 byte-order marks, in whatever order
// they appear. The result aliases b.
//
// A BOM is the very first thing in a file, so any leading blank lines come
// after it; trimming only "whitespace, then one BOM" would leave those lines
// in place and defeat prefix sniffing. The cutset is the same one
// extractFrontmatterBlock uses, so the two agree on where content starts.
func trimLeadingWhitespace(b []byte) []byte {
	return bytes.TrimLeft(b, "\xef\xbb\xbf \t\r\n")
}