mirror of
https://github.com/anchore/syft.git
synced 2026-07-05 02:28:25 +02:00
fix: bound safetensors header read to content size
readSafeTensorsHeader pre-allocated the declared header length, which is read straight from the file and bounded only by the 100MB ceiling. A short file declaring a huge header could force a large allocation it never fills. Read incrementally via io.ReadAll(io.LimitReader(...)) and verify the full header was actually present Signed-off-by: Christopher Phillips <32073428+spiffcs@users.noreply.github.com>
This commit is contained in:
parent
b5fc7c46f1
commit
4d59bdbb7f
@ -51,10 +51,14 @@ func readSafeTensorsHeader(r io.Reader) (*safeTensorsHeader, error) {
|
||||
return nil, fmt.Errorf("safetensors header size %d exceeds maximum %d", headerLen, maxSafeTensorsHeaderSize)
|
||||
}
|
||||
|
||||
body := make([]byte, headerLen)
|
||||
if _, err := io.ReadFull(r, body); err != nil {
|
||||
// Read incrementally rather than pre-allocating headerLen up front
|
||||
body, err := io.ReadAll(io.LimitReader(r, int64(headerLen)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read header body: %w", err)
|
||||
}
|
||||
if uint64(len(body)) != headerLen {
|
||||
return nil, fmt.Errorf("safetensors header truncated: read %d of %d bytes", len(body), headerLen)
|
||||
}
|
||||
|
||||
var raw map[string]json.RawMessage
|
||||
if err := json.Unmarshal(body, &raw); err != nil {
|
||||
@ -123,21 +127,23 @@ func (h *safeTensorsHeader) dominantDType() string {
|
||||
return best
|
||||
}
|
||||
|
||||
// metadataHash returns a stable xxhash64 over the tensor entries + __metadata__.
|
||||
// Tensor keys are sorted to keep the hash deterministic across producers.
|
||||
// metadataHash returns a stable xxhash64 over the logical tensor content
|
||||
// (name + dtype + shape) plus the __metadata__ map. Tensor keys are sorted to
|
||||
// keep the hash deterministic across producers.
|
||||
func (h *safeTensorsHeader) metadataHash() string {
|
||||
type entry struct {
|
||||
Name string `json:"name"`
|
||||
Entry safeTensorsEntry `json:"entry"`
|
||||
type logicalEntry struct {
|
||||
Name string `json:"name"`
|
||||
DType string `json:"dtype"`
|
||||
Shape []int64 `json:"shape"`
|
||||
}
|
||||
entries := make([]entry, 0, len(h.tensors))
|
||||
entries := make([]logicalEntry, 0, len(h.tensors))
|
||||
for name, t := range h.tensors {
|
||||
entries = append(entries, entry{Name: name, Entry: t})
|
||||
entries = append(entries, logicalEntry{Name: name, DType: t.DType, Shape: t.Shape})
|
||||
}
|
||||
sort.Slice(entries, func(i, j int) bool { return entries[i].Name < entries[j].Name })
|
||||
|
||||
type hashInput struct {
|
||||
Tensors []entry `json:"tensors"`
|
||||
Tensors []logicalEntry `json:"tensors"`
|
||||
Metadata map[string]string `json:"metadata,omitempty"`
|
||||
}
|
||||
b, err := json.Marshal(hashInput{Tensors: entries, Metadata: h.metadata})
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user