address reading archives into memory

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2026-05-13 10:56:57 -04:00
parent 9f047fdf11
commit dfde0974b0
No known key found for this signature in database
2 changed files with 200 additions and 141 deletions

View File

@ -1,17 +1,18 @@
package kernel package kernel
import ( import (
"bytes"
"compress/gzip"
"context" "context"
"debug/elf" "debug/elf"
"errors"
"fmt" "fmt"
"io" "io"
"os"
"strings" "strings"
"github.com/klauspost/compress/zstd" "github.com/mholt/archives"
"github.com/ulikunitz/xz"
intfile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader" "github.com/anchore/syft/syft/internal/unionreader"
@ -27,10 +28,11 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err) return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err)
} }
moduleReader, err := decompressedModuleReader(reader.RealPath, unionReader) moduleReader, err := decompressedModuleReader(ctx, reader.RealPath, unionReader)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to decompress kernel module %q: %w", reader.RealPath, err) return nil, nil, fmt.Errorf("unable to decompress kernel module %q: %w", reader.RealPath, err)
} }
defer moduleReader.Close()
metadata, err := parseLinuxKernelModuleMetadata(moduleReader) metadata, err := parseLinuxKernelModuleMetadata(moduleReader)
if err != nil { if err != nil {
@ -52,58 +54,88 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
} }
// decompressedModuleReader returns a UnionReader over the decompressed contents of the kernel module // decompressedModuleReader returns a UnionReader over the decompressed contents of the kernel module
// if the path indicates it is compressed (.ko.gz, .ko.xz, .ko.zst). For plain .ko files, the // when the path indicates compression (e.g. .ko.gz, .ko.xz, .ko.zst). For plain .ko files the original
// original reader is returned unchanged. // reader is returned unchanged. ELF parsing requires random access (io.ReaderAt + io.Seeker), so
func decompressedModuleReader(path string, r unionreader.UnionReader) (unionreader.UnionReader, error) { // compressed streams are spilled to a temp file rather than buffered in memory — kernel modules can be
var decompressed []byte // tens of MB decompressed and large numbers of them are scanned per cataloger run. The caller owns
// the returned reader and must Close it; the underlying reader (r) is not closed by Close on the
switch { // passthrough path — its lifecycle is the caller's.
case strings.HasSuffix(path, ".ko.gz"): func decompressedModuleReader(ctx context.Context, path string, r unionreader.UnionReader) (unionreader.UnionReader, error) {
gz, err := gzip.NewReader(r) // fast path: plain .ko files don't need format sniffing
if err != nil { if strings.HasSuffix(path, ".ko") {
return nil, fmt.Errorf("unable to create gzip reader: %w", err) return &nopCloseUnionReader{UnionReader: r}, nil
}
defer gz.Close()
decompressed, err = io.ReadAll(gz)
if err != nil {
return nil, fmt.Errorf("unable to decompress gzip stream: %w", err)
}
case strings.HasSuffix(path, ".ko.xz"):
xzr, err := xz.NewReader(r)
if err != nil {
return nil, fmt.Errorf("unable to create xz reader: %w", err)
}
decompressed, err = io.ReadAll(xzr)
if err != nil {
return nil, fmt.Errorf("unable to decompress xz stream: %w", err)
}
case strings.HasSuffix(path, ".ko.zst"):
zstdr, err := zstd.NewReader(r)
if err != nil {
return nil, fmt.Errorf("unable to create zstd reader: %w", err)
}
defer zstdr.Close()
decompressed, err = io.ReadAll(zstdr)
if err != nil {
return nil, fmt.Errorf("unable to decompress zstd stream: %w", err)
}
default:
return r, nil
} }
br := bytes.NewReader(decompressed) format, stream, err := intfile.IdentifyArchive(ctx, path, r)
return struct { if err != nil {
io.ReadCloser if errors.Is(err, archives.NoMatch) {
io.ReaderAt return passthrough(r)
io.Seeker }
}{ return nil, fmt.Errorf("unable to identify compression format: %w", err)
ReadCloser: io.NopCloser(br), }
ReaderAt: br,
Seeker: br, decompressor, ok := format.(archives.Decompressor)
}, nil if !ok {
// not a single-stream compressed format (e.g. a tar/zip archive); treat as a plain .ko
return passthrough(r)
}
rc, err := decompressor.OpenReader(stream)
if err != nil {
return nil, fmt.Errorf("unable to open %s decompression stream: %w", format.Extension(), err)
}
defer rc.Close()
td := tmpdir.FromContext(ctx)
if td == nil {
return nil, fmt.Errorf("no temp dir factory in context")
}
tempFile, fileCleanup, err := td.NewFile("syft-kmod-*.ko") //nolint:gocritic // cleanup outlives this function — runs from tempFileUnionReader.Close on the returned reader
if err != nil {
fileCleanup()
return nil, fmt.Errorf("unable to create temp file for decompressed kernel module: %w", err)
}
tfr := &tempFileUnionReader{File: tempFile, cleanup: fileCleanup}
if _, err := io.Copy(tempFile, rc); err != nil {
_ = tfr.Close()
return nil, fmt.Errorf("unable to write decompressed kernel module: %w", err)
}
if _, err := tempFile.Seek(0, io.SeekStart); err != nil {
_ = tfr.Close()
return nil, fmt.Errorf("unable to rewind decompressed kernel module: %w", err)
}
return tfr, nil
}
// passthrough hands the caller's reader back positioned at the start of the stream. It is used
// after IdentifyArchive has read from r to sniff the format, so the explicit seek to offset 0
// spares callers from reasoning about where the reader currently points. The result is wrapped
// so that calling Close on it does not close r.
func passthrough(r unionreader.UnionReader) (unionreader.UnionReader, error) {
	_, err := r.Seek(0, io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("unable to rewind reader after format sniff: %w", err)
	}

	return &nopCloseUnionReader{UnionReader: r}, nil
}
// nopCloseUnionReader embeds a UnionReader and overrides Close with a no-op. It is returned on the
// passthrough path, where the wrapped reader belongs to the caller and must not be closed by us.
type nopCloseUnionReader struct {
	unionreader.UnionReader
}

// Close intentionally does nothing and always reports success; the embedded reader stays open.
func (*nopCloseUnionReader) Close() error {
	return nil
}
// tempFileUnionReader serves reads from a temp file on disk. Closing it closes the file handle
// and then invokes cleanup, which is expected to remove the temp file.
type tempFileUnionReader struct {
	*os.File
	cleanup func() // invoked by Close after the file handle has been closed
}

// Close closes the backing file and then runs cleanup, whether or not the close succeeded.
// The error from closing the file, if any, is what gets returned.
func (t *tempFileUnionReader) Close() error {
	// deferred so cleanup still runs after File.Close has been evaluated for the return value
	defer t.cleanup()
	return t.File.Close()
}
func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) { func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) {

View File

@ -4,8 +4,10 @@ import (
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"context" "context"
"debug/elf"
"encoding/binary" "encoding/binary"
"io" "io"
"os"
"testing" "testing"
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
@ -13,114 +15,106 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ulikunitz/xz" "github.com/ulikunitz/xz"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
// testContext builds a context that carries a tmpdir handle rooted at a fresh per-test directory
// (t.TempDir), so code under test that calls tmpdir.FromContext finds one.
func testContext(t *testing.T) context.Context {
	t.Helper()
	return tmpdir.WithValue(context.Background(), tmpdir.FromPath(t.TempDir()))
}
// minimalKOBytes constructs a minimal ELF64 LE relocatable object with a .modinfo // minimalKOBytes constructs a minimal ELF64 LE relocatable object with a .modinfo
// section containing the given null-terminated key=value entries. // section containing the given null-terminated key=value entries.
func minimalKOBytes(entries []string) []byte { func minimalKOBytes(entries []string) []byte {
// Build .modinfo section data: each entry is key=value\0 // .modinfo section: each entry is key=value\0
var modinfo []byte var modinfo bytes.Buffer
for _, e := range entries { for _, e := range entries {
modinfo = append(modinfo, []byte(e)...) modinfo.WriteString(e)
modinfo = append(modinfo, 0) modinfo.WriteByte(0)
} }
// Section name string table: \0 .modinfo\0 .shstrtab\0 // section header string table — embeds names of all sections back-to-back, leading null required.
// offsets below index into this blob.
shstrtab := []byte("\x00.modinfo\x00.shstrtab\x00") shstrtab := []byte("\x00.modinfo\x00.shstrtab\x00")
modinfoNameOff := uint32(1) // offset of ".modinfo" in shstrtab
shstrtabNameOff := uint32(10) // offset of ".shstrtab" in shstrtab
// ELF64 header is 64 bytes.
// We have 3 sections: null, .modinfo, .shstrtab
const ( const (
elfHeaderSize = 64 modinfoNameOff uint32 = 1
sectionHdrSize = 64 shstrtabNameOff uint32 = 10
numSections = 3
) )
modinfoOff := uint64(elfHeaderSize) const (
modinfoSize := uint64(len(modinfo)) ehdrSize uint64 = 64
shdrSize uint64 = 64
numSections uint16 = 3 // null + .modinfo + .shstrtab
)
shstrtabOff := modinfoOff + modinfoSize // layout: [ehdr][modinfo][shstrtab][pad to 8][section headers]
shstrtabSize := uint64(len(shstrtab)) var (
modinfoOff = ehdrSize
modinfoSize = uint64(modinfo.Len())
shstrtabOff = modinfoOff + modinfoSize
shstrtabSize = uint64(len(shstrtab))
shdrsOff = alignUp(shstrtabOff+shstrtabSize, 8)
)
// Align section header table to 8 bytes header := elf.Header64{
shdrsOff := shstrtabOff + shstrtabSize Ident: [16]byte{
if shdrsOff%8 != 0 { 0x7f, 'E', 'L', 'F',
shdrsOff += 8 - (shdrsOff % 8) byte(elf.ELFCLASS64),
byte(elf.ELFDATA2LSB),
byte(elf.EV_CURRENT),
},
Type: uint16(elf.ET_REL),
Machine: uint16(elf.EM_X86_64),
Version: uint32(elf.EV_CURRENT),
Shoff: shdrsOff,
Ehsize: uint16(ehdrSize),
Shentsize: uint16(shdrSize),
Shnum: numSections,
Shstrndx: numSections - 1, // .shstrtab is last
} }
buf := new(bytes.Buffer) sections := []elf.Section64{
le := binary.LittleEndian {}, // SHN_UNDEF
{
Name: modinfoNameOff,
Type: uint32(elf.SHT_PROGBITS),
Off: modinfoOff,
Size: modinfoSize,
Addralign: 1,
},
{
Name: shstrtabNameOff,
Type: uint32(elf.SHT_STRTAB),
Off: shstrtabOff,
Size: shstrtabSize,
Addralign: 1,
},
}
// ELF header var buf bytes.Buffer
buf.Write([]byte{0x7f, 'E', 'L', 'F'}) // magic _ = binary.Write(&buf, binary.LittleEndian, header)
buf.WriteByte(2) // EI_CLASS: ELFCLASS64 buf.Write(modinfo.Bytes())
buf.WriteByte(1) // EI_DATA: ELFDATA2LSB
buf.WriteByte(1) // EI_VERSION: EV_CURRENT
buf.WriteByte(0) // EI_OSABI
buf.Write(make([]byte, 8)) // EI_ABIVERSION + padding
writeU16 := func(v uint16) { binary.Write(buf, le, v) } //nolint:errcheck
writeU32 := func(v uint32) { binary.Write(buf, le, v) } //nolint:errcheck
writeU64 := func(v uint64) { binary.Write(buf, le, v) } //nolint:errcheck
writeU16(1) // e_type: ET_REL
writeU16(62) // e_machine: EM_X86_64
writeU32(1) // e_version: EV_CURRENT
writeU64(0) // e_entry
writeU64(0) // e_phoff (no program headers)
writeU64(shdrsOff) // e_shoff
writeU32(0) // e_flags
writeU16(elfHeaderSize) // e_ehsize
writeU16(0) // e_phentsize
writeU16(0) // e_phnum
writeU16(sectionHdrSize) // e_shentsize
writeU16(numSections) // e_shnum
writeU16(numSections - 1) // e_shstrndx (.shstrtab is last)
// Write section data
buf.Write(modinfo)
buf.Write(shstrtab) buf.Write(shstrtab)
// Pad to shdrsOff
for uint64(buf.Len()) < shdrsOff { for uint64(buf.Len()) < shdrsOff {
buf.WriteByte(0) buf.WriteByte(0)
} }
for _, s := range sections {
// Section header 0: null _ = binary.Write(&buf, binary.LittleEndian, s)
buf.Write(make([]byte, sectionHdrSize)) }
// Section header 1: .modinfo (SHT_PROGBITS=1)
writeU32(modinfoNameOff) // sh_name
writeU32(1) // sh_type: SHT_PROGBITS
writeU64(0) // sh_flags
writeU64(0) // sh_addr
writeU64(modinfoOff) // sh_offset
writeU64(modinfoSize) // sh_size
writeU32(0) // sh_link
writeU32(0) // sh_info
writeU64(1) // sh_addralign
writeU64(0) // sh_entsize
// Section header 2: .shstrtab (SHT_STRTAB=3)
writeU32(shstrtabNameOff) // sh_name
writeU32(3) // sh_type: SHT_STRTAB
writeU64(0) // sh_flags
writeU64(0) // sh_addr
writeU64(shstrtabOff) // sh_offset
writeU64(shstrtabSize) // sh_size
writeU32(0) // sh_link
writeU32(0) // sh_info
writeU64(1) // sh_addralign
writeU64(0) // sh_entsize
return buf.Bytes() return buf.Bytes()
} }
// alignUp rounds v up to the nearest multiple of align; a value already on a boundary is
// returned unchanged. align must be non-zero (a zero align divides by zero).
func alignUp(v, align uint64) uint64 {
	rem := v % align
	if rem == 0 {
		return v
	}
	return v + align - rem
}
func gzCompress(data []byte) []byte { func gzCompress(data []byte) []byte {
var buf bytes.Buffer var buf bytes.Buffer
w := gzip.NewWriter(&buf) w := gzip.NewWriter(&buf)
@ -207,7 +201,7 @@ func TestParseLinuxKernelModuleFile_Compressed(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
reader := makeLocationReadCloser(tt.path, tt.data) reader := makeLocationReadCloser(tt.path, tt.data)
pkgs, rels, err := parseLinuxKernelModuleFile(context.Background(), nil, &generic.Environment{}, reader) pkgs, rels, err := parseLinuxKernelModuleFile(testContext(t), nil, &generic.Environment{}, reader)
require.NoError(t, err) require.NoError(t, err)
require.Len(t, pkgs, 1) require.Len(t, pkgs, 1)
assert.Empty(t, rels) assert.Empty(t, rels)
@ -237,18 +231,20 @@ func TestDecompressedModuleReader(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
br := bytes.NewReader(tt.data)
wrapped := struct { wrapped := struct {
io.ReadCloser io.ReadCloser
io.ReaderAt io.ReaderAt
io.Seeker io.Seeker
}{ }{
ReadCloser: io.NopCloser(bytes.NewReader(tt.data)), ReadCloser: io.NopCloser(br),
ReaderAt: bytes.NewReader(tt.data), ReaderAt: br,
Seeker: bytes.NewReader(tt.data), Seeker: br,
} }
got, err := decompressedModuleReader(tt.path, wrapped) got, err := decompressedModuleReader(testContext(t), tt.path, wrapped)
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, got) require.NotNil(t, got)
t.Cleanup(func() { _ = got.Close() })
b, err := io.ReadAll(got) b, err := io.ReadAll(got)
require.NoError(t, err) require.NoError(t, err)
@ -256,3 +252,34 @@ func TestDecompressedModuleReader(t *testing.T) {
}) })
} }
} }
func TestDecompressedModuleReader_TempFileRemovedOnClose(t *testing.T) {
koBytes := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP"})
data := gzCompress(koBytes)
br := bytes.NewReader(data)
wrapped := struct {
io.ReadCloser
io.ReaderAt
io.Seeker
}{
ReadCloser: io.NopCloser(br),
ReaderAt: br,
Seeker: br,
}
got, err := decompressedModuleReader(testContext(t), "/test.ko.gz", wrapped)
require.NoError(t, err)
tfr, ok := got.(*tempFileUnionReader)
require.True(t, ok, "expected compressed path to spill to a temp file")
path := tfr.File.Name()
_, err = os.Stat(path)
require.NoError(t, err, "temp file should exist before Close")
require.NoError(t, got.Close())
_, err = os.Stat(path)
assert.True(t, os.IsNotExist(err), "temp file should be removed after Close, got err=%v", err)
}