mirror of
https://github.com/anchore/syft.git
synced 2026-07-04 18:18:26 +02:00
address reading archives into memory
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
parent
9f047fdf11
commit
dfde0974b0
@ -1,17 +1,18 @@
|
||||
package kernel
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"debug/elf"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/ulikunitz/xz"
|
||||
"github.com/mholt/archives"
|
||||
|
||||
intfile "github.com/anchore/syft/internal/file"
|
||||
"github.com/anchore/syft/internal/tmpdir"
|
||||
"github.com/anchore/syft/syft/artifact"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/internal/unionreader"
|
||||
@ -27,10 +28,11 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
|
||||
return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err)
|
||||
}
|
||||
|
||||
moduleReader, err := decompressedModuleReader(reader.RealPath, unionReader)
|
||||
moduleReader, err := decompressedModuleReader(ctx, reader.RealPath, unionReader)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("unable to decompress kernel module %q: %w", reader.RealPath, err)
|
||||
}
|
||||
defer moduleReader.Close()
|
||||
|
||||
metadata, err := parseLinuxKernelModuleMetadata(moduleReader)
|
||||
if err != nil {
|
||||
@ -52,58 +54,88 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
|
||||
}
|
||||
|
||||
// decompressedModuleReader returns a UnionReader over the decompressed contents of the kernel module
|
||||
// if the path indicates it is compressed (.ko.gz, .ko.xz, .ko.zst). For plain .ko files, the
|
||||
// original reader is returned unchanged.
|
||||
func decompressedModuleReader(path string, r unionreader.UnionReader) (unionreader.UnionReader, error) {
|
||||
var decompressed []byte
|
||||
|
||||
switch {
|
||||
case strings.HasSuffix(path, ".ko.gz"):
|
||||
gz, err := gzip.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create gzip reader: %w", err)
|
||||
}
|
||||
defer gz.Close()
|
||||
decompressed, err = io.ReadAll(gz)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to decompress gzip stream: %w", err)
|
||||
}
|
||||
|
||||
case strings.HasSuffix(path, ".ko.xz"):
|
||||
xzr, err := xz.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create xz reader: %w", err)
|
||||
}
|
||||
decompressed, err = io.ReadAll(xzr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to decompress xz stream: %w", err)
|
||||
}
|
||||
|
||||
case strings.HasSuffix(path, ".ko.zst"):
|
||||
zstdr, err := zstd.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to create zstd reader: %w", err)
|
||||
}
|
||||
defer zstdr.Close()
|
||||
decompressed, err = io.ReadAll(zstdr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to decompress zstd stream: %w", err)
|
||||
}
|
||||
|
||||
default:
|
||||
return r, nil
|
||||
// when the path indicates compression (e.g. .ko.gz, .ko.xz, .ko.zst). For plain .ko files the original
|
||||
// reader is returned unchanged. ELF parsing requires random access (io.ReaderAt + io.Seeker), so
|
||||
// compressed streams are spilled to a temp file rather than buffered in memory — kernel modules can be
|
||||
// tens of MB decompressed and large numbers of them are scanned per cataloger run. The caller owns
|
||||
// the returned reader and must Close it; the underlying reader (r) is not closed by Close on the
|
||||
// passthrough path — its lifecycle is the caller's.
|
||||
func decompressedModuleReader(ctx context.Context, path string, r unionreader.UnionReader) (unionreader.UnionReader, error) {
|
||||
// fast path: plain .ko files don't need format sniffing
|
||||
if strings.HasSuffix(path, ".ko") {
|
||||
return &nopCloseUnionReader{UnionReader: r}, nil
|
||||
}
|
||||
|
||||
br := bytes.NewReader(decompressed)
|
||||
return struct {
|
||||
io.ReadCloser
|
||||
io.ReaderAt
|
||||
io.Seeker
|
||||
}{
|
||||
ReadCloser: io.NopCloser(br),
|
||||
ReaderAt: br,
|
||||
Seeker: br,
|
||||
}, nil
|
||||
format, stream, err := intfile.IdentifyArchive(ctx, path, r)
|
||||
if err != nil {
|
||||
if errors.Is(err, archives.NoMatch) {
|
||||
return passthrough(r)
|
||||
}
|
||||
return nil, fmt.Errorf("unable to identify compression format: %w", err)
|
||||
}
|
||||
|
||||
decompressor, ok := format.(archives.Decompressor)
|
||||
if !ok {
|
||||
// not a single-stream compressed format (e.g. a tar/zip archive); treat as a plain .ko
|
||||
return passthrough(r)
|
||||
}
|
||||
|
||||
rc, err := decompressor.OpenReader(stream)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to open %s decompression stream: %w", format.Extension(), err)
|
||||
}
|
||||
defer rc.Close()
|
||||
|
||||
td := tmpdir.FromContext(ctx)
|
||||
if td == nil {
|
||||
return nil, fmt.Errorf("no temp dir factory in context")
|
||||
}
|
||||
tempFile, fileCleanup, err := td.NewFile("syft-kmod-*.ko") //nolint:gocritic // cleanup outlives this function — runs from tempFileUnionReader.Close on the returned reader
|
||||
if err != nil {
|
||||
fileCleanup()
|
||||
return nil, fmt.Errorf("unable to create temp file for decompressed kernel module: %w", err)
|
||||
}
|
||||
tfr := &tempFileUnionReader{File: tempFile, cleanup: fileCleanup}
|
||||
|
||||
if _, err := io.Copy(tempFile, rc); err != nil {
|
||||
_ = tfr.Close()
|
||||
return nil, fmt.Errorf("unable to write decompressed kernel module: %w", err)
|
||||
}
|
||||
if _, err := tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
_ = tfr.Close()
|
||||
return nil, fmt.Errorf("unable to rewind decompressed kernel module: %w", err)
|
||||
}
|
||||
|
||||
return tfr, nil
|
||||
}
|
||||
|
||||
// passthrough returns the original reader rewound to offset 0. IdentifyArchive consumes bytes to
|
||||
// sniff magic; we rewind explicitly so callers don't have to reason about the seeker's position.
|
||||
func passthrough(r unionreader.UnionReader) (unionreader.UnionReader, error) {
|
||||
if _, err := r.Seek(0, io.SeekStart); err != nil {
|
||||
return nil, fmt.Errorf("unable to rewind reader after format sniff: %w", err)
|
||||
}
|
||||
return &nopCloseUnionReader{UnionReader: r}, nil
|
||||
}
|
||||
|
||||
// nopCloseUnionReader wraps a UnionReader so that Close is a no-op. used on the passthrough path
|
||||
// where the underlying reader's lifecycle is owned by the caller, not by us.
|
||||
type nopCloseUnionReader struct {
|
||||
unionreader.UnionReader
|
||||
}
|
||||
|
||||
func (*nopCloseUnionReader) Close() error { return nil }
|
||||
|
||||
// tempFileUnionReader is a UnionReader backed by a temp file. The embedded *os.File supplies the
// read, random-access and seek methods; Close additionally runs cleanup, which is expected to
// remove the file.
type tempFileUnionReader struct {
	*os.File
	cleanup func() // optional; invoked by Close after the file handle has been closed
}

// Close closes the file handle and then runs cleanup, returning the error (if any) from closing
// the handle. cleanup runs even when closing fails so the temp file is not left behind, and a nil
// cleanup is skipped rather than panicking so a reader built without one can still be closed.
func (t *tempFileUnionReader) Close() error {
	err := t.File.Close()
	if t.cleanup != nil {
		t.cleanup()
	}
	return err
}
|
||||
|
||||
func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) {
|
||||
|
||||
@ -4,8 +4,10 @@ import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"debug/elf"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/klauspost/compress/zstd"
|
||||
@ -13,114 +15,106 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/ulikunitz/xz"
|
||||
|
||||
"github.com/anchore/syft/internal/tmpdir"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
func testContext(t *testing.T) context.Context {
|
||||
t.Helper()
|
||||
td := tmpdir.FromPath(t.TempDir())
|
||||
return tmpdir.WithValue(context.Background(), td)
|
||||
}
|
||||
|
||||
// minimalKOBytes constructs a minimal ELF64 LE relocatable object with a .modinfo
|
||||
// section containing the given null-terminated key=value entries.
|
||||
func minimalKOBytes(entries []string) []byte {
|
||||
// Build .modinfo section data: each entry is key=value\0
|
||||
var modinfo []byte
|
||||
// .modinfo section: each entry is key=value\0
|
||||
var modinfo bytes.Buffer
|
||||
for _, e := range entries {
|
||||
modinfo = append(modinfo, []byte(e)...)
|
||||
modinfo = append(modinfo, 0)
|
||||
modinfo.WriteString(e)
|
||||
modinfo.WriteByte(0)
|
||||
}
|
||||
|
||||
// Section name string table: \0 .modinfo\0 .shstrtab\0
|
||||
// section header string table — embeds names of all sections back-to-back, leading null required.
|
||||
// offsets below index into this blob.
|
||||
shstrtab := []byte("\x00.modinfo\x00.shstrtab\x00")
|
||||
modinfoNameOff := uint32(1) // offset of ".modinfo" in shstrtab
|
||||
shstrtabNameOff := uint32(10) // offset of ".shstrtab" in shstrtab
|
||||
|
||||
// ELF64 header is 64 bytes.
|
||||
// We have 3 sections: null, .modinfo, .shstrtab
|
||||
const (
|
||||
elfHeaderSize = 64
|
||||
sectionHdrSize = 64
|
||||
numSections = 3
|
||||
modinfoNameOff uint32 = 1
|
||||
shstrtabNameOff uint32 = 10
|
||||
)
|
||||
|
||||
modinfoOff := uint64(elfHeaderSize)
|
||||
modinfoSize := uint64(len(modinfo))
|
||||
const (
|
||||
ehdrSize uint64 = 64
|
||||
shdrSize uint64 = 64
|
||||
numSections uint16 = 3 // null + .modinfo + .shstrtab
|
||||
)
|
||||
|
||||
shstrtabOff := modinfoOff + modinfoSize
|
||||
shstrtabSize := uint64(len(shstrtab))
|
||||
// layout: [ehdr][modinfo][shstrtab][pad to 8][section headers]
|
||||
var (
|
||||
modinfoOff = ehdrSize
|
||||
modinfoSize = uint64(modinfo.Len())
|
||||
shstrtabOff = modinfoOff + modinfoSize
|
||||
shstrtabSize = uint64(len(shstrtab))
|
||||
shdrsOff = alignUp(shstrtabOff+shstrtabSize, 8)
|
||||
)
|
||||
|
||||
// Align section header table to 8 bytes
|
||||
shdrsOff := shstrtabOff + shstrtabSize
|
||||
if shdrsOff%8 != 0 {
|
||||
shdrsOff += 8 - (shdrsOff % 8)
|
||||
header := elf.Header64{
|
||||
Ident: [16]byte{
|
||||
0x7f, 'E', 'L', 'F',
|
||||
byte(elf.ELFCLASS64),
|
||||
byte(elf.ELFDATA2LSB),
|
||||
byte(elf.EV_CURRENT),
|
||||
},
|
||||
Type: uint16(elf.ET_REL),
|
||||
Machine: uint16(elf.EM_X86_64),
|
||||
Version: uint32(elf.EV_CURRENT),
|
||||
Shoff: shdrsOff,
|
||||
Ehsize: uint16(ehdrSize),
|
||||
Shentsize: uint16(shdrSize),
|
||||
Shnum: numSections,
|
||||
Shstrndx: numSections - 1, // .shstrtab is last
|
||||
}
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
le := binary.LittleEndian
|
||||
sections := []elf.Section64{
|
||||
{}, // SHN_UNDEF
|
||||
{
|
||||
Name: modinfoNameOff,
|
||||
Type: uint32(elf.SHT_PROGBITS),
|
||||
Off: modinfoOff,
|
||||
Size: modinfoSize,
|
||||
Addralign: 1,
|
||||
},
|
||||
{
|
||||
Name: shstrtabNameOff,
|
||||
Type: uint32(elf.SHT_STRTAB),
|
||||
Off: shstrtabOff,
|
||||
Size: shstrtabSize,
|
||||
Addralign: 1,
|
||||
},
|
||||
}
|
||||
|
||||
// ELF header
|
||||
buf.Write([]byte{0x7f, 'E', 'L', 'F'}) // magic
|
||||
buf.WriteByte(2) // EI_CLASS: ELFCLASS64
|
||||
buf.WriteByte(1) // EI_DATA: ELFDATA2LSB
|
||||
buf.WriteByte(1) // EI_VERSION: EV_CURRENT
|
||||
buf.WriteByte(0) // EI_OSABI
|
||||
buf.Write(make([]byte, 8)) // EI_ABIVERSION + padding
|
||||
|
||||
writeU16 := func(v uint16) { binary.Write(buf, le, v) } //nolint:errcheck
|
||||
writeU32 := func(v uint32) { binary.Write(buf, le, v) } //nolint:errcheck
|
||||
writeU64 := func(v uint64) { binary.Write(buf, le, v) } //nolint:errcheck
|
||||
|
||||
writeU16(1) // e_type: ET_REL
|
||||
writeU16(62) // e_machine: EM_X86_64
|
||||
writeU32(1) // e_version: EV_CURRENT
|
||||
writeU64(0) // e_entry
|
||||
writeU64(0) // e_phoff (no program headers)
|
||||
writeU64(shdrsOff) // e_shoff
|
||||
writeU32(0) // e_flags
|
||||
writeU16(elfHeaderSize) // e_ehsize
|
||||
writeU16(0) // e_phentsize
|
||||
writeU16(0) // e_phnum
|
||||
writeU16(sectionHdrSize) // e_shentsize
|
||||
writeU16(numSections) // e_shnum
|
||||
writeU16(numSections - 1) // e_shstrndx (.shstrtab is last)
|
||||
|
||||
// Write section data
|
||||
buf.Write(modinfo)
|
||||
var buf bytes.Buffer
|
||||
_ = binary.Write(&buf, binary.LittleEndian, header)
|
||||
buf.Write(modinfo.Bytes())
|
||||
buf.Write(shstrtab)
|
||||
|
||||
// Pad to shdrsOff
|
||||
for uint64(buf.Len()) < shdrsOff {
|
||||
buf.WriteByte(0)
|
||||
}
|
||||
|
||||
// Section header 0: null
|
||||
buf.Write(make([]byte, sectionHdrSize))
|
||||
|
||||
// Section header 1: .modinfo (SHT_PROGBITS=1)
|
||||
writeU32(modinfoNameOff) // sh_name
|
||||
writeU32(1) // sh_type: SHT_PROGBITS
|
||||
writeU64(0) // sh_flags
|
||||
writeU64(0) // sh_addr
|
||||
writeU64(modinfoOff) // sh_offset
|
||||
writeU64(modinfoSize) // sh_size
|
||||
writeU32(0) // sh_link
|
||||
writeU32(0) // sh_info
|
||||
writeU64(1) // sh_addralign
|
||||
writeU64(0) // sh_entsize
|
||||
|
||||
// Section header 2: .shstrtab (SHT_STRTAB=3)
|
||||
writeU32(shstrtabNameOff) // sh_name
|
||||
writeU32(3) // sh_type: SHT_STRTAB
|
||||
writeU64(0) // sh_flags
|
||||
writeU64(0) // sh_addr
|
||||
writeU64(shstrtabOff) // sh_offset
|
||||
writeU64(shstrtabSize) // sh_size
|
||||
writeU32(0) // sh_link
|
||||
writeU32(0) // sh_info
|
||||
writeU64(1) // sh_addralign
|
||||
writeU64(0) // sh_entsize
|
||||
|
||||
for _, s := range sections {
|
||||
_ = binary.Write(&buf, binary.LittleEndian, s)
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// alignUp rounds v up to the nearest multiple of align; a value that is already a multiple is
// returned unchanged. An align of 0 is treated as "no alignment constraint" (the meaning ELF gives
// an sh_addralign of 0) and returns v as-is instead of panicking on a modulo by zero.
func alignUp(v, align uint64) uint64 {
	if align == 0 {
		return v
	}
	if rem := v % align; rem != 0 {
		v += align - rem
	}
	return v
}
|
||||
|
||||
func gzCompress(data []byte) []byte {
|
||||
var buf bytes.Buffer
|
||||
w := gzip.NewWriter(&buf)
|
||||
@ -207,7 +201,7 @@ func TestParseLinuxKernelModuleFile_Compressed(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
reader := makeLocationReadCloser(tt.path, tt.data)
|
||||
pkgs, rels, err := parseLinuxKernelModuleFile(context.Background(), nil, &generic.Environment{}, reader)
|
||||
pkgs, rels, err := parseLinuxKernelModuleFile(testContext(t), nil, &generic.Environment{}, reader)
|
||||
require.NoError(t, err)
|
||||
require.Len(t, pkgs, 1)
|
||||
assert.Empty(t, rels)
|
||||
@ -237,18 +231,20 @@ func TestDecompressedModuleReader(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
br := bytes.NewReader(tt.data)
|
||||
wrapped := struct {
|
||||
io.ReadCloser
|
||||
io.ReaderAt
|
||||
io.Seeker
|
||||
}{
|
||||
ReadCloser: io.NopCloser(bytes.NewReader(tt.data)),
|
||||
ReaderAt: bytes.NewReader(tt.data),
|
||||
Seeker: bytes.NewReader(tt.data),
|
||||
ReadCloser: io.NopCloser(br),
|
||||
ReaderAt: br,
|
||||
Seeker: br,
|
||||
}
|
||||
got, err := decompressedModuleReader(tt.path, wrapped)
|
||||
got, err := decompressedModuleReader(testContext(t), tt.path, wrapped)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, got)
|
||||
t.Cleanup(func() { _ = got.Close() })
|
||||
|
||||
b, err := io.ReadAll(got)
|
||||
require.NoError(t, err)
|
||||
@ -256,3 +252,34 @@ func TestDecompressedModuleReader(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecompressedModuleReader_TempFileRemovedOnClose(t *testing.T) {
|
||||
koBytes := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP"})
|
||||
data := gzCompress(koBytes)
|
||||
|
||||
br := bytes.NewReader(data)
|
||||
wrapped := struct {
|
||||
io.ReadCloser
|
||||
io.ReaderAt
|
||||
io.Seeker
|
||||
}{
|
||||
ReadCloser: io.NopCloser(br),
|
||||
ReaderAt: br,
|
||||
Seeker: br,
|
||||
}
|
||||
|
||||
got, err := decompressedModuleReader(testContext(t), "/test.ko.gz", wrapped)
|
||||
require.NoError(t, err)
|
||||
|
||||
tfr, ok := got.(*tempFileUnionReader)
|
||||
require.True(t, ok, "expected compressed path to spill to a temp file")
|
||||
path := tfr.File.Name()
|
||||
|
||||
_, err = os.Stat(path)
|
||||
require.NoError(t, err, "temp file should exist before Close")
|
||||
|
||||
require.NoError(t, got.Close())
|
||||
|
||||
_, err = os.Stat(path)
|
||||
assert.True(t, os.IsNotExist(err), "temp file should be removed after Close, got err=%v", err)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user