fix: detect compressed kernel modules (.ko.gz, .ko.xz, .ko.zst) (#4740)

* fix: detect compressed kernel modules (.ko.gz, .ko.xz, .ko.zst)

The linux-kernel-cataloger only matched plain *.ko files, missing
compressed modules produced when CONFIG_MODULE_COMPRESS is enabled
(common on Debian 13 / Ubuntu 24.04+). This resulted in near-zero
module packages being reported for such filesystems.

Changes:
- Add *.ko.gz, *.ko.xz, *.ko.zst glob patterns to both the cataloger
  and capabilities.yaml so the file resolver picks up compressed modules
- Add decompressedModuleReader() which detects the extension and
  transparently decompresses via compress/gzip, ulikunitz/xz, or
  klauspost/compress/zstd before handing the ELF bytes to the existing
  parseLinuxKernelModuleMetadata parser
- Promote github.com/klauspost/compress from indirect to direct dependency
- Add unit tests covering all three compression formats plus the
  uncompressed baseline, using a programmatically generated minimal ELF

Fixes #4721

Signed-off-by: Will Bates <william.bates11@outlook.com>

* address reading archives into memory

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Will Bates <william.bates11@outlook.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Co-authored-by: Will Bates <william.bates11@outlook.com>
Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
William Bates 2026-05-13 13:44:18 -04:00 committed by GitHub
parent 07ae2ca08d
commit 4579d11abc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 394 additions and 4 deletions

2
go.mod
View File

@ -62,6 +62,7 @@ require (
github.com/jedib0t/go-pretty/v6 v6.7.8 github.com/jedib0t/go-pretty/v6 v6.7.8
github.com/jinzhu/copier v0.4.0 github.com/jinzhu/copier v0.4.0
github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953 github.com/kastenhq/goversion v0.0.0-20230811215019-93b2f8823953
github.com/klauspost/compress v1.18.5
github.com/magiconair/properties v1.8.10 github.com/magiconair/properties v1.8.10
github.com/mholt/archives v0.1.5 github.com/mholt/archives v0.1.5
github.com/moby/sys/mountinfo v0.7.2 github.com/moby/sys/mountinfo v0.7.2
@ -215,7 +216,6 @@ require (
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/compress v1.18.5 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/klauspost/pgzip v1.2.6 // indirect github.com/klauspost/pgzip v1.2.6 // indirect
github.com/kr/pretty v0.3.1 // indirect github.com/kr/pretty v0.3.1 // indirect

View File

@ -4,7 +4,7 @@ configs: # AUTO-GENERATED - config structs and their fields
kernel.LinuxKernelCatalogerConfig: kernel.LinuxKernelCatalogerConfig:
fields: fields:
- key: CatalogModules - key: CatalogModules
description: CatalogModules enables cataloging linux kernel modules (`*.ko` files) in addition to the kernel itself. description: CatalogModules enables cataloging linux kernel modules (`*.ko` and compressed `*.ko.gz`, `*.ko.xz`, `*.ko.zst` files) in addition to the kernel itself.
app_key: linux-kernel.catalog-modules app_key: linux-kernel.catalog-modules
catalogers: catalogers:
- ecosystem: linux # MANUAL - ecosystem: linux # MANUAL
@ -36,6 +36,9 @@ catalogers:
- '**/zImage' - '**/zImage'
- '**/zImage-*' - '**/zImage-*'
- '**/lib/modules/**/*.ko' - '**/lib/modules/**/*.ko'
- '**/lib/modules/**/*.ko.gz'
- '**/lib/modules/**/*.ko.xz'
- '**/lib/modules/**/*.ko.zst'
metadata_types: # AUTO-GENERATED metadata_types: # AUTO-GENERATED
- pkg.LinuxKernel - pkg.LinuxKernel
- pkg.LinuxKernelModule - pkg.LinuxKernelModule

View File

@ -17,7 +17,7 @@ import (
var _ pkg.Cataloger = (*linuxKernelCataloger)(nil) var _ pkg.Cataloger = (*linuxKernelCataloger)(nil)
type LinuxKernelCatalogerConfig struct { type LinuxKernelCatalogerConfig struct {
// CatalogModules enables cataloging linux kernel modules (`*.ko` files) in addition to the kernel itself. // CatalogModules enables cataloging linux kernel modules (`*.ko` and compressed `*.ko.gz`, `*.ko.xz`, `*.ko.zst` files) in addition to the kernel itself.
// app-config: linux-kernel.catalog-modules // app-config: linux-kernel.catalog-modules
CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"` CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"`
} }
@ -47,6 +47,9 @@ var kernelArchiveGlobs = []string{
var kernelModuleGlobs = []string{ var kernelModuleGlobs = []string{
"**/lib/modules/**/*.ko", "**/lib/modules/**/*.ko",
"**/lib/modules/**/*.ko.gz",
"**/lib/modules/**/*.ko.xz",
"**/lib/modules/**/*.ko.zst",
} }
// NewLinuxKernelCataloger returns a new kernel files cataloger object. // NewLinuxKernelCataloger returns a new kernel files cataloger object.

View File

@ -3,9 +3,16 @@ package kernel
import ( import (
"context" "context"
"debug/elf" "debug/elf"
"errors"
"fmt" "fmt"
"io"
"os"
"strings" "strings"
"github.com/mholt/archives"
intfile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader" "github.com/anchore/syft/syft/internal/unionreader"
@ -20,7 +27,14 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err) return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err)
} }
metadata, err := parseLinuxKernelModuleMetadata(unionReader)
moduleReader, err := decompressedModuleReader(ctx, reader.RealPath, unionReader)
if err != nil {
return nil, nil, fmt.Errorf("unable to decompress kernel module %q: %w", reader.RealPath, err)
}
defer moduleReader.Close()
metadata, err := parseLinuxKernelModuleMetadata(moduleReader)
if err != nil { if err != nil {
return nil, nil, fmt.Errorf("unable to parse kernel module metadata: %w", err) return nil, nil, fmt.Errorf("unable to parse kernel module metadata: %w", err)
} }
@ -39,6 +53,91 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic
}, nil, nil }, nil, nil
} }
// decompressedModuleReader hands back a UnionReader over the uncompressed bytes of a kernel module.
//
//   - A path ending in ".ko" is treated as an uncompressed module: r is wrapped as-is, with no
//     format sniffing.
//   - Any other path (e.g. .ko.gz, .ko.xz, .ko.zst) is sniffed with IdentifyArchive. When nothing
//     matches, or the match is not a single-stream decompressor (e.g. a tar/zip archive), r is
//     rewound to offset 0 and wrapped as-is.
//   - Otherwise the stream is decompressed into a temp file obtained from the temp dir factory
//     carried by ctx, and a reader over that (rewound) file is returned.
//
// ELF parsing requires random access (io.ReaderAt + io.Seeker), so compressed streams are spilled
// to a temp file rather than buffered in memory — kernel modules can be tens of MB decompressed and
// large numbers of them are scanned per cataloger run.
//
// The caller owns the returned reader and must Close it. On the passthrough paths Close is a no-op
// and r's lifecycle stays with the caller; on the temp file path Close closes the temp file and runs
// its cleanup.
func decompressedModuleReader(ctx context.Context, path string, r unionreader.UnionReader) (unionreader.UnionReader, error) {
	// fast path: nothing has been read from r yet, so a plain .ko needs neither sniffing nor a rewind
	if strings.HasSuffix(path, ".ko") {
		return &nopCloseUnionReader{UnionReader: r}, nil
	}

	format, stream, err := intfile.IdentifyArchive(ctx, path, r)
	switch {
	case errors.Is(err, archives.NoMatch):
		// unrecognized content: let the ELF parser decide what to make of it
		return passthrough(r)
	case err != nil:
		return nil, fmt.Errorf("unable to identify compression format: %w", err)
	}

	decompressor, isCompressedStream := format.(archives.Decompressor)
	if !isCompressedStream {
		// not a single-stream compressed format (e.g. a tar/zip archive); treat as a plain .ko
		return passthrough(r)
	}

	decompressed, err := decompressor.OpenReader(stream)
	if err != nil {
		return nil, fmt.Errorf("unable to open %s decompression stream: %w", format.Extension(), err)
	}
	// the decompression stream is only needed while filling the temp file
	defer decompressed.Close()

	factory := tmpdir.FromContext(ctx)
	if factory == nil {
		return nil, fmt.Errorf("no temp dir factory in context")
	}

	spillFile, removeSpill, err := factory.NewFile("syft-kmod-*.ko") //nolint:gocritic // cleanup outlives this function — runs from tempFileUnionReader.Close on the returned reader
	if err != nil {
		removeSpill()
		return nil, fmt.Errorf("unable to create temp file for decompressed kernel module: %w", err)
	}

	// from here on, closing the wrapper is the single way to release the temp file
	spilled := &tempFileUnionReader{File: spillFile, cleanup: removeSpill}

	// NOTE(review): the copy is unbounded; consider whether a cap on decompressed size is wanted
	// for untrusted inputs.
	_, err = io.Copy(spillFile, decompressed)
	if err != nil {
		_ = spilled.Close()
		return nil, fmt.Errorf("unable to write decompressed kernel module: %w", err)
	}

	// the write left the offset at EOF; readers expect to start at the beginning
	_, err = spillFile.Seek(0, io.SeekStart)
	if err != nil {
		_ = spilled.Close()
		return nil, fmt.Errorf("unable to rewind decompressed kernel module: %w", err)
	}

	return spilled, nil
}
// passthrough rewinds r to offset 0 and wraps it so that Close on the result is a no-op.
// IdentifyArchive consumes bytes to sniff magic, so the rewind is done here rather than leaving
// callers to reason about the seeker's position. An error is returned when the rewind fails.
func passthrough(r unionreader.UnionReader) (unionreader.UnionReader, error) {
	_, err := r.Seek(0, io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("unable to rewind reader after format sniff: %w", err)
	}

	wrapped := &nopCloseUnionReader{UnionReader: r}
	return wrapped, nil
}
// nopCloseUnionReader decorates a UnionReader with a Close that does nothing. It is used on the
// passthrough path, where the underlying reader's lifecycle is owned by the caller, not by us.
// All other methods are promoted unchanged from the embedded reader.
type nopCloseUnionReader struct {
	unionreader.UnionReader
}

// compile-time check that the wrapper still satisfies the interface it decorates
var _ unionreader.UnionReader = (*nopCloseUnionReader)(nil)

// Close intentionally leaves the wrapped reader open and always reports success.
func (*nopCloseUnionReader) Close() error {
	return nil
}
// tempFileUnionReader is a UnionReader backed by a temp file. Read, ReadAt and Seek are promoted
// from the embedded *os.File; cleanup is the function that disposes of the file once it is no
// longer needed.
type tempFileUnionReader struct {
	*os.File
	cleanup func()
}

// Close closes the underlying file and then runs cleanup. cleanup runs even when closing the file
// fails; the error from closing the file is what gets returned.
func (t *tempFileUnionReader) Close() error {
	defer t.cleanup()
	return t.File.Close()
}
func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) { func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) {
// filename: /lib/modules/5.15.0-1031-aws/kernel/zfs/zzstd.ko // filename: /lib/modules/5.15.0-1031-aws/kernel/zfs/zzstd.ko
// version: 1.4.5a // version: 1.4.5a

View File

@ -0,0 +1,285 @@
package kernel
import (
"bytes"
"compress/gzip"
"context"
"debug/elf"
"encoding/binary"
"io"
"os"
"testing"
"github.com/klauspost/compress/zstd"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/ulikunitz/xz"
"github.com/anchore/syft/internal/tmpdir"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// testContext returns a background context carrying a tmpdir value rooted at a fresh per-test
// directory (t.TempDir()), so that code under test calling tmpdir.FromContext finds one.
func testContext(t *testing.T) context.Context {
	t.Helper()

	return tmpdir.WithValue(context.Background(), tmpdir.FromPath(t.TempDir()))
}
// minimalKOBytes constructs a minimal ELF64 little-endian relocatable object (x86-64) whose
// .modinfo section holds the given entries, each written as the entry text followed by a NUL byte
// (callers pass "key=value" strings).
//
// File layout: [ELF header][.modinfo][.shstrtab][zero padding to 8 bytes][3 section headers],
// where the section headers are the null section, .modinfo, and .shstrtab, in that order.
func minimalKOBytes(entries []string) []byte {
	const (
		modinfoName  = ".modinfo"
		shstrtabName = ".shstrtab"

		// offsets of each name inside the section header string table built below; offset 0 is
		// the mandatory leading NUL, and every name is followed by its own NUL terminator
		modinfoNameOff  uint32 = 1
		shstrtabNameOff uint32 = modinfoNameOff + uint32(len(modinfoName)) + 1

		ehdrSize    uint64 = 64
		shdrSize    uint64 = 64
		numSections uint16 = 3 // null + .modinfo + .shstrtab
	)

	// .modinfo payload: entry\0entry\0...
	var modinfo []byte
	for _, entry := range entries {
		modinfo = append(modinfo, entry...)
		modinfo = append(modinfo, 0)
	}

	// section header string table: all section names back-to-back
	shstrtab := []byte("\x00" + modinfoName + "\x00" + shstrtabName + "\x00")

	// section data follows the ELF header directly; section headers start at the next 8-byte boundary
	modinfoOff := ehdrSize
	modinfoSize := uint64(len(modinfo))
	shstrtabOff := modinfoOff + modinfoSize
	shstrtabSize := uint64(len(shstrtab))
	shdrsOff := alignUp(shstrtabOff+shstrtabSize, 8)

	var ident [elf.EI_NIDENT]byte
	copy(ident[:], elf.ELFMAG)
	ident[elf.EI_CLASS] = byte(elf.ELFCLASS64)
	ident[elf.EI_DATA] = byte(elf.ELFDATA2LSB)
	ident[elf.EI_VERSION] = byte(elf.EV_CURRENT)

	header := elf.Header64{
		Ident:     ident,
		Type:      uint16(elf.ET_REL),
		Machine:   uint16(elf.EM_X86_64),
		Version:   uint32(elf.EV_CURRENT),
		Shoff:     shdrsOff,
		Ehsize:    uint16(ehdrSize),
		Shentsize: uint16(shdrSize),
		Shnum:     numSections,
		Shstrndx:  numSections - 1, // .shstrtab is last
	}

	modinfoHdr := elf.Section64{
		Name:      modinfoNameOff,
		Type:      uint32(elf.SHT_PROGBITS),
		Off:       modinfoOff,
		Size:      modinfoSize,
		Addralign: 1,
	}
	shstrtabHdr := elf.Section64{
		Name:      shstrtabNameOff,
		Type:      uint32(elf.SHT_STRTAB),
		Off:       shstrtabOff,
		Size:      shstrtabSize,
		Addralign: 1,
	}
	// the zero-valued first entry is the SHN_UNDEF (null) section
	sectionHdrs := [...]elf.Section64{{}, modinfoHdr, shstrtabHdr}

	// errors from binary.Write are ignored: the values are fixed-size structs and the destination
	// is an in-memory buffer
	var out bytes.Buffer
	_ = binary.Write(&out, binary.LittleEndian, header)
	out.Write(modinfo)
	out.Write(shstrtab)
	out.Write(make([]byte, shdrsOff-uint64(out.Len()))) // zero padding up to the section headers
	for i := range sectionHdrs {
		_ = binary.Write(&out, binary.LittleEndian, sectionHdrs[i])
	}

	return out.Bytes()
}

// alignUp rounds v up to the nearest multiple of align; v is returned unchanged when it is
// already a multiple. align must be non-zero.
func alignUp(v, align uint64) uint64 {
	if rem := v % align; rem != 0 {
		v += align - rem
	}
	return v
}
// gzCompress returns data as a complete gzip stream (default compression level).
// It panics if the stream cannot be written or finalized, so that a broken fixture fails loudly
// at its source instead of surfacing later as a confusing parse failure.
func gzCompress(data []byte) []byte {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	if _, err := zw.Write(data); err != nil {
		panic("gzip-compressing test fixture: " + err.Error())
	}
	// Close flushes any pending data and writes the gzip footer; without it the stream is truncated
	if err := zw.Close(); err != nil {
		panic("finalizing gzip test fixture: " + err.Error())
	}
	return buf.Bytes()
}
// xzCompress returns data as a complete xz stream produced by xz.NewWriter.
// It panics if the writer cannot be created, or if the stream cannot be written or finalized:
// discarding those errors would either dereference a nil writer or hand the tests a corrupt fixture.
func xzCompress(data []byte) []byte {
	var buf bytes.Buffer
	zw, err := xz.NewWriter(&buf)
	if err != nil {
		panic("creating xz writer for test fixture: " + err.Error())
	}
	if _, err := zw.Write(data); err != nil {
		panic("xz-compressing test fixture: " + err.Error())
	}
	// Close finalizes the stream; the buffer is incomplete until it succeeds
	if err := zw.Close(); err != nil {
		panic("finalizing xz test fixture: " + err.Error())
	}
	return buf.Bytes()
}
// zstCompress returns data as a complete zstd stream produced by zstd.NewWriter.
// It panics if the encoder cannot be created, or if the stream cannot be written or finalized:
// discarding those errors would either dereference a nil encoder or hand the tests a corrupt fixture.
func zstCompress(data []byte) []byte {
	var buf bytes.Buffer
	zw, err := zstd.NewWriter(&buf)
	if err != nil {
		panic("creating zstd writer for test fixture: " + err.Error())
	}
	if _, err := zw.Write(data); err != nil {
		panic("zstd-compressing test fixture: " + err.Error())
	}
	// Close flushes buffered data and finalizes the stream; the buffer is incomplete until it succeeds
	if err := zw.Close(); err != nil {
		panic("finalizing zstd test fixture: " + err.Error())
	}
	return buf.Bytes()
}
// makeLocationReadCloser builds a file.LocationReadCloser for tests: its location is a virtual
// location using path for both arguments, and its contents are served from data in memory.
// Closing the result is a no-op.
func makeLocationReadCloser(path string, data []byte) file.LocationReadCloser {
	contents := io.NopCloser(bytes.NewReader(data))
	location := file.NewVirtualLocation(path, path)

	return file.LocationReadCloser{
		Location:   location,
		ReadCloser: contents,
	}
}
// TestParseLinuxKernelModuleFile_Compressed runs parseLinuxKernelModuleFile over the same synthetic
// module stored uncompressed and as .ko.gz, .ko.xz and .ko.zst, and expects an identical single
// package (name, version, and kernel version taken from vermagic) in every case.
func TestParseLinuxKernelModuleFile_Compressed(t *testing.T) {
	koBytes := minimalKOBytes([]string{
		"name=dummy_mod",
		"version=1.2.3",
		"vermagic=6.1.0-rc1 SMP mod_unload",
		"license=GPL v2",
	})

	type testCase struct {
		name     string
		path     string
		data     []byte
		wantName string
		wantVer  string
		wantKV   string // expected KernelVersion from vermagic
	}

	// every case wraps the same module, so only the label, path and encoding vary
	newCase := func(name, path string, data []byte) testCase {
		return testCase{
			name:     name,
			path:     path,
			data:     data,
			wantName: "dummy_mod",
			wantVer:  "1.2.3",
			wantKV:   "6.1.0-rc1",
		}
	}

	tests := []testCase{
		newCase("uncompressed .ko", "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko", koBytes),
		newCase("gzip-compressed .ko.gz", "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.gz", gzCompress(koBytes)),
		newCase("xz-compressed .ko.xz", "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.xz", xzCompress(koBytes)),
		newCase("zstd-compressed .ko.zst", "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.zst", zstCompress(koBytes)),
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			input := makeLocationReadCloser(tc.path, tc.data)

			pkgs, rels, err := parseLinuxKernelModuleFile(testContext(t), nil, &generic.Environment{}, input)
			require.NoError(t, err)
			require.Len(t, pkgs, 1)
			assert.Empty(t, rels)

			got := pkgs[0]
			assert.Equal(t, tc.wantName, got.Name)
			assert.Equal(t, tc.wantVer, got.Version)

			meta, ok := got.Metadata.(pkg.LinuxKernelModule)
			require.True(t, ok)
			assert.Equal(t, tc.wantKV, meta.KernelVersion)
		})
	}
}
// TestDecompressedModuleReader checks that, for an uncompressed module and for each supported
// compression format, reading the returned reader to EOF yields exactly the original module bytes.
func TestDecompressedModuleReader(t *testing.T) {
	want := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP mod_unload"})

	type testCase struct {
		name string
		path string
		data []byte
	}
	tests := []testCase{
		{name: "uncompressed", path: "/lib/modules/5.15.0/kernel/test.ko", data: want},
		{name: "gz", path: "/lib/modules/5.15.0/kernel/test.ko.gz", data: gzCompress(want)},
		{name: "xz", path: "/lib/modules/5.15.0/kernel/test.ko.xz", data: xzCompress(want)},
		{name: "zst", path: "/lib/modules/5.15.0/kernel/test.ko.zst", data: zstCompress(want)},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// one in-memory reader supplies all three facets the function needs from its input
			src := bytes.NewReader(tc.data)
			input := struct {
				io.ReadCloser
				io.ReaderAt
				io.Seeker
			}{
				ReadCloser: io.NopCloser(src),
				ReaderAt:   src,
				Seeker:     src,
			}

			got, err := decompressedModuleReader(testContext(t), tc.path, input)
			require.NoError(t, err)
			require.NotNil(t, got)
			t.Cleanup(func() { _ = got.Close() })

			contents, err := io.ReadAll(got)
			require.NoError(t, err)
			assert.Equal(t, want, contents, "decompressed bytes should match original .ko bytes")
		})
	}
}
func TestDecompressedModuleReader_TempFileRemovedOnClose(t *testing.T) {
koBytes := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP"})
data := gzCompress(koBytes)
br := bytes.NewReader(data)
wrapped := struct {
io.ReadCloser
io.ReaderAt
io.Seeker
}{
ReadCloser: io.NopCloser(br),
ReaderAt: br,
Seeker: br,
}
got, err := decompressedModuleReader(testContext(t), "/test.ko.gz", wrapped)
require.NoError(t, err)
tfr, ok := got.(*tempFileUnionReader)
require.True(t, ok, "expected compressed path to spill to a temp file")
path := tfr.File.Name()
_, err = os.Stat(path)
require.NoError(t, err, "temp file should exist before Close")
require.NoError(t, got.Close())
_, err = os.Stat(path)
assert.True(t, os.IsNotExist(err), "temp file should be removed after Close, got err=%v", err)
}