From 4579d11abc99db7e8be462516749b99a9bfb30d3 Mon Sep 17 00:00:00 2001 From: William Bates <148372411+will-bates11@users.noreply.github.com> Date: Wed, 13 May 2026 13:44:18 -0400 Subject: [PATCH] fix: detect compressed kernel modules (.ko.gz, .ko.xz, .ko.zst) (#4740) * fix: detect compressed kernel modules (.ko.gz, .ko.xz, .ko.zst) The linux-kernel-cataloger only matched plain *.ko files, missing compressed modules produced when CONFIG_MODULE_COMPRESS is enabled (common on Debian 13 / Ubuntu 24.04+). This resulted in near-zero module packages being reported for such filesystems. Changes: - Add *.ko.gz, *.ko.xz, *.ko.zst glob patterns to both the cataloger and capabilities.yaml so the file resolver picks up compressed modules - Add decompressedModuleReader() which detects the extension and transparently decompresses via compress/gzip, ulikunitz/xz, or klauspost/compress/zstd before handing the ELF bytes to the existing parseLinuxKernelModuleMetadata parser - Promote github.com/klauspost/compress from indirect to direct dependency - Add unit tests covering all three compression formats plus the uncompressed baseline, using a programmatically generated minimal ELF Fixes #4721 Signed-off-by: Will Bates * address reading archives into memory Signed-off-by: Alex Goodman --------- Signed-off-by: Will Bates Signed-off-by: Alex Goodman Co-authored-by: Will Bates Co-authored-by: Alex Goodman --- go.mod | 2 +- syft/pkg/cataloger/kernel/capabilities.yaml | 5 +- syft/pkg/cataloger/kernel/cataloger.go | 5 +- .../kernel/parse_linux_kernel_module_file.go | 101 ++++++- .../parse_linux_kernel_module_file_test.go | 285 ++++++++++++++++++ 5 files changed, 394 insertions(+), 4 deletions(-) create mode 100644 syft/pkg/cataloger/kernel/parse_linux_kernel_module_file_test.go diff --git a/go.mod b/go.mod index a7e4c86d8..962075add 100644 --- a/go.mod +++ b/go.mod @@ -62,6 +62,7 @@ require ( github.com/jedib0t/go-pretty/v6 v6.7.8 github.com/jinzhu/copier v0.4.0 github.com/kastenhq/goversion 
v0.0.0-20230811215019-93b2f8823953 + github.com/klauspost/compress v1.18.5 github.com/magiconair/properties v1.8.10 github.com/mholt/archives v0.1.5 github.com/moby/sys/mountinfo v0.7.2 @@ -215,7 +216,6 @@ require ( github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect - github.com/klauspost/compress v1.18.5 // indirect github.com/klauspost/cpuid/v2 v2.3.0 // indirect github.com/klauspost/pgzip v1.2.6 // indirect github.com/kr/pretty v0.3.1 // indirect diff --git a/syft/pkg/cataloger/kernel/capabilities.yaml b/syft/pkg/cataloger/kernel/capabilities.yaml index 5888f515a..c6e8afe1e 100644 --- a/syft/pkg/cataloger/kernel/capabilities.yaml +++ b/syft/pkg/cataloger/kernel/capabilities.yaml @@ -4,7 +4,7 @@ configs: # AUTO-GENERATED - config structs and their fields kernel.LinuxKernelCatalogerConfig: fields: - key: CatalogModules - description: CatalogModules enables cataloging linux kernel modules (`*.ko` files) in addition to the kernel itself. + description: CatalogModules enables cataloging linux kernel modules (`*.ko` and compressed `*.ko.gz`, `*.ko.xz`, `*.ko.zst` files) in addition to the kernel itself. 
app_key: linux-kernel.catalog-modules catalogers: - ecosystem: linux # MANUAL @@ -36,6 +36,9 @@ catalogers: - '**/zImage' - '**/zImage-*' - '**/lib/modules/**/*.ko' + - '**/lib/modules/**/*.ko.gz' + - '**/lib/modules/**/*.ko.xz' + - '**/lib/modules/**/*.ko.zst' metadata_types: # AUTO-GENERATED - pkg.LinuxKernel - pkg.LinuxKernelModule diff --git a/syft/pkg/cataloger/kernel/cataloger.go b/syft/pkg/cataloger/kernel/cataloger.go index c9ddcb4ec..4ea830575 100644 --- a/syft/pkg/cataloger/kernel/cataloger.go +++ b/syft/pkg/cataloger/kernel/cataloger.go @@ -17,7 +17,7 @@ import ( var _ pkg.Cataloger = (*linuxKernelCataloger)(nil) type LinuxKernelCatalogerConfig struct { - // CatalogModules enables cataloging linux kernel modules (`*.ko` files) in addition to the kernel itself. + // CatalogModules enables cataloging linux kernel modules (`*.ko` and compressed `*.ko.gz`, `*.ko.xz`, `*.ko.zst` files) in addition to the kernel itself. // app-config: linux-kernel.catalog-modules CatalogModules bool `yaml:"catalog-modules" json:"catalog-modules" mapstructure:"catalog-modules"` } @@ -47,6 +47,9 @@ var kernelArchiveGlobs = []string{ var kernelModuleGlobs = []string{ "**/lib/modules/**/*.ko", + "**/lib/modules/**/*.ko.gz", + "**/lib/modules/**/*.ko.xz", + "**/lib/modules/**/*.ko.zst", } // NewLinuxKernelCataloger returns a new kernel files cataloger object. 
diff --git a/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file.go b/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file.go index 3e3e061e8..064c8c3ce 100644 --- a/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file.go +++ b/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file.go @@ -3,9 +3,16 @@ package kernel import ( "context" "debug/elf" + "errors" "fmt" + "io" + "os" "strings" + "github.com/mholt/archives" + + intfile "github.com/anchore/syft/internal/file" + "github.com/anchore/syft/internal/tmpdir" "github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/internal/unionreader" @@ -20,7 +27,14 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic if err != nil { return nil, nil, fmt.Errorf("unable to get union reader for file: %w", err) } - metadata, err := parseLinuxKernelModuleMetadata(unionReader) + + moduleReader, err := decompressedModuleReader(ctx, reader.RealPath, unionReader) + if err != nil { + return nil, nil, fmt.Errorf("unable to decompress kernel module %q: %w", reader.RealPath, err) + } + defer moduleReader.Close() + + metadata, err := parseLinuxKernelModuleMetadata(moduleReader) if err != nil { return nil, nil, fmt.Errorf("unable to parse kernel module metadata: %w", err) } @@ -39,6 +53,91 @@ func parseLinuxKernelModuleFile(ctx context.Context, _ file.Resolver, _ *generic }, nil, nil } +// decompressedModuleReader returns a UnionReader over the decompressed contents of the kernel module +// when the path indicates compression (e.g. .ko.gz, .ko.xz, .ko.zst). For plain .ko files the original +// reader is returned unchanged. ELF parsing requires random access (io.ReaderAt + io.Seeker), so +// compressed streams are spilled to a temp file rather than buffered in memory — kernel modules can be +// tens of MB decompressed and large numbers of them are scanned per cataloger run. 
The caller owns
+// the returned reader and must Close it; the underlying reader (r) is not closed by Close on the
+// passthrough path — its lifecycle is the caller's.
+func decompressedModuleReader(ctx context.Context, path string, r unionreader.UnionReader) (unionreader.UnionReader, error) {
+	// fast path: plain .ko files don't need format sniffing
+	if strings.HasSuffix(path, ".ko") {
+		return &nopCloseUnionReader{UnionReader: r}, nil
+	}
+
+	format, stream, err := intfile.IdentifyArchive(ctx, path, r)
+	if err != nil {
+		if errors.Is(err, archives.NoMatch) {
+			return passthrough(r)
+		}
+		return nil, fmt.Errorf("unable to identify compression format: %w", err)
+	}
+
+	decompressor, ok := format.(archives.Decompressor)
+	if !ok {
+		// not a single-stream compressed format (e.g. a tar/zip archive); treat as a plain .ko
+		return passthrough(r)
+	}
+
+	td := tmpdir.FromContext(ctx) // checked before the decompressor is opened so a missing factory fails without creating a stream
+	if td == nil {
+		return nil, errors.New("no temp dir factory in context")
+	}
+	rc, err := decompressor.OpenReader(stream)
+	if err != nil {
+		return nil, fmt.Errorf("unable to open %s decompression stream: %w", format.Extension(), err)
+	}
+	defer rc.Close()
+
+	tempFile, fileCleanup, err := td.NewFile("syft-kmod-*.ko") //nolint:gocritic // cleanup outlives this function — runs from tempFileUnionReader.Close on the returned reader
+	if err != nil {
+		fileCleanup() // NOTE(review): assumes NewFile returns a callable cleanup even when it fails — confirm
+		return nil, fmt.Errorf("unable to create temp file for decompressed kernel module: %w", err)
+	}
+	tfr := &tempFileUnionReader{File: tempFile, cleanup: fileCleanup}
+
+	if _, err := io.Copy(tempFile, rc); err != nil { // NOTE(review): decompressed size is not capped here — confirm that is acceptable for untrusted inputs
+		_ = tfr.Close()
+		return nil, fmt.Errorf("unable to write decompressed kernel module: %w", err)
+	}
+	if _, err := tempFile.Seek(0, io.SeekStart); err != nil {
+		_ = tfr.Close()
+		return nil, fmt.Errorf("unable to rewind decompressed kernel module: %w", err)
+	}
+
+	return tfr, nil
+}
+
+// passthrough returns the original reader rewound to offset 0.
IdentifyArchive consumes bytes to
+// sniff magic; we rewind explicitly so callers don't have to reason about the seeker's position.
+func passthrough(r unionreader.UnionReader) (unionreader.UnionReader, error) {
+	if _, err := r.Seek(0, io.SeekStart); err != nil {
+		return nil, fmt.Errorf("unable to rewind reader after format sniff: %w", err)
+	}
+	return &nopCloseUnionReader{UnionReader: r}, nil
+}
+
+// nopCloseUnionReader wraps a UnionReader so that Close is a no-op. It is used on the passthrough path
+// where the underlying reader's lifecycle is owned by the caller, not by us.
+type nopCloseUnionReader struct {
+	unionreader.UnionReader
+}
+
+func (*nopCloseUnionReader) Close() error { return nil }
+
+// tempFileUnionReader is a UnionReader backed by a temp file; Close closes the file and then runs cleanup (expected to remove it).
+type tempFileUnionReader struct {
+	*os.File
+	cleanup func() // invoked by Close after the file has been closed
+}
+
+func (t *tempFileUnionReader) Close() error {
+	err := t.File.Close()
+	t.cleanup() // runs even when closing the file failed
+	return err // only the File.Close error is reported to the caller
+}
+
 func parseLinuxKernelModuleMetadata(r unionreader.UnionReader) (p *pkg.LinuxKernelModule, err error) {
 	// filename: /lib/modules/5.15.0-1031-aws/kernel/zfs/zzstd.ko
 	// version: 1.4.5a
diff --git a/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file_test.go b/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file_test.go
new file mode 100644
index 000000000..cb99485bc
--- /dev/null
+++ b/syft/pkg/cataloger/kernel/parse_linux_kernel_module_file_test.go
@@ -0,0 +1,285 @@
+package kernel
+
+import (
+	"bytes"
+	"compress/gzip"
+	"context"
+	"debug/elf"
+	"encoding/binary"
+	"io"
+	"os"
+	"testing"
+
+	"github.com/klauspost/compress/zstd"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"github.com/ulikunitz/xz"
+
+	"github.com/anchore/syft/internal/tmpdir"
+	"github.com/anchore/syft/syft/file"
+	"github.com/anchore/syft/syft/pkg"
+	"github.com/anchore/syft/syft/pkg/cataloger/generic"
+)
+
+func testContext(t *testing.T) context.Context {
+	t.Helper()
+	td := 
tmpdir.FromPath(t.TempDir()) + return tmpdir.WithValue(context.Background(), td) +} + +// minimalKOBytes constructs a minimal ELF64 LE relocatable object with a .modinfo +// section containing the given null-terminated key=value entries. +func minimalKOBytes(entries []string) []byte { + // .modinfo section: each entry is key=value\0 + var modinfo bytes.Buffer + for _, e := range entries { + modinfo.WriteString(e) + modinfo.WriteByte(0) + } + + // section header string table — embeds names of all sections back-to-back, leading null required. + // offsets below index into this blob. + shstrtab := []byte("\x00.modinfo\x00.shstrtab\x00") + const ( + modinfoNameOff uint32 = 1 + shstrtabNameOff uint32 = 10 + ) + + const ( + ehdrSize uint64 = 64 + shdrSize uint64 = 64 + numSections uint16 = 3 // null + .modinfo + .shstrtab + ) + + // layout: [ehdr][modinfo][shstrtab][pad to 8][section headers] + var ( + modinfoOff = ehdrSize + modinfoSize = uint64(modinfo.Len()) + shstrtabOff = modinfoOff + modinfoSize + shstrtabSize = uint64(len(shstrtab)) + shdrsOff = alignUp(shstrtabOff+shstrtabSize, 8) + ) + + header := elf.Header64{ + Ident: [16]byte{ + 0x7f, 'E', 'L', 'F', + byte(elf.ELFCLASS64), + byte(elf.ELFDATA2LSB), + byte(elf.EV_CURRENT), + }, + Type: uint16(elf.ET_REL), + Machine: uint16(elf.EM_X86_64), + Version: uint32(elf.EV_CURRENT), + Shoff: shdrsOff, + Ehsize: uint16(ehdrSize), + Shentsize: uint16(shdrSize), + Shnum: numSections, + Shstrndx: numSections - 1, // .shstrtab is last + } + + sections := []elf.Section64{ + {}, // SHN_UNDEF + { + Name: modinfoNameOff, + Type: uint32(elf.SHT_PROGBITS), + Off: modinfoOff, + Size: modinfoSize, + Addralign: 1, + }, + { + Name: shstrtabNameOff, + Type: uint32(elf.SHT_STRTAB), + Off: shstrtabOff, + Size: shstrtabSize, + Addralign: 1, + }, + } + + var buf bytes.Buffer + _ = binary.Write(&buf, binary.LittleEndian, header) + buf.Write(modinfo.Bytes()) + buf.Write(shstrtab) + for uint64(buf.Len()) < shdrsOff { + buf.WriteByte(0) + } + 
for _, s := range sections { + _ = binary.Write(&buf, binary.LittleEndian, s) + } + return buf.Bytes() +} + +func alignUp(v, align uint64) uint64 { + if v%align == 0 { + return v + } + return v + (align - v%align) +} + +func gzCompress(data []byte) []byte { + var buf bytes.Buffer + w := gzip.NewWriter(&buf) + _, _ = w.Write(data) + _ = w.Close() + return buf.Bytes() +} + +func xzCompress(data []byte) []byte { + var buf bytes.Buffer + w, _ := xz.NewWriter(&buf) + _, _ = w.Write(data) + _ = w.Close() + return buf.Bytes() +} + +func zstCompress(data []byte) []byte { + var buf bytes.Buffer + w, _ := zstd.NewWriter(&buf) + _, _ = w.Write(data) + _ = w.Close() + return buf.Bytes() +} + +// makeLocationReadCloser wraps a byte slice as a file.LocationReadCloser with the given path. +func makeLocationReadCloser(path string, data []byte) file.LocationReadCloser { + return file.LocationReadCloser{ + Location: file.NewVirtualLocation(path, path), + ReadCloser: io.NopCloser(bytes.NewReader(data)), + } +} + +func TestParseLinuxKernelModuleFile_Compressed(t *testing.T) { + modinfo := []string{ + "name=dummy_mod", + "version=1.2.3", + "vermagic=6.1.0-rc1 SMP mod_unload", + "license=GPL v2", + } + koBytes := minimalKOBytes(modinfo) + + tests := []struct { + name string + path string + data []byte + wantName string + wantVer string + wantKV string // expected KernelVersion from vermagic + }{ + { + name: "uncompressed .ko", + path: "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko", + data: koBytes, + wantName: "dummy_mod", + wantVer: "1.2.3", + wantKV: "6.1.0-rc1", + }, + { + name: "gzip-compressed .ko.gz", + path: "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.gz", + data: gzCompress(koBytes), + wantName: "dummy_mod", + wantVer: "1.2.3", + wantKV: "6.1.0-rc1", + }, + { + name: "xz-compressed .ko.xz", + path: "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.xz", + data: xzCompress(koBytes), + wantName: "dummy_mod", + wantVer: "1.2.3", + wantKV: "6.1.0-rc1", + }, + { + name: "zstd-compressed 
.ko.zst", + path: "/lib/modules/6.1.0-rc1/kernel/dummy_mod.ko.zst", + data: zstCompress(koBytes), + wantName: "dummy_mod", + wantVer: "1.2.3", + wantKV: "6.1.0-rc1", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := makeLocationReadCloser(tt.path, tt.data) + pkgs, rels, err := parseLinuxKernelModuleFile(testContext(t), nil, &generic.Environment{}, reader) + require.NoError(t, err) + require.Len(t, pkgs, 1) + assert.Empty(t, rels) + assert.Equal(t, tt.wantName, pkgs[0].Name) + assert.Equal(t, tt.wantVer, pkgs[0].Version) + + meta, ok := pkgs[0].Metadata.(pkg.LinuxKernelModule) + require.True(t, ok) + assert.Equal(t, tt.wantKV, meta.KernelVersion) + }) + } +} + +func TestDecompressedModuleReader(t *testing.T) { + koBytes := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP mod_unload"}) + + tests := []struct { + name string + path string + data []byte + }{ + {"uncompressed", "/lib/modules/5.15.0/kernel/test.ko", koBytes}, + {"gz", "/lib/modules/5.15.0/kernel/test.ko.gz", gzCompress(koBytes)}, + {"xz", "/lib/modules/5.15.0/kernel/test.ko.xz", xzCompress(koBytes)}, + {"zst", "/lib/modules/5.15.0/kernel/test.ko.zst", zstCompress(koBytes)}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + br := bytes.NewReader(tt.data) + wrapped := struct { + io.ReadCloser + io.ReaderAt + io.Seeker + }{ + ReadCloser: io.NopCloser(br), + ReaderAt: br, + Seeker: br, + } + got, err := decompressedModuleReader(testContext(t), tt.path, wrapped) + require.NoError(t, err) + require.NotNil(t, got) + t.Cleanup(func() { _ = got.Close() }) + + b, err := io.ReadAll(got) + require.NoError(t, err) + assert.Equal(t, koBytes, b, "decompressed bytes should match original .ko bytes") + }) + } +} + +func TestDecompressedModuleReader_TempFileRemovedOnClose(t *testing.T) { + koBytes := minimalKOBytes([]string{"name=test", "vermagic=5.15.0 SMP"}) + data := gzCompress(koBytes) + + br := bytes.NewReader(data) + wrapped := struct { + 
io.ReadCloser + io.ReaderAt + io.Seeker + }{ + ReadCloser: io.NopCloser(br), + ReaderAt: br, + Seeker: br, + } + + got, err := decompressedModuleReader(testContext(t), "/test.ko.gz", wrapped) + require.NoError(t, err) + + tfr, ok := got.(*tempFileUnionReader) + require.True(t, ok, "expected compressed path to spill to a temp file") + path := tfr.File.Name() + + _, err = os.Stat(path) + require.NoError(t, err, "temp file should exist before Close") + + require.NoError(t, got.Close()) + + _, err = os.Stat(path) + assert.True(t, os.IsNotExist(err), "temp file should be removed after Close, got err=%v", err) +}