chore: call cleanup on tmpfile and replace some io.ReadAlls with streams (#4629)

* fix(deb and snaps): prevent excess reads

Previously, Syft could allocate excess memory or tempfile space if there
were highly compressed objects in deb archives, or at paths where the
kernel changelog was expected by the snap cataloger. Use io.LimitReaders
for extracting parts of deb archives, and refactor the snap cataloger's
reading of the kernel changelog to use a streaming parser, eliminating
the possibility of excess allocation.

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* fix: always cleanup temp file from file source

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

* use streaming strategy for deb archives

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>

---------

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
This commit is contained in:
Will Murphy 2026-02-17 17:32:35 -05:00 committed by GitHub
parent 2fe5f9c7b8
commit 0a3f7bb06e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 509 additions and 276 deletions

View File

@ -1,6 +1,7 @@
package debian
import (
"bufio"
"io"
"regexp"
"sort"
@ -14,23 +15,32 @@ import (
// For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax
var (
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`)
licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
)
func parseLicensesFromCopyright(reader io.Reader) []string {
findings := strset.New()
data, err := io.ReadAll(reader)
if err != nil {
// Fail-safe: return nothing if unable to read
return []string{}
}
scanner := bufio.NewScanner(reader)
content := string(data)
lines := strings.Split(content, "\n")
for _, line := range lines {
// State machine replacing licenseFirstSentenceAfterHeadingPattern.
// That regex only matched at the start of the file: a non-empty heading,
// a line of dashes, blank lines, then text up to the first period.
const (
expectHeading = iota
expectDashes
skipBlanks
captureLicense
headingDone // matched or impossible — stop checking
)
headingState := expectHeading
var licenseText strings.Builder
for scanner.Scan() {
line := scanner.Text()
// per-line regex checks (applied to every line)
if value := findLicenseClause(licensePattern, line); value != "" {
findings.Add(value)
}
@ -40,13 +50,39 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" {
findings.Add(value)
}
}
// some copyright files have a license declaration after the heading ex:
// End User License Agreement\n--------------------------
// we want to try and find these multi-line license declarations and make exceptions for them
if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" {
findings.Add(value)
// multi-line heading detection (only at start of file)
switch headingState {
case expectHeading:
if strings.TrimSpace(line) != "" {
headingState = expectDashes
} else {
headingState = headingDone
}
case expectDashes:
trimmed := strings.TrimSpace(line)
if len(trimmed) > 0 && strings.Trim(trimmed, "-") == "" {
headingState = skipBlanks
} else {
headingState = headingDone
}
case skipBlanks:
if strings.TrimSpace(line) != "" {
headingState = captureLicense
licenseText.WriteString(line)
if value := extractUpToFirstPeriod(licenseText.String()); value != "" {
findings.Add(value)
headingState = headingDone
}
}
case captureLicense:
licenseText.WriteString(" ")
licenseText.WriteString(line)
if value := extractUpToFirstPeriod(licenseText.String()); value != "" {
findings.Add(value)
headingState = headingDone
}
}
}
results := findings.List()
@ -55,6 +91,15 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
return results
}
// extractUpToFirstPeriod returns the license text up to and including the
// first period, normalized through ensureIsSingleLicense. It returns ""
// while the accumulated text does not yet contain a period.
func extractUpToFirstPeriod(s string) string {
	idx := strings.Index(s, ".")
	if idx < 0 {
		return ""
	}
	return ensureIsSingleLicense(s[:idx+1])
}
func findLicenseClause(pattern *regexp.Regexp, line string) string {
valueGroup := "license"
matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line)

View File

@ -2,7 +2,6 @@ package debian
import (
"archive/tar"
"bytes"
"context"
"fmt"
"io"
@ -107,41 +106,56 @@ func processDataTar(dcReader io.ReadCloser) ([]string, error) {
func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) {
defer internal.CloseAndLogError(dcReader, "")
// Extract control, md5sums, and conffiles files from control.tar
tarReader := tar.NewReader(dcReader)
controlFileContent, md5Content, confContent, err := readControlFiles(tarReader)
if err != nil {
return nil, fmt.Errorf("failed to read control files: %w", err)
var metadata *pkg.DpkgArchiveEntry
var files []pkg.DpkgFileRecord
var confFileRecords []pkg.DpkgFileRecord
for {
header, err := tarReader.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, fmt.Errorf("failed to read control tar: %w", err)
}
switch filepath.Base(header.Name) {
case "control":
// parseDpkgStatus already streams via bufio.Reader
entries, err := parseDpkgStatus(tarReader)
if err != nil {
return nil, fmt.Errorf("failed to parse control file: %w", err)
}
if len(entries) == 0 {
return nil, fmt.Errorf("no package entries found in control file")
}
entry := pkg.DpkgArchiveEntry(entries[0])
metadata = &entry
case "md5sums":
// parseDpkgMD5Info already streams via bufio.Scanner
files = parseDpkgMD5Info(tarReader)
case "conffiles":
// parseDpkgConffileInfo already streams via bufio.Scanner
confFileRecords = parseDpkgConffileInfo(tarReader)
}
}
if controlFileContent == nil {
if metadata == nil {
return nil, fmt.Errorf("control file not found in archive")
}
metadata, err := newDpkgArchiveMetadata(controlFileContent, md5Content, confContent)
if err != nil {
return nil, fmt.Errorf("failed to create package metadata: %w", err)
}
return &metadata, nil
}
func newDpkgArchiveMetadata(controlFile, md5sums, confFiles []byte) (pkg.DpkgArchiveEntry, error) {
// parse the control file to get package metadata
metadata, err := parseControlFile(string(controlFile))
if err != nil {
return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err)
}
// parse MD5 sums to get file records
var files []pkg.DpkgFileRecord
if len(md5sums) > 0 {
files = parseDpkgMD5Info(bytes.NewReader(md5sums))
}
// mark config files
if len(confFiles) > 0 {
markConfigFiles(confFiles, files)
if len(confFileRecords) > 0 && len(files) > 0 {
configPaths := make(map[string]struct{}, len(confFileRecords))
for _, cf := range confFileRecords {
configPaths[cf.Path] = struct{}{}
}
for i, f := range files {
if _, isConfig := configPaths[f.Path]; isConfig {
files[i].IsConfigFile = true
}
}
}
metadata.Files = files
@ -166,73 +180,3 @@ func decompressionStream(ctx context.Context, r io.Reader, filePath string) (io.
return rc, nil
}
// readControlFiles walks every entry of a control.tar archive and returns the
// raw bytes of the "control", "md5sums", and "conffiles" members (matched by
// base name, so paths like "./control" are recognized). A member that is not
// present yields a nil slice. Any tar or read error aborts the walk.
func readControlFiles(tarReader *tar.Reader) (controlFile, md5sums, conffiles []byte, err error) {
	for {
		hdr, nextErr := tarReader.Next()
		if nextErr == io.EOF {
			// end of archive: return whatever was collected
			return controlFile, md5sums, conffiles, nil
		}
		if nextErr != nil {
			return nil, nil, nil, nextErr
		}
		var readErr error
		switch filepath.Base(hdr.Name) {
		case "control":
			controlFile, readErr = io.ReadAll(tarReader)
		case "md5sums":
			md5sums, readErr = io.ReadAll(tarReader)
		case "conffiles":
			conffiles, readErr = io.ReadAll(tarReader)
		}
		if readErr != nil {
			return nil, nil, nil, readErr
		}
	}
}
// parseControlFile parses the content of a debian control file into package
// metadata by reusing the dpkg status-file parser. A .deb control file is
// expected to contain exactly one package stanza; the first entry is returned.
func parseControlFile(controlFileContent string) (pkg.DpkgArchiveEntry, error) {
	entries, err := parseDpkgStatus(strings.NewReader(controlFileContent))
	if err != nil {
		return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err)
	}
	if len(entries) == 0 {
		return pkg.DpkgArchiveEntry{}, fmt.Errorf("no package entries found in control file")
	}
	// only the first (and expected only) stanza is meaningful for a .deb
	return pkg.DpkgArchiveEntry(entries[0]), nil
}
// markConfigFiles sets IsConfigFile on every file record whose path appears
// in the conffiles listing. The files slice is mutated in place.
func markConfigFiles(conffilesContent []byte, files []pkg.DpkgFileRecord) {
	// build a path set from the conffiles listing for O(1) membership checks
	confSet := make(map[string]struct{})
	for _, rec := range parseDpkgConffileInfo(bytes.NewReader(conffilesContent)) {
		confSet[rec.Path] = struct{}{}
	}
	for i := range files {
		if _, ok := confSet[files[i].Path]; ok {
			files[i].IsConfigFile = true
		}
	}
}

View File

@ -3,31 +3,58 @@ package debian
import (
"archive/tar"
"bytes"
"io"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
func TestReadControlFiles(t *testing.T) {
func TestProcessControlTar(t *testing.T) {
tarBytes := createTestTarWithControlFiles(t)
tarReader := bytes.NewReader(tarBytes)
reader := tar.NewReader(tarReader)
controlFile, md5sums, conffiles, err := readControlFiles(reader)
metadata, err := processControlTar(io.NopCloser(bytes.NewReader(tarBytes)))
require.NoError(t, err)
assert.NotNil(t, controlFile, "expected control file to be found")
assert.NotNil(t, md5sums, "expected md5sums file to be found")
assert.NotNil(t, conffiles, "expected conffiles file to be found")
require.NotNil(t, metadata)
assert.Contains(t, string(controlFile), "Package: test-package")
assert.Contains(t, string(md5sums), "d41d8cd98f00b204e9800998ecf8427e")
assert.Contains(t, string(conffiles), "/etc/test")
assert.Equal(t, "test-package", metadata.Package)
assert.Equal(t, "1.0.0", metadata.Version)
// md5sums should have been parsed into file records
require.Len(t, metadata.Files, 1)
assert.Equal(t, "/usr/bin/test-command", metadata.Files[0].Path)
assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", metadata.Files[0].Digest.Value)
// conffiles should have marked config files
assert.True(t, metadata.Files[0].IsConfigFile, "file listed in conffiles should be marked as config")
}
// TestProcessControlTar_ConfigFileMarking verifies that processControlTar
// marks exactly the md5sums-listed files that also appear in conffiles as
// config files, and leaves the rest unmarked.
func TestProcessControlTar_ConfigFileMarking(t *testing.T) {
// Create a tar where conffiles lists paths that overlap with md5sums entries
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\n"
writeTarEntry(t, tw, "control", controlContent)
// three file records; the first two are later listed in conffiles
md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n" +
"d41d8cd98f00b204e9800998ecf8427e etc/test/config.conf\n" +
"d41d8cd98f00b204e9800998ecf8427e usr/bin/other-command\n"
writeTarEntry(t, tw, "md5sums", md5Content)
conffilesContent := "/usr/bin/test-command\n/etc/test/config.conf\n"
writeTarEntry(t, tw, "conffiles", conffilesContent)
require.NoError(t, tw.Close())
metadata, err := processControlTar(io.NopCloser(bytes.NewReader(buf.Bytes())))
require.NoError(t, err)
require.Len(t, metadata.Files, 3)
assert.True(t, metadata.Files[0].IsConfigFile, "first file should be marked as config file")
assert.True(t, metadata.Files[1].IsConfigFile, "second file should be marked as config file")
assert.False(t, metadata.Files[2].IsConfigFile, "third file should not be marked as config file")
}
// createTestTarWithControlFiles creates a simple in-memory tar file with test control files
@ -35,108 +62,26 @@ func createTestTarWithControlFiles(t *testing.T) []byte {
var buf bytes.Buffer
tw := tar.NewWriter(&buf)
// Add control file
controlContent := `Package: test-package
Version: 1.0.0
Architecture: all
Maintainer: Test <test@example.com>
Description: Test package
`
err := tw.WriteHeader(&tar.Header{
Name: "control",
Mode: 0644,
Size: int64(len(controlContent)),
})
require.NoError(t, err)
_, err = tw.Write([]byte(controlContent))
require.NoError(t, err)
controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\nMaintainer: Test <test@example.com>\nDescription: Test package\n"
writeTarEntry(t, tw, "control", controlContent)
// Add md5sums file
md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n"
err = tw.WriteHeader(&tar.Header{
Name: "md5sums",
Mode: 0644,
Size: int64(len(md5Content)),
})
require.NoError(t, err)
_, err = tw.Write([]byte(md5Content))
require.NoError(t, err)
writeTarEntry(t, tw, "md5sums", md5Content)
// Add conffiles file
conffilesContent := "/etc/test/config.conf\n"
err = tw.WriteHeader(&tar.Header{
Name: "conffiles",
Mode: 0644,
Size: int64(len(conffilesContent)),
})
require.NoError(t, err)
_, err = tw.Write([]byte(conffilesContent))
require.NoError(t, err)
// Close the tar writer
err = tw.Close()
require.NoError(t, err)
conffilesContent := "/usr/bin/test-command\n"
writeTarEntry(t, tw, "conffiles", conffilesContent)
require.NoError(t, tw.Close())
return buf.Bytes()
}
func TestMarkConfigFiles(t *testing.T) {
// Create test data
conffilesContent := []byte("/usr/bin/test-command\n/etc/test/config.conf\n")
files := []pkg.DpkgFileRecord{
{
Path: "/usr/bin/test-command",
Digest: &file.Digest{
Algorithm: "md5",
Value: "d41d8cd98f00b204e9800998ecf8427e",
},
},
{
Path: "/etc/test/config.conf",
Digest: &file.Digest{
Algorithm: "md5",
Value: "d41d8cd98f00b204e9800998ecf8427e",
},
},
{
Path: "/usr/bin/other-command",
Digest: &file.Digest{
Algorithm: "md5",
Value: "d41d8cd98f00b204e9800998ecf8427e",
},
},
}
markConfigFiles(conffilesContent, files)
assert.True(t, files[0].IsConfigFile, "first file should be marked as config file")
assert.True(t, files[1].IsConfigFile, "second file should be marked as config file")
assert.False(t, files[2].IsConfigFile, "third file should not be marked as config file")
}
func TestParseControlFile(t *testing.T) {
controlContent := `Package: test-package
Version: 1.2.3-4
Architecture: amd64
Maintainer: Test User <test@example.com>
Installed-Size: 1234
Depends: libc6, libtest
Description: This is a test package
More description text
And even more details
`
metadata, err := parseControlFile(controlContent)
func writeTarEntry(t *testing.T, tw *tar.Writer, name, content string) {
t.Helper()
require.NoError(t, tw.WriteHeader(&tar.Header{
Name: name,
Mode: 0644,
Size: int64(len(content)),
}))
_, err := tw.Write([]byte(content))
require.NoError(t, err)
assert.Equal(t, "test-package", metadata.Package)
assert.Equal(t, "1.2.3-4", metadata.Version)
assert.Equal(t, "amd64", metadata.Architecture)
assert.Equal(t, "Test User <test@example.com>", metadata.Maintainer)
assert.Equal(t, 1234, metadata.InstalledSize)
assert.Contains(t, metadata.Description, "This is a test package")
assert.Len(t, metadata.Depends, 2)
assert.Contains(t, metadata.Depends, "libc6")
assert.Contains(t, metadata.Depends, "libtest")
}

View File

@ -1,10 +1,10 @@
package snap
import (
"bufio"
"compress/gzip"
"context"
"fmt"
"io"
"regexp"
"strings"
@ -22,16 +22,28 @@ type kernelVersionInfo struct {
majorVersion string // e.g., "5.4"
}
// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version
// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version.
// The changelog is gzip-compressed and may be very large, so we stream it line-by-line
// rather than reading it entirely into memory.
func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
// The file should be gzipped
lines, err := readChangelogLines(reader)
gzReader, err := gzip.NewReader(reader)
if err != nil {
return nil, nil, err
return nil, nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err)
}
defer gzReader.Close()
scanner := bufio.NewScanner(gzReader)
// read the first line to extract kernel version
// Format: "linux (5.4.0-195.215) focal; urgency=medium"
if !scanner.Scan() {
if err := scanner.Err(); err != nil {
return nil, nil, fmt.Errorf("failed to read changelog content: %w", err)
}
return nil, nil, fmt.Errorf("changelog file is empty")
}
// pull from first line
versionInfo, err := extractKernelVersion(lines[0])
versionInfo, err := extractKernelVersion(scanner.Text())
if err != nil {
return nil, nil, err
}
@ -42,38 +54,21 @@ func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environ
packages := createMainKernelPackage(versionInfo, snapMetadata, reader.Location)
// Check for base kernel package
basePackage := findBaseKernelPackage(lines, versionInfo, snapMetadata, reader.Location)
if basePackage != nil {
packages = append(packages, *basePackage)
// stream remaining lines looking for the base kernel entry
baseKernelEntry := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/"))
for scanner.Scan() {
line := scanner.Text()
if strings.Contains(line, baseKernelEntry) {
if basePackage := parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, reader.Location); basePackage != nil {
packages = append(packages, *basePackage)
}
break
}
}
return packages, nil, nil
}
// readChangelogLines decompresses the gzip-compressed changelog and splits it
// into lines. It returns an error when the stream is not valid gzip, cannot
// be read, or contains no content at all.
//
// The first line is expected to have the form:
// "linux (5.4.0-195.215) focal; urgency=medium"
func readChangelogLines(reader file.LocationReadCloser) ([]string, error) {
	gzReader, err := gzip.NewReader(reader)
	if err != nil {
		return nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err)
	}
	defer gzReader.Close()
	content, err := io.ReadAll(gzReader)
	if err != nil {
		return nil, fmt.Errorf("failed to read changelog content: %w", err)
	}
	// strings.Split always yields at least one element ([""] for empty
	// input), so the previous len(lines) == 0 check could never fire.
	// Detect an empty changelog from the raw content instead.
	if len(content) == 0 {
		return nil, fmt.Errorf("changelog file is empty")
	}
	return strings.Split(string(content), "\n"), nil
}
// extractKernelVersion parses version information from the first changelog line
func extractKernelVersion(firstLine string) (*kernelVersionInfo, error) {
// Format: "linux (5.4.0-195.215) focal; urgency=medium"
@ -117,19 +112,6 @@ func createMainKernelPackage(versionInfo *kernelVersionInfo, snapMetadata pkg.Sn
return []pkg.Package{kernelPkg}
}
// findBaseKernelPackage scans the changelog lines for the base kernel entry
// (marked by "<releaseVersion>/linux:") and, when found, returns the package
// parsed from that line; it returns nil when no such entry exists.
func findBaseKernelPackage(lines []string, versionInfo *kernelVersionInfo, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package {
	marker := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/"))
	for _, line := range lines {
		if !strings.Contains(line, marker) {
			continue
		}
		return parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, location)
	}
	return nil
}
// parseBaseKernelLine extracts base kernel version from a changelog line
func parseBaseKernelLine(line string, majorVersion string, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package {
baseKernelRegex := regexp.MustCompile(fmt.Sprintf(`(%s-[0-9]+)\.?[0-9]*`, regexp.QuoteMeta(majorVersion)))

View File

@ -0,0 +1,312 @@
package snap
import (
"bytes"
"compress/gzip"
"context"
"io"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
)
// gzipContent gzip-compresses content and returns the resulting bytes,
// failing the test on any write or close error.
func gzipContent(t *testing.T, content string) []byte {
	t.Helper()
	var compressed bytes.Buffer
	gz := gzip.NewWriter(&compressed)
	_, err := gz.Write([]byte(content))
	require.NoError(t, err)
	require.NoError(t, gz.Close())
	return compressed.Bytes()
}
// locationReadCloser wraps raw bytes in a LocationReadCloser anchored at the
// fixed test fixture path used by these tests.
func locationReadCloser(t *testing.T, data []byte) file.LocationReadCloser {
	t.Helper()
	rc := io.NopCloser(bytes.NewReader(data))
	loc := file.NewLocation("test-fixtures/changelog.Debian.gz")
	return file.LocationReadCloser{
		Location:   loc,
		ReadCloser: rc,
	}
}
// TestExtractKernelVersion exercises extractKernelVersion against a table of
// changelog first lines: valid 5.x/6.x Ubuntu kernel headers and malformed
// inputs that must produce a parse error.
func TestExtractKernelVersion(t *testing.T) {
tests := []struct {
name string
firstLine string
expected *kernelVersionInfo
expectError string
}{
{
name: "standard focal kernel",
firstLine: "linux (5.4.0-195.215) focal; urgency=medium",
expected: &kernelVersionInfo{
baseVersion: "5.4.0-195",
releaseVersion: "215",
fullVersion: "5.4.0-195.215",
majorVersion: "5.4",
},
},
{
name: "noble kernel 6.x",
firstLine: "linux (6.8.0-50.51) noble; urgency=medium",
expected: &kernelVersionInfo{
baseVersion: "6.8.0-50",
releaseVersion: "51",
fullVersion: "6.8.0-50.51",
majorVersion: "6.8",
},
},
{
name: "jammy kernel",
firstLine: "linux (5.15.0-130.140) jammy; urgency=medium",
expected: &kernelVersionInfo{
baseVersion: "5.15.0-130",
releaseVersion: "140",
fullVersion: "5.15.0-130.140",
majorVersion: "5.15",
},
},
// error cases: empty, unrecognized, and version missing its release part
{
name: "empty string",
firstLine: "",
expectError: "could not parse kernel version from changelog",
},
{
name: "no version match",
firstLine: "not a valid changelog line",
expectError: "could not parse kernel version from changelog",
},
{
name: "missing release version",
firstLine: "linux (5.4.0-195) focal; urgency=medium",
expectError: "could not parse kernel version from changelog",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := extractKernelVersion(tt.firstLine)
if tt.expectError != "" {
require.Error(t, err)
assert.Contains(t, err.Error(), tt.expectError)
return
}
require.NoError(t, err)
// all four derived version fields must match
assert.Equal(t, tt.expected.baseVersion, result.baseVersion)
assert.Equal(t, tt.expected.releaseVersion, result.releaseVersion)
assert.Equal(t, tt.expected.fullVersion, result.fullVersion)
assert.Equal(t, tt.expected.majorVersion, result.majorVersion)
})
}
}
// TestCreateMainKernelPackage verifies that createMainKernelPackage builds a
// single deb-typed package named from the base version with the full version,
// carrying the snap metadata through.
func TestCreateMainKernelPackage(t *testing.T) {
location := file.NewLocation("test-fixtures/changelog.Debian.gz")
versionInfo := &kernelVersionInfo{
baseVersion: "5.4.0-195",
releaseVersion: "215",
fullVersion: "5.4.0-195.215",
majorVersion: "5.4",
}
snapMetadata := pkg.SnapEntry{
SnapType: pkg.SnapTypeKernel,
}
packages := createMainKernelPackage(versionInfo, snapMetadata, location)
require.Len(t, packages, 1)
p := packages[0]
// name is derived from the base version; version is the full version
assert.Equal(t, "linux-image-5.4.0-195-generic", p.Name)
assert.Equal(t, "5.4.0-195.215", p.Version)
assert.Equal(t, pkg.DebPkg, p.Type)
metadata, ok := p.Metadata.(pkg.SnapEntry)
require.True(t, ok)
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType)
}
// TestParseBaseKernelLine covers parseBaseKernelLine: lines containing an
// "[ Ubuntu: <major>-X.Y ]" style version yield a base kernel package, while
// non-matching or empty lines yield nil.
func TestParseBaseKernelLine(t *testing.T) {
location := file.NewLocation("test-fixtures/changelog.Debian.gz")
snapMetadata := pkg.SnapEntry{
SnapType: pkg.SnapTypeKernel,
}
tests := []struct {
name string
line string
majorVersion string
expectNil bool
expectedName string
expectedVer string
}{
{
name: "standard base kernel entry",
line: " [ Ubuntu: 5.4-100.200 ]",
majorVersion: "5.4",
expectedName: "linux-image-5.4-100-generic",
expectedVer: "5.4-100.200",
},
{
name: "6.x base kernel entry",
line: " [ Ubuntu: 6.8-40.41 ]",
majorVersion: "6.8",
expectedName: "linux-image-6.8-40-generic",
expectedVer: "6.8-40.41",
},
// no version matching the major version pattern -> nil result
{
name: "no matching version",
line: " * some random changelog text here",
majorVersion: "5.4",
expectNil: true,
},
{
name: "empty line",
line: "",
majorVersion: "5.4",
expectNil: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := parseBaseKernelLine(tt.line, tt.majorVersion, snapMetadata, location)
if tt.expectNil {
assert.Nil(t, result)
return
}
require.NotNil(t, result)
assert.Equal(t, tt.expectedName, result.Name)
assert.Equal(t, tt.expectedVer, result.Version)
assert.Equal(t, pkg.DebPkg, result.Type)
metadata, ok := result.Metadata.(pkg.SnapEntry)
require.True(t, ok)
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType)
})
}
}
// TestParseKernelChangelog drives parseKernelChangelog end-to-end with
// gzipped changelog fixtures: main-package-only outputs, a base kernel entry
// matched via the "<release>/linux:" pattern, and error paths for invalid
// gzip, empty, and unparseable content.
func TestParseKernelChangelog(t *testing.T) {
// Realistic changelog content modeled on Ubuntu kernel changelogs.
// The first line declares the patched kernel version.
// Somewhere later a line references the base upstream kernel.
fullChangelog := strings.Join([]string{
"linux (5.4.0-195.215) focal; urgency=medium",
"",
" * focal/linux: 5.4.0-195.215 -proposed tracker (LP: #2083390)",
"",
" [ Ubuntu: 5.4-100.200 ]",
"",
" * Some other entry",
"",
" -- Ubuntu Kernel Team <kernel-team@lists.ubuntu.com> Mon, 01 Jan 2024 00:00:00 +0000",
"",
}, "\n")
// Changelog where the base kernel entry line uses the release version pattern
// The code builds: fmt.Sprintf("%s/linux:", releaseVersion) → "215/linux:"
changelogWithBaseEntry := strings.Join([]string{
"linux (5.4.0-195.215) focal; urgency=medium",
"",
" * focal/linux: 5.4.0-195.215 -proposed tracker",
"",
" 215/linux: 5.4-100.200 base entry",
"",
" -- Ubuntu Kernel Team <kernel-team@lists.ubuntu.com> Mon, 01 Jan 2024 00:00:00 +0000",
"",
}, "\n")
// Changelog with only the header line and no base kernel match
minimalChangelog := "linux (6.8.0-50.51) noble; urgency=medium\n"
tests := []struct {
name string
input []byte
expectedCount int
expectedNames []string
expectedVers []string
expectError bool
errorContains string
}{
{
name: "full changelog with base kernel via release version pattern",
input: gzipContent(t, changelogWithBaseEntry),
expectedCount: 2,
expectedNames: []string{"linux-image-5.4.0-195-generic", "linux-image-5.4-100-generic"},
expectedVers: []string{"5.4.0-195.215", "5.4-100.200"},
},
{
name: "changelog without base kernel match returns only main package",
input: gzipContent(t, minimalChangelog),
expectedCount: 1,
expectedNames: []string{"linux-image-6.8.0-50-generic"},
expectedVers: []string{"6.8.0-50.51"},
},
{
// "[ Ubuntu: ... ]" alone is not enough; the "<release>/linux:" marker is required
name: "full changelog without matching release version pattern returns only main package",
input: gzipContent(t, fullChangelog),
expectedCount: 1,
expectedNames: []string{"linux-image-5.4.0-195-generic"},
expectedVers: []string{"5.4.0-195.215"},
},
{
name: "invalid gzip data",
input: []byte("not gzip data"),
expectError: true,
errorContains: "failed to create gzip reader",
},
{
// The old (slurp) implementation produces "could not parse kernel version"
// because strings.Split("", "\n") yields [""], not an empty slice.
// The new (streaming) implementation produces "changelog file is empty"
// because bufio.Scanner.Scan() returns false immediately.
// Both correctly reject empty content; only the message differs.
name: "empty gzip content",
input: gzipContent(t, ""),
expectError: true,
},
{
name: "gzip content with unparseable first line",
input: gzipContent(t, "this is not a valid kernel changelog\n"),
expectError: true,
errorContains: "could not parse kernel version from changelog",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
reader := locationReadCloser(t, tt.input)
packages, relationships, err := parseKernelChangelog(
context.Background(), nil, &generic.Environment{}, reader,
)
if tt.expectError {
require.Error(t, err)
if tt.errorContains != "" {
assert.Contains(t, err.Error(), tt.errorContains)
}
return
}
require.NoError(t, err)
// the changelog cataloger never emits relationships
assert.Nil(t, relationships)
require.Len(t, packages, tt.expectedCount)
for i, p := range packages {
assert.Equal(t, tt.expectedNames[i], p.Name, "package %d name", i)
assert.Equal(t, tt.expectedVers[i], p.Version, "package %d version", i)
assert.Equal(t, pkg.DebPkg, p.Type, "package %d type", i)
metadata, ok := p.Metadata.(pkg.SnapEntry)
require.True(t, ok, "package %d metadata type", i)
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType, "package %d snap type", i)
}
})
}
}

View File

@ -76,6 +76,11 @@ func New(cfg Config) (source.Source, error) {
analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
if err != nil {
if cleanupFn != nil {
if cleanupErr := cleanupFn(); cleanupErr != nil {
log.Warnf("failed to cleanup temporary directory: %v", cleanupErr)
}
}
return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
}
@ -211,7 +216,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
if err != nil {
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
return "", cleanupFn, fmt.Errorf("unable to unarchive source file: %w", err)
}
log.Debugf("source path is an archive")