mirror of
https://github.com/anchore/syft.git
synced 2026-03-29 21:23:24 +02:00
chore: call cleanup on tmpfile and replace some io.ReadAlls with streams (#4629)
* fix(deb and snaps): prevent excess reads Previously, Syft could allocate excess memory or tempfile space if there were highly compressed objects in deb archives, or at paths where the kernel changelog was expected by the snap cataloger. Use io.LimitReaders for extracting parts of deb archives, and refactor the snap cataloger's reading of the kernel changelog to use streaming parsing, eliminating the possibility of excess allocation. Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> * fix: always clean up temp file from file source Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> * use streaming strategy for deb archives Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com> --------- Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
This commit is contained in:
parent
2fe5f9c7b8
commit
0a3f7bb06e
@ -1,6 +1,7 @@
|
||||
package debian
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"regexp"
|
||||
"sort"
|
||||
@ -14,23 +15,32 @@ import (
|
||||
// For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax
|
||||
|
||||
var (
|
||||
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
|
||||
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
|
||||
licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`)
|
||||
licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
|
||||
licensePattern = regexp.MustCompile(`^License: (?P<license>\S*)`)
|
||||
commonLicensePathPattern = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
|
||||
licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
|
||||
)
|
||||
|
||||
func parseLicensesFromCopyright(reader io.Reader) []string {
|
||||
findings := strset.New()
|
||||
data, err := io.ReadAll(reader)
|
||||
if err != nil {
|
||||
// Fail-safe: return nothing if unable to read
|
||||
return []string{}
|
||||
}
|
||||
scanner := bufio.NewScanner(reader)
|
||||
|
||||
content := string(data)
|
||||
lines := strings.Split(content, "\n")
|
||||
for _, line := range lines {
|
||||
// State machine replacing licenseFirstSentenceAfterHeadingPattern.
|
||||
// That regex only matched at the start of the file: a non-empty heading,
|
||||
// a line of dashes, blank lines, then text up to the first period.
|
||||
const (
|
||||
expectHeading = iota
|
||||
expectDashes
|
||||
skipBlanks
|
||||
captureLicense
|
||||
headingDone // matched or impossible — stop checking
|
||||
)
|
||||
headingState := expectHeading
|
||||
var licenseText strings.Builder
|
||||
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
|
||||
// per-line regex checks (applied to every line)
|
||||
if value := findLicenseClause(licensePattern, line); value != "" {
|
||||
findings.Add(value)
|
||||
}
|
||||
@ -40,13 +50,39 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
|
||||
if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" {
|
||||
findings.Add(value)
|
||||
}
|
||||
}
|
||||
|
||||
// some copyright files have a license declaration after the heading ex:
|
||||
// End User License Agreement\n--------------------------
|
||||
// we want to try and find these multi-line license declarations and make exceptions for them
|
||||
if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" {
|
||||
findings.Add(value)
|
||||
// multi-line heading detection (only at start of file)
|
||||
switch headingState {
|
||||
case expectHeading:
|
||||
if strings.TrimSpace(line) != "" {
|
||||
headingState = expectDashes
|
||||
} else {
|
||||
headingState = headingDone
|
||||
}
|
||||
case expectDashes:
|
||||
trimmed := strings.TrimSpace(line)
|
||||
if len(trimmed) > 0 && strings.Trim(trimmed, "-") == "" {
|
||||
headingState = skipBlanks
|
||||
} else {
|
||||
headingState = headingDone
|
||||
}
|
||||
case skipBlanks:
|
||||
if strings.TrimSpace(line) != "" {
|
||||
headingState = captureLicense
|
||||
licenseText.WriteString(line)
|
||||
if value := extractUpToFirstPeriod(licenseText.String()); value != "" {
|
||||
findings.Add(value)
|
||||
headingState = headingDone
|
||||
}
|
||||
}
|
||||
case captureLicense:
|
||||
licenseText.WriteString(" ")
|
||||
licenseText.WriteString(line)
|
||||
if value := extractUpToFirstPeriod(licenseText.String()); value != "" {
|
||||
findings.Add(value)
|
||||
headingState = headingDone
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results := findings.List()
|
||||
@ -55,6 +91,15 @@ func parseLicensesFromCopyright(reader io.Reader) []string {
|
||||
return results
|
||||
}
|
||||
|
||||
// extractUpToFirstPeriod returns the license text up to the first period,
|
||||
// processed through ensureIsSingleLicense, or "" if no period found yet.
|
||||
func extractUpToFirstPeriod(s string) string {
|
||||
if idx := strings.Index(s, "."); idx >= 0 {
|
||||
return ensureIsSingleLicense(s[:idx+1])
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func findLicenseClause(pattern *regexp.Regexp, line string) string {
|
||||
valueGroup := "license"
|
||||
matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line)
|
||||
|
||||
@ -2,7 +2,6 @@ package debian
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
@ -107,41 +106,56 @@ func processDataTar(dcReader io.ReadCloser) ([]string, error) {
|
||||
func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) {
|
||||
defer internal.CloseAndLogError(dcReader, "")
|
||||
|
||||
// Extract control, md5sums, and conffiles files from control.tar
|
||||
tarReader := tar.NewReader(dcReader)
|
||||
controlFileContent, md5Content, confContent, err := readControlFiles(tarReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read control files: %w", err)
|
||||
|
||||
var metadata *pkg.DpkgArchiveEntry
|
||||
var files []pkg.DpkgFileRecord
|
||||
var confFileRecords []pkg.DpkgFileRecord
|
||||
|
||||
for {
|
||||
header, err := tarReader.Next()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read control tar: %w", err)
|
||||
}
|
||||
|
||||
switch filepath.Base(header.Name) {
|
||||
case "control":
|
||||
// parseDpkgStatus already streams via bufio.Reader
|
||||
entries, err := parseDpkgStatus(tarReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse control file: %w", err)
|
||||
}
|
||||
if len(entries) == 0 {
|
||||
return nil, fmt.Errorf("no package entries found in control file")
|
||||
}
|
||||
entry := pkg.DpkgArchiveEntry(entries[0])
|
||||
metadata = &entry
|
||||
case "md5sums":
|
||||
// parseDpkgMD5Info already streams via bufio.Scanner
|
||||
files = parseDpkgMD5Info(tarReader)
|
||||
case "conffiles":
|
||||
// parseDpkgConffileInfo already streams via bufio.Scanner
|
||||
confFileRecords = parseDpkgConffileInfo(tarReader)
|
||||
}
|
||||
}
|
||||
|
||||
if controlFileContent == nil {
|
||||
if metadata == nil {
|
||||
return nil, fmt.Errorf("control file not found in archive")
|
||||
}
|
||||
|
||||
metadata, err := newDpkgArchiveMetadata(controlFileContent, md5Content, confContent)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create package metadata: %w", err)
|
||||
}
|
||||
|
||||
return &metadata, nil
|
||||
}
|
||||
|
||||
func newDpkgArchiveMetadata(controlFile, md5sums, confFiles []byte) (pkg.DpkgArchiveEntry, error) {
|
||||
// parse the control file to get package metadata
|
||||
metadata, err := parseControlFile(string(controlFile))
|
||||
if err != nil {
|
||||
return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err)
|
||||
}
|
||||
|
||||
// parse MD5 sums to get file records
|
||||
var files []pkg.DpkgFileRecord
|
||||
if len(md5sums) > 0 {
|
||||
files = parseDpkgMD5Info(bytes.NewReader(md5sums))
|
||||
}
|
||||
|
||||
// mark config files
|
||||
if len(confFiles) > 0 {
|
||||
markConfigFiles(confFiles, files)
|
||||
if len(confFileRecords) > 0 && len(files) > 0 {
|
||||
configPaths := make(map[string]struct{}, len(confFileRecords))
|
||||
for _, cf := range confFileRecords {
|
||||
configPaths[cf.Path] = struct{}{}
|
||||
}
|
||||
for i, f := range files {
|
||||
if _, isConfig := configPaths[f.Path]; isConfig {
|
||||
files[i].IsConfigFile = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
metadata.Files = files
|
||||
@ -166,73 +180,3 @@ func decompressionStream(ctx context.Context, r io.Reader, filePath string) (io.
|
||||
|
||||
return rc, nil
|
||||
}
|
||||
|
||||
// readControlFiles walks every entry of the control.tar archive and returns
// the full contents of the entries whose base name is "control", "md5sums",
// or "conffiles". Entries with any other name are skipped. A result stays nil
// when the corresponding entry is absent from the archive.
//
// Any error other than io.EOF from advancing the tar reader, or any error
// while reading an entry body, aborts the walk; in that case all three byte
// slices are returned as nil together with the error.
func readControlFiles(tarReader *tar.Reader) (controlFile, md5sums, conffiles []byte, err error) {
	for {
		hdr, nextErr := tarReader.Next()
		if nextErr == io.EOF {
			// end of archive: hand back whatever was collected
			return controlFile, md5sums, conffiles, nil
		}
		if nextErr != nil {
			return nil, nil, nil, nextErr
		}

		// pick the result slot this entry belongs to, if any
		var target *[]byte
		switch filepath.Base(hdr.Name) {
		case "control":
			target = &controlFile
		case "md5sums":
			target = &md5sums
		case "conffiles":
			target = &conffiles
		default:
			continue
		}

		payload, readErr := io.ReadAll(tarReader)
		if readErr != nil {
			return nil, nil, nil, readErr
		}
		*target = payload
	}
}
|
||||
|
||||
// parseControlFile parses the content of a debian control file into package metadata
|
||||
func parseControlFile(controlFileContent string) (pkg.DpkgArchiveEntry, error) {
|
||||
// Reuse the existing dpkg status file parsing logic
|
||||
reader := strings.NewReader(controlFileContent)
|
||||
|
||||
entries, err := parseDpkgStatus(reader)
|
||||
if err != nil {
|
||||
return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err)
|
||||
}
|
||||
|
||||
if len(entries) == 0 {
|
||||
return pkg.DpkgArchiveEntry{}, fmt.Errorf("no package entries found in control file")
|
||||
}
|
||||
|
||||
// We expect only one entry from a .deb control file
|
||||
return pkg.DpkgArchiveEntry(entries[0]), nil
|
||||
}
|
||||
|
||||
// markConfigFiles marks files that are listed in conffiles as configuration files
|
||||
func markConfigFiles(conffilesContent []byte, files []pkg.DpkgFileRecord) {
|
||||
// Parse the conffiles content into DpkgFileRecord entries
|
||||
confFiles := parseDpkgConffileInfo(bytes.NewReader(conffilesContent))
|
||||
|
||||
// Create a map for quick lookup of config files by path
|
||||
configPathMap := make(map[string]struct{})
|
||||
for _, confFile := range confFiles {
|
||||
configPathMap[confFile.Path] = struct{}{}
|
||||
}
|
||||
|
||||
// Mark files as config files if they're in the conffiles list
|
||||
for i := range files {
|
||||
if _, exists := configPathMap[files[i].Path]; exists {
|
||||
files[i].IsConfigFile = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -3,31 +3,58 @@ package debian
|
||||
import (
|
||||
"archive/tar"
|
||||
"bytes"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
func TestReadControlFiles(t *testing.T) {
|
||||
func TestProcessControlTar(t *testing.T) {
|
||||
tarBytes := createTestTarWithControlFiles(t)
|
||||
|
||||
tarReader := bytes.NewReader(tarBytes)
|
||||
reader := tar.NewReader(tarReader)
|
||||
|
||||
controlFile, md5sums, conffiles, err := readControlFiles(reader)
|
||||
metadata, err := processControlTar(io.NopCloser(bytes.NewReader(tarBytes)))
|
||||
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, controlFile, "expected control file to be found")
|
||||
assert.NotNil(t, md5sums, "expected md5sums file to be found")
|
||||
assert.NotNil(t, conffiles, "expected conffiles file to be found")
|
||||
require.NotNil(t, metadata)
|
||||
|
||||
assert.Contains(t, string(controlFile), "Package: test-package")
|
||||
assert.Contains(t, string(md5sums), "d41d8cd98f00b204e9800998ecf8427e")
|
||||
assert.Contains(t, string(conffiles), "/etc/test")
|
||||
assert.Equal(t, "test-package", metadata.Package)
|
||||
assert.Equal(t, "1.0.0", metadata.Version)
|
||||
|
||||
// md5sums should have been parsed into file records
|
||||
require.Len(t, metadata.Files, 1)
|
||||
assert.Equal(t, "/usr/bin/test-command", metadata.Files[0].Path)
|
||||
assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", metadata.Files[0].Digest.Value)
|
||||
|
||||
// conffiles should have marked config files
|
||||
assert.True(t, metadata.Files[0].IsConfigFile, "file listed in conffiles should be marked as config")
|
||||
}
|
||||
|
||||
func TestProcessControlTar_ConfigFileMarking(t *testing.T) {
|
||||
// Create a tar where conffiles lists paths that overlap with md5sums entries
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
|
||||
controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\n"
|
||||
writeTarEntry(t, tw, "control", controlContent)
|
||||
|
||||
md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n" +
|
||||
"d41d8cd98f00b204e9800998ecf8427e etc/test/config.conf\n" +
|
||||
"d41d8cd98f00b204e9800998ecf8427e usr/bin/other-command\n"
|
||||
writeTarEntry(t, tw, "md5sums", md5Content)
|
||||
|
||||
conffilesContent := "/usr/bin/test-command\n/etc/test/config.conf\n"
|
||||
writeTarEntry(t, tw, "conffiles", conffilesContent)
|
||||
|
||||
require.NoError(t, tw.Close())
|
||||
|
||||
metadata, err := processControlTar(io.NopCloser(bytes.NewReader(buf.Bytes())))
|
||||
require.NoError(t, err)
|
||||
require.Len(t, metadata.Files, 3)
|
||||
|
||||
assert.True(t, metadata.Files[0].IsConfigFile, "first file should be marked as config file")
|
||||
assert.True(t, metadata.Files[1].IsConfigFile, "second file should be marked as config file")
|
||||
assert.False(t, metadata.Files[2].IsConfigFile, "third file should not be marked as config file")
|
||||
}
|
||||
|
||||
// createTestTarWithControlFiles creates a simple in-memory tar file with test control files
|
||||
@ -35,108 +62,26 @@ func createTestTarWithControlFiles(t *testing.T) []byte {
|
||||
var buf bytes.Buffer
|
||||
tw := tar.NewWriter(&buf)
|
||||
|
||||
// Add control file
|
||||
controlContent := `Package: test-package
|
||||
Version: 1.0.0
|
||||
Architecture: all
|
||||
Maintainer: Test <test@example.com>
|
||||
Description: Test package
|
||||
`
|
||||
err := tw.WriteHeader(&tar.Header{
|
||||
Name: "control",
|
||||
Mode: 0644,
|
||||
Size: int64(len(controlContent)),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
_, err = tw.Write([]byte(controlContent))
|
||||
require.NoError(t, err)
|
||||
controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\nMaintainer: Test <test@example.com>\nDescription: Test package\n"
|
||||
writeTarEntry(t, tw, "control", controlContent)
|
||||
|
||||
// Add md5sums file
|
||||
md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n"
|
||||
err = tw.WriteHeader(&tar.Header{
|
||||
Name: "md5sums",
|
||||
Mode: 0644,
|
||||
Size: int64(len(md5Content)),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
_, err = tw.Write([]byte(md5Content))
|
||||
require.NoError(t, err)
|
||||
writeTarEntry(t, tw, "md5sums", md5Content)
|
||||
|
||||
// Add conffiles file
|
||||
conffilesContent := "/etc/test/config.conf\n"
|
||||
err = tw.WriteHeader(&tar.Header{
|
||||
Name: "conffiles",
|
||||
Mode: 0644,
|
||||
Size: int64(len(conffilesContent)),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
_, err = tw.Write([]byte(conffilesContent))
|
||||
require.NoError(t, err)
|
||||
|
||||
// Close the tar writer
|
||||
err = tw.Close()
|
||||
require.NoError(t, err)
|
||||
conffilesContent := "/usr/bin/test-command\n"
|
||||
writeTarEntry(t, tw, "conffiles", conffilesContent)
|
||||
|
||||
require.NoError(t, tw.Close())
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func TestMarkConfigFiles(t *testing.T) {
|
||||
// Create test data
|
||||
conffilesContent := []byte("/usr/bin/test-command\n/etc/test/config.conf\n")
|
||||
|
||||
files := []pkg.DpkgFileRecord{
|
||||
{
|
||||
Path: "/usr/bin/test-command",
|
||||
Digest: &file.Digest{
|
||||
Algorithm: "md5",
|
||||
Value: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
},
|
||||
},
|
||||
{
|
||||
Path: "/etc/test/config.conf",
|
||||
Digest: &file.Digest{
|
||||
Algorithm: "md5",
|
||||
Value: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
},
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/other-command",
|
||||
Digest: &file.Digest{
|
||||
Algorithm: "md5",
|
||||
Value: "d41d8cd98f00b204e9800998ecf8427e",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
markConfigFiles(conffilesContent, files)
|
||||
|
||||
assert.True(t, files[0].IsConfigFile, "first file should be marked as config file")
|
||||
assert.True(t, files[1].IsConfigFile, "second file should be marked as config file")
|
||||
assert.False(t, files[2].IsConfigFile, "third file should not be marked as config file")
|
||||
}
|
||||
|
||||
func TestParseControlFile(t *testing.T) {
|
||||
controlContent := `Package: test-package
|
||||
Version: 1.2.3-4
|
||||
Architecture: amd64
|
||||
Maintainer: Test User <test@example.com>
|
||||
Installed-Size: 1234
|
||||
Depends: libc6, libtest
|
||||
Description: This is a test package
|
||||
More description text
|
||||
And even more details
|
||||
`
|
||||
|
||||
metadata, err := parseControlFile(controlContent)
|
||||
|
||||
func writeTarEntry(t *testing.T, tw *tar.Writer, name, content string) {
|
||||
t.Helper()
|
||||
require.NoError(t, tw.WriteHeader(&tar.Header{
|
||||
Name: name,
|
||||
Mode: 0644,
|
||||
Size: int64(len(content)),
|
||||
}))
|
||||
_, err := tw.Write([]byte(content))
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, "test-package", metadata.Package)
|
||||
assert.Equal(t, "1.2.3-4", metadata.Version)
|
||||
assert.Equal(t, "amd64", metadata.Architecture)
|
||||
assert.Equal(t, "Test User <test@example.com>", metadata.Maintainer)
|
||||
assert.Equal(t, 1234, metadata.InstalledSize)
|
||||
assert.Contains(t, metadata.Description, "This is a test package")
|
||||
assert.Len(t, metadata.Depends, 2)
|
||||
assert.Contains(t, metadata.Depends, "libc6")
|
||||
assert.Contains(t, metadata.Depends, "libtest")
|
||||
}
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
package snap
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
@ -22,16 +22,28 @@ type kernelVersionInfo struct {
|
||||
majorVersion string // e.g., "5.4"
|
||||
}
|
||||
|
||||
// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version
|
||||
// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version.
|
||||
// The changelog is gzip-compressed and may be very large, so we stream it line-by-line
|
||||
// rather than reading it entirely into memory.
|
||||
func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
// The file should be gzipped
|
||||
lines, err := readChangelogLines(reader)
|
||||
gzReader, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
return nil, nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err)
|
||||
}
|
||||
defer gzReader.Close()
|
||||
|
||||
scanner := bufio.NewScanner(gzReader)
|
||||
|
||||
// read the first line to extract kernel version
|
||||
// Format: "linux (5.4.0-195.215) focal; urgency=medium"
|
||||
if !scanner.Scan() {
|
||||
if err := scanner.Err(); err != nil {
|
||||
return nil, nil, fmt.Errorf("failed to read changelog content: %w", err)
|
||||
}
|
||||
return nil, nil, fmt.Errorf("changelog file is empty")
|
||||
}
|
||||
|
||||
// pull from first line
|
||||
versionInfo, err := extractKernelVersion(lines[0])
|
||||
versionInfo, err := extractKernelVersion(scanner.Text())
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
@ -42,38 +54,21 @@ func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environ
|
||||
|
||||
packages := createMainKernelPackage(versionInfo, snapMetadata, reader.Location)
|
||||
|
||||
// Check for base kernel package
|
||||
basePackage := findBaseKernelPackage(lines, versionInfo, snapMetadata, reader.Location)
|
||||
if basePackage != nil {
|
||||
packages = append(packages, *basePackage)
|
||||
// stream remaining lines looking for the base kernel entry
|
||||
baseKernelEntry := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/"))
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.Contains(line, baseKernelEntry) {
|
||||
if basePackage := parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, reader.Location); basePackage != nil {
|
||||
packages = append(packages, *basePackage)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return packages, nil, nil
|
||||
}
|
||||
|
||||
// readChangelogLines reads and decompresses the changelog content
|
||||
func readChangelogLines(reader file.LocationReadCloser) ([]string, error) {
|
||||
gzReader, err := gzip.NewReader(reader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err)
|
||||
}
|
||||
defer gzReader.Close()
|
||||
|
||||
content, err := io.ReadAll(gzReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read changelog content: %w", err)
|
||||
}
|
||||
|
||||
lines := strings.Split(string(content), "\n")
|
||||
if len(lines) == 0 {
|
||||
return nil, fmt.Errorf("changelog file is empty")
|
||||
}
|
||||
|
||||
// Parse the first line to extract kernel version information
|
||||
// Format: "linux (5.4.0-195.215) focal; urgency=medium"
|
||||
return lines, nil
|
||||
}
|
||||
|
||||
// extractKernelVersion parses version information from the first changelog line
|
||||
func extractKernelVersion(firstLine string) (*kernelVersionInfo, error) {
|
||||
// Format: "linux (5.4.0-195.215) focal; urgency=medium"
|
||||
@ -117,19 +112,6 @@ func createMainKernelPackage(versionInfo *kernelVersionInfo, snapMetadata pkg.Sn
|
||||
return []pkg.Package{kernelPkg}
|
||||
}
|
||||
|
||||
// findBaseKernelPackage searches for and creates base kernel package if present
|
||||
func findBaseKernelPackage(lines []string, versionInfo *kernelVersionInfo, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package {
|
||||
baseKernelEntry := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/"))
|
||||
|
||||
for _, line := range lines {
|
||||
if strings.Contains(line, baseKernelEntry) {
|
||||
return parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, location)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseBaseKernelLine extracts base kernel version from a changelog line
|
||||
func parseBaseKernelLine(line string, majorVersion string, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package {
|
||||
baseKernelRegex := regexp.MustCompile(fmt.Sprintf(`(%s-[0-9]+)\.?[0-9]*`, regexp.QuoteMeta(majorVersion)))
|
||||
|
||||
312
syft/pkg/cataloger/snap/parse_kernel_changelog_test.go
Normal file
312
syft/pkg/cataloger/snap/parse_kernel_changelog_test.go
Normal file
@ -0,0 +1,312 @@
|
||||
package snap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
"github.com/anchore/syft/syft/pkg/cataloger/generic"
|
||||
)
|
||||
|
||||
func gzipContent(t *testing.T, content string) []byte {
|
||||
t.Helper()
|
||||
var buf bytes.Buffer
|
||||
w := gzip.NewWriter(&buf)
|
||||
_, err := w.Write([]byte(content))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, w.Close())
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func locationReadCloser(t *testing.T, data []byte) file.LocationReadCloser {
|
||||
t.Helper()
|
||||
return file.LocationReadCloser{
|
||||
Location: file.NewLocation("test-fixtures/changelog.Debian.gz"),
|
||||
ReadCloser: io.NopCloser(bytes.NewReader(data)),
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractKernelVersion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
firstLine string
|
||||
expected *kernelVersionInfo
|
||||
expectError string
|
||||
}{
|
||||
{
|
||||
name: "standard focal kernel",
|
||||
firstLine: "linux (5.4.0-195.215) focal; urgency=medium",
|
||||
expected: &kernelVersionInfo{
|
||||
baseVersion: "5.4.0-195",
|
||||
releaseVersion: "215",
|
||||
fullVersion: "5.4.0-195.215",
|
||||
majorVersion: "5.4",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "noble kernel 6.x",
|
||||
firstLine: "linux (6.8.0-50.51) noble; urgency=medium",
|
||||
expected: &kernelVersionInfo{
|
||||
baseVersion: "6.8.0-50",
|
||||
releaseVersion: "51",
|
||||
fullVersion: "6.8.0-50.51",
|
||||
majorVersion: "6.8",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "jammy kernel",
|
||||
firstLine: "linux (5.15.0-130.140) jammy; urgency=medium",
|
||||
expected: &kernelVersionInfo{
|
||||
baseVersion: "5.15.0-130",
|
||||
releaseVersion: "140",
|
||||
fullVersion: "5.15.0-130.140",
|
||||
majorVersion: "5.15",
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "empty string",
|
||||
firstLine: "",
|
||||
expectError: "could not parse kernel version from changelog",
|
||||
},
|
||||
{
|
||||
name: "no version match",
|
||||
firstLine: "not a valid changelog line",
|
||||
expectError: "could not parse kernel version from changelog",
|
||||
},
|
||||
{
|
||||
name: "missing release version",
|
||||
firstLine: "linux (5.4.0-195) focal; urgency=medium",
|
||||
expectError: "could not parse kernel version from changelog",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result, err := extractKernelVersion(tt.firstLine)
|
||||
if tt.expectError != "" {
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), tt.expectError)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.expected.baseVersion, result.baseVersion)
|
||||
assert.Equal(t, tt.expected.releaseVersion, result.releaseVersion)
|
||||
assert.Equal(t, tt.expected.fullVersion, result.fullVersion)
|
||||
assert.Equal(t, tt.expected.majorVersion, result.majorVersion)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCreateMainKernelPackage(t *testing.T) {
|
||||
location := file.NewLocation("test-fixtures/changelog.Debian.gz")
|
||||
versionInfo := &kernelVersionInfo{
|
||||
baseVersion: "5.4.0-195",
|
||||
releaseVersion: "215",
|
||||
fullVersion: "5.4.0-195.215",
|
||||
majorVersion: "5.4",
|
||||
}
|
||||
snapMetadata := pkg.SnapEntry{
|
||||
SnapType: pkg.SnapTypeKernel,
|
||||
}
|
||||
|
||||
packages := createMainKernelPackage(versionInfo, snapMetadata, location)
|
||||
|
||||
require.Len(t, packages, 1)
|
||||
p := packages[0]
|
||||
assert.Equal(t, "linux-image-5.4.0-195-generic", p.Name)
|
||||
assert.Equal(t, "5.4.0-195.215", p.Version)
|
||||
assert.Equal(t, pkg.DebPkg, p.Type)
|
||||
|
||||
metadata, ok := p.Metadata.(pkg.SnapEntry)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType)
|
||||
}
|
||||
|
||||
func TestParseBaseKernelLine(t *testing.T) {
|
||||
location := file.NewLocation("test-fixtures/changelog.Debian.gz")
|
||||
snapMetadata := pkg.SnapEntry{
|
||||
SnapType: pkg.SnapTypeKernel,
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
line string
|
||||
majorVersion string
|
||||
expectNil bool
|
||||
expectedName string
|
||||
expectedVer string
|
||||
}{
|
||||
{
|
||||
name: "standard base kernel entry",
|
||||
line: " [ Ubuntu: 5.4-100.200 ]",
|
||||
majorVersion: "5.4",
|
||||
expectedName: "linux-image-5.4-100-generic",
|
||||
expectedVer: "5.4-100.200",
|
||||
},
|
||||
{
|
||||
name: "6.x base kernel entry",
|
||||
line: " [ Ubuntu: 6.8-40.41 ]",
|
||||
majorVersion: "6.8",
|
||||
expectedName: "linux-image-6.8-40-generic",
|
||||
expectedVer: "6.8-40.41",
|
||||
},
|
||||
{
|
||||
name: "no matching version",
|
||||
line: " * some random changelog text here",
|
||||
majorVersion: "5.4",
|
||||
expectNil: true,
|
||||
},
|
||||
{
|
||||
name: "empty line",
|
||||
line: "",
|
||||
majorVersion: "5.4",
|
||||
expectNil: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := parseBaseKernelLine(tt.line, tt.majorVersion, snapMetadata, location)
|
||||
if tt.expectNil {
|
||||
assert.Nil(t, result)
|
||||
return
|
||||
}
|
||||
require.NotNil(t, result)
|
||||
assert.Equal(t, tt.expectedName, result.Name)
|
||||
assert.Equal(t, tt.expectedVer, result.Version)
|
||||
assert.Equal(t, pkg.DebPkg, result.Type)
|
||||
|
||||
metadata, ok := result.Metadata.(pkg.SnapEntry)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseKernelChangelog(t *testing.T) {
|
||||
// Realistic changelog content modeled on Ubuntu kernel changelogs.
|
||||
// The first line declares the patched kernel version.
|
||||
// Somewhere later a line references the base upstream kernel.
|
||||
fullChangelog := strings.Join([]string{
|
||||
"linux (5.4.0-195.215) focal; urgency=medium",
|
||||
"",
|
||||
" * focal/linux: 5.4.0-195.215 -proposed tracker (LP: #2083390)",
|
||||
"",
|
||||
" [ Ubuntu: 5.4-100.200 ]",
|
||||
"",
|
||||
" * Some other entry",
|
||||
"",
|
||||
" -- Ubuntu Kernel Team <kernel-team@lists.ubuntu.com> Mon, 01 Jan 2024 00:00:00 +0000",
|
||||
"",
|
||||
}, "\n")
|
||||
|
||||
// Changelog where the base kernel entry line uses the release version pattern
|
||||
// The code builds: fmt.Sprintf("%s/linux:", releaseVersion) → "215/linux:"
|
||||
changelogWithBaseEntry := strings.Join([]string{
|
||||
"linux (5.4.0-195.215) focal; urgency=medium",
|
||||
"",
|
||||
" * focal/linux: 5.4.0-195.215 -proposed tracker",
|
||||
"",
|
||||
" 215/linux: 5.4-100.200 base entry",
|
||||
"",
|
||||
" -- Ubuntu Kernel Team <kernel-team@lists.ubuntu.com> Mon, 01 Jan 2024 00:00:00 +0000",
|
||||
"",
|
||||
}, "\n")
|
||||
|
||||
// Changelog with only the header line and no base kernel match
|
||||
minimalChangelog := "linux (6.8.0-50.51) noble; urgency=medium\n"
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
input []byte
|
||||
expectedCount int
|
||||
expectedNames []string
|
||||
expectedVers []string
|
||||
expectError bool
|
||||
errorContains string
|
||||
}{
|
||||
{
|
||||
name: "full changelog with base kernel via release version pattern",
|
||||
input: gzipContent(t, changelogWithBaseEntry),
|
||||
expectedCount: 2,
|
||||
expectedNames: []string{"linux-image-5.4.0-195-generic", "linux-image-5.4-100-generic"},
|
||||
expectedVers: []string{"5.4.0-195.215", "5.4-100.200"},
|
||||
},
|
||||
{
|
||||
name: "changelog without base kernel match returns only main package",
|
||||
input: gzipContent(t, minimalChangelog),
|
||||
expectedCount: 1,
|
||||
expectedNames: []string{"linux-image-6.8.0-50-generic"},
|
||||
expectedVers: []string{"6.8.0-50.51"},
|
||||
},
|
||||
{
|
||||
name: "full changelog without matching release version pattern returns only main package",
|
||||
input: gzipContent(t, fullChangelog),
|
||||
expectedCount: 1,
|
||||
expectedNames: []string{"linux-image-5.4.0-195-generic"},
|
||||
expectedVers: []string{"5.4.0-195.215"},
|
||||
},
|
||||
{
|
||||
name: "invalid gzip data",
|
||||
input: []byte("not gzip data"),
|
||||
expectError: true,
|
||||
errorContains: "failed to create gzip reader",
|
||||
},
|
||||
{
|
||||
// The old (slurp) implementation produces "could not parse kernel version"
|
||||
// because strings.Split("", "\n") yields [""], not an empty slice.
|
||||
// The new (streaming) implementation produces "changelog file is empty"
|
||||
// because bufio.Scanner.Scan() returns false immediately.
|
||||
// Both correctly reject empty content; only the message differs.
|
||||
name: "empty gzip content",
|
||||
input: gzipContent(t, ""),
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "gzip content with unparseable first line",
|
||||
input: gzipContent(t, "this is not a valid kernel changelog\n"),
|
||||
expectError: true,
|
||||
errorContains: "could not parse kernel version from changelog",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
reader := locationReadCloser(t, tt.input)
|
||||
|
||||
packages, relationships, err := parseKernelChangelog(
|
||||
context.Background(), nil, &generic.Environment{}, reader,
|
||||
)
|
||||
|
||||
if tt.expectError {
|
||||
require.Error(t, err)
|
||||
if tt.errorContains != "" {
|
||||
assert.Contains(t, err.Error(), tt.errorContains)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
assert.Nil(t, relationships)
|
||||
require.Len(t, packages, tt.expectedCount)
|
||||
|
||||
for i, p := range packages {
|
||||
assert.Equal(t, tt.expectedNames[i], p.Name, "package %d name", i)
|
||||
assert.Equal(t, tt.expectedVers[i], p.Version, "package %d version", i)
|
||||
assert.Equal(t, pkg.DebPkg, p.Type, "package %d type", i)
|
||||
|
||||
metadata, ok := p.Metadata.(pkg.SnapEntry)
|
||||
require.True(t, ok, "package %d metadata type", i)
|
||||
assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType, "package %d snap type", i)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -76,6 +76,11 @@ func New(cfg Config) (source.Source, error) {
|
||||
|
||||
analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive)
|
||||
if err != nil {
|
||||
if cleanupFn != nil {
|
||||
if cleanupErr := cleanupFn(); cleanupErr != nil {
|
||||
log.Warnf("failed to cleanup temporary directory: %v", cleanupErr)
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err)
|
||||
}
|
||||
|
||||
@ -211,7 +216,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro
|
||||
if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok {
|
||||
analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("unable to unarchive source file: %w", err)
|
||||
return "", cleanupFn, fmt.Errorf("unable to unarchive source file: %w", err)
|
||||
}
|
||||
|
||||
log.Debugf("source path is an archive")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user