diff --git a/syft/pkg/cataloger/debian/parse_copyright.go b/syft/pkg/cataloger/debian/parse_copyright.go
index 7b7985710..f4b879842 100644
--- a/syft/pkg/cataloger/debian/parse_copyright.go
+++ b/syft/pkg/cataloger/debian/parse_copyright.go
@@ -1,6 +1,7 @@
 package debian
 
 import (
+	"bufio"
 	"io"
 	"regexp"
 	"sort"
@@ -14,23 +15,32 @@ import (
 // For more information see: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/#license-syntax
 var (
-	licensePattern                          = regexp.MustCompile(`^License: (?P<license>\S*)`)
-	commonLicensePathPattern                = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
-	licenseFirstSentenceAfterHeadingPattern = regexp.MustCompile(`(?is)^[^\n]+?\n[-]+?\n+(?P<license>.*?\.)`)
-	licenseAgreementHeadingPattern          = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
+	licensePattern                 = regexp.MustCompile(`^License: (?P<license>\S*)`)
+	commonLicensePathPattern       = regexp.MustCompile(`/usr/share/common-licenses/(?P<license>[0-9A-Za-z_.\-]+)`)
+	licenseAgreementHeadingPattern = regexp.MustCompile(`(?i)^\s*(?P<license>LICENSE AGREEMENT(?: FOR .+?)?)\s*$`)
 )
 
 func parseLicensesFromCopyright(reader io.Reader) []string {
 	findings := strset.New()
-	data, err := io.ReadAll(reader)
-	if err != nil {
-		// Fail-safe: return nothing if unable to read
-		return []string{}
-	}
+	scanner := bufio.NewScanner(reader)
 
-	content := string(data)
-	lines := strings.Split(content, "\n")
-	for _, line := range lines {
+	// State machine replacing licenseFirstSentenceAfterHeadingPattern.
+	// That regex only matched at the start of the file: a non-empty heading,
+	// a line of dashes, blank lines, then text up to the first period.
+ const ( + expectHeading = iota + expectDashes + skipBlanks + captureLicense + headingDone // matched or impossible — stop checking + ) + headingState := expectHeading + var licenseText strings.Builder + + for scanner.Scan() { + line := scanner.Text() + + // per-line regex checks (applied to every line) if value := findLicenseClause(licensePattern, line); value != "" { findings.Add(value) } @@ -40,13 +50,39 @@ func parseLicensesFromCopyright(reader io.Reader) []string { if value := findLicenseClause(licenseAgreementHeadingPattern, line); value != "" { findings.Add(value) } - } - // some copyright files have a license declaration after the heading ex: - // End User License Agreement\n-------------------------- - // we want to try and find these multi-line license declarations and make exceptions for them - if value := findLicenseClause(licenseFirstSentenceAfterHeadingPattern, content); value != "" { - findings.Add(value) + // multi-line heading detection (only at start of file) + switch headingState { + case expectHeading: + if strings.TrimSpace(line) != "" { + headingState = expectDashes + } else { + headingState = headingDone + } + case expectDashes: + trimmed := strings.TrimSpace(line) + if len(trimmed) > 0 && strings.Trim(trimmed, "-") == "" { + headingState = skipBlanks + } else { + headingState = headingDone + } + case skipBlanks: + if strings.TrimSpace(line) != "" { + headingState = captureLicense + licenseText.WriteString(line) + if value := extractUpToFirstPeriod(licenseText.String()); value != "" { + findings.Add(value) + headingState = headingDone + } + } + case captureLicense: + licenseText.WriteString(" ") + licenseText.WriteString(line) + if value := extractUpToFirstPeriod(licenseText.String()); value != "" { + findings.Add(value) + headingState = headingDone + } + } } results := findings.List() @@ -55,6 +91,15 @@ func parseLicensesFromCopyright(reader io.Reader) []string { return results } +// extractUpToFirstPeriod returns the license text up to the 
first period, +// processed through ensureIsSingleLicense, or "" if no period found yet. +func extractUpToFirstPeriod(s string) string { + if idx := strings.Index(s, "."); idx >= 0 { + return ensureIsSingleLicense(s[:idx+1]) + } + return "" +} + func findLicenseClause(pattern *regexp.Regexp, line string) string { valueGroup := "license" matchesByGroup := internal.MatchNamedCaptureGroups(pattern, line) diff --git a/syft/pkg/cataloger/debian/parse_deb_archive.go b/syft/pkg/cataloger/debian/parse_deb_archive.go index 8b78a2147..0f977c05e 100644 --- a/syft/pkg/cataloger/debian/parse_deb_archive.go +++ b/syft/pkg/cataloger/debian/parse_deb_archive.go @@ -2,7 +2,6 @@ package debian import ( "archive/tar" - "bytes" "context" "fmt" "io" @@ -107,41 +106,56 @@ func processDataTar(dcReader io.ReadCloser) ([]string, error) { func processControlTar(dcReader io.ReadCloser) (*pkg.DpkgArchiveEntry, error) { defer internal.CloseAndLogError(dcReader, "") - // Extract control, md5sums, and conffiles files from control.tar tarReader := tar.NewReader(dcReader) - controlFileContent, md5Content, confContent, err := readControlFiles(tarReader) - if err != nil { - return nil, fmt.Errorf("failed to read control files: %w", err) + + var metadata *pkg.DpkgArchiveEntry + var files []pkg.DpkgFileRecord + var confFileRecords []pkg.DpkgFileRecord + + for { + header, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, fmt.Errorf("failed to read control tar: %w", err) + } + + switch filepath.Base(header.Name) { + case "control": + // parseDpkgStatus already streams via bufio.Reader + entries, err := parseDpkgStatus(tarReader) + if err != nil { + return nil, fmt.Errorf("failed to parse control file: %w", err) + } + if len(entries) == 0 { + return nil, fmt.Errorf("no package entries found in control file") + } + entry := pkg.DpkgArchiveEntry(entries[0]) + metadata = &entry + case "md5sums": + // parseDpkgMD5Info already streams via bufio.Scanner + files = 
parseDpkgMD5Info(tarReader) + case "conffiles": + // parseDpkgConffileInfo already streams via bufio.Scanner + confFileRecords = parseDpkgConffileInfo(tarReader) + } } - if controlFileContent == nil { + if metadata == nil { return nil, fmt.Errorf("control file not found in archive") } - metadata, err := newDpkgArchiveMetadata(controlFileContent, md5Content, confContent) - if err != nil { - return nil, fmt.Errorf("failed to create package metadata: %w", err) - } - - return &metadata, nil -} - -func newDpkgArchiveMetadata(controlFile, md5sums, confFiles []byte) (pkg.DpkgArchiveEntry, error) { - // parse the control file to get package metadata - metadata, err := parseControlFile(string(controlFile)) - if err != nil { - return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err) - } - - // parse MD5 sums to get file records - var files []pkg.DpkgFileRecord - if len(md5sums) > 0 { - files = parseDpkgMD5Info(bytes.NewReader(md5sums)) - } - - // mark config files - if len(confFiles) > 0 { - markConfigFiles(confFiles, files) + if len(confFileRecords) > 0 && len(files) > 0 { + configPaths := make(map[string]struct{}, len(confFileRecords)) + for _, cf := range confFileRecords { + configPaths[cf.Path] = struct{}{} + } + for i, f := range files { + if _, isConfig := configPaths[f.Path]; isConfig { + files[i].IsConfigFile = true + } + } } metadata.Files = files @@ -166,73 +180,3 @@ func decompressionStream(ctx context.Context, r io.Reader, filePath string) (io. 
return rc, nil } - -// readControlFiles extracts important files from the control.tar archive -func readControlFiles(tarReader *tar.Reader) (controlFile, md5sums, conffiles []byte, err error) { - for { - header, err := tarReader.Next() - if err == io.EOF { - break - } - if err != nil { - return nil, nil, nil, err - } - - switch filepath.Base(header.Name) { - case "control": - controlFile, err = io.ReadAll(tarReader) - if err != nil { - return nil, nil, nil, err - } - case "md5sums": - md5sums, err = io.ReadAll(tarReader) - if err != nil { - return nil, nil, nil, err - } - case "conffiles": - conffiles, err = io.ReadAll(tarReader) - if err != nil { - return nil, nil, nil, err - } - } - } - - return controlFile, md5sums, conffiles, nil -} - -// parseControlFile parses the content of a debian control file into package metadata -func parseControlFile(controlFileContent string) (pkg.DpkgArchiveEntry, error) { - // Reuse the existing dpkg status file parsing logic - reader := strings.NewReader(controlFileContent) - - entries, err := parseDpkgStatus(reader) - if err != nil { - return pkg.DpkgArchiveEntry{}, fmt.Errorf("failed to parse control file: %w", err) - } - - if len(entries) == 0 { - return pkg.DpkgArchiveEntry{}, fmt.Errorf("no package entries found in control file") - } - - // We expect only one entry from a .deb control file - return pkg.DpkgArchiveEntry(entries[0]), nil -} - -// markConfigFiles marks files that are listed in conffiles as configuration files -func markConfigFiles(conffilesContent []byte, files []pkg.DpkgFileRecord) { - // Parse the conffiles content into DpkgFileRecord entries - confFiles := parseDpkgConffileInfo(bytes.NewReader(conffilesContent)) - - // Create a map for quick lookup of config files by path - configPathMap := make(map[string]struct{}) - for _, confFile := range confFiles { - configPathMap[confFile.Path] = struct{}{} - } - - // Mark files as config files if they're in the conffiles list - for i := range files { - if _, exists := 
configPathMap[files[i].Path]; exists { - files[i].IsConfigFile = true - } - } -} diff --git a/syft/pkg/cataloger/debian/parse_deb_archive_test.go b/syft/pkg/cataloger/debian/parse_deb_archive_test.go index 4bfc46836..c9baee738 100644 --- a/syft/pkg/cataloger/debian/parse_deb_archive_test.go +++ b/syft/pkg/cataloger/debian/parse_deb_archive_test.go @@ -3,31 +3,58 @@ package debian import ( "archive/tar" "bytes" + "io" "testing" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - - "github.com/anchore/syft/syft/file" - "github.com/anchore/syft/syft/pkg" ) -func TestReadControlFiles(t *testing.T) { +func TestProcessControlTar(t *testing.T) { tarBytes := createTestTarWithControlFiles(t) - tarReader := bytes.NewReader(tarBytes) - reader := tar.NewReader(tarReader) - - controlFile, md5sums, conffiles, err := readControlFiles(reader) + metadata, err := processControlTar(io.NopCloser(bytes.NewReader(tarBytes))) require.NoError(t, err) - assert.NotNil(t, controlFile, "expected control file to be found") - assert.NotNil(t, md5sums, "expected md5sums file to be found") - assert.NotNil(t, conffiles, "expected conffiles file to be found") + require.NotNil(t, metadata) - assert.Contains(t, string(controlFile), "Package: test-package") - assert.Contains(t, string(md5sums), "d41d8cd98f00b204e9800998ecf8427e") - assert.Contains(t, string(conffiles), "/etc/test") + assert.Equal(t, "test-package", metadata.Package) + assert.Equal(t, "1.0.0", metadata.Version) + + // md5sums should have been parsed into file records + require.Len(t, metadata.Files, 1) + assert.Equal(t, "/usr/bin/test-command", metadata.Files[0].Path) + assert.Equal(t, "d41d8cd98f00b204e9800998ecf8427e", metadata.Files[0].Digest.Value) + + // conffiles should have marked config files + assert.True(t, metadata.Files[0].IsConfigFile, "file listed in conffiles should be marked as config") +} + +func TestProcessControlTar_ConfigFileMarking(t *testing.T) { + // Create a tar where conffiles lists 
paths that overlap with md5sums entries + var buf bytes.Buffer + tw := tar.NewWriter(&buf) + + controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\n" + writeTarEntry(t, tw, "control", controlContent) + + md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n" + + "d41d8cd98f00b204e9800998ecf8427e etc/test/config.conf\n" + + "d41d8cd98f00b204e9800998ecf8427e usr/bin/other-command\n" + writeTarEntry(t, tw, "md5sums", md5Content) + + conffilesContent := "/usr/bin/test-command\n/etc/test/config.conf\n" + writeTarEntry(t, tw, "conffiles", conffilesContent) + + require.NoError(t, tw.Close()) + + metadata, err := processControlTar(io.NopCloser(bytes.NewReader(buf.Bytes()))) + require.NoError(t, err) + require.Len(t, metadata.Files, 3) + + assert.True(t, metadata.Files[0].IsConfigFile, "first file should be marked as config file") + assert.True(t, metadata.Files[1].IsConfigFile, "second file should be marked as config file") + assert.False(t, metadata.Files[2].IsConfigFile, "third file should not be marked as config file") } // createTestTarWithControlFiles creates a simple in-memory tar file with test control files @@ -35,108 +62,26 @@ func createTestTarWithControlFiles(t *testing.T) []byte { var buf bytes.Buffer tw := tar.NewWriter(&buf) - // Add control file - controlContent := `Package: test-package -Version: 1.0.0 -Architecture: all -Maintainer: Test -Description: Test package -` - err := tw.WriteHeader(&tar.Header{ - Name: "control", - Mode: 0644, - Size: int64(len(controlContent)), - }) - require.NoError(t, err) - _, err = tw.Write([]byte(controlContent)) - require.NoError(t, err) + controlContent := "Package: test-package\nVersion: 1.0.0\nArchitecture: all\nMaintainer: Test \nDescription: Test package\n" + writeTarEntry(t, tw, "control", controlContent) - // Add md5sums file md5Content := "d41d8cd98f00b204e9800998ecf8427e usr/bin/test-command\n" - err = tw.WriteHeader(&tar.Header{ - Name: "md5sums", - Mode: 0644, - Size: 
int64(len(md5Content)), - }) - require.NoError(t, err) - _, err = tw.Write([]byte(md5Content)) - require.NoError(t, err) + writeTarEntry(t, tw, "md5sums", md5Content) - // Add conffiles file - conffilesContent := "/etc/test/config.conf\n" - err = tw.WriteHeader(&tar.Header{ - Name: "conffiles", - Mode: 0644, - Size: int64(len(conffilesContent)), - }) - require.NoError(t, err) - _, err = tw.Write([]byte(conffilesContent)) - require.NoError(t, err) - - // Close the tar writer - err = tw.Close() - require.NoError(t, err) + conffilesContent := "/usr/bin/test-command\n" + writeTarEntry(t, tw, "conffiles", conffilesContent) + require.NoError(t, tw.Close()) return buf.Bytes() } -func TestMarkConfigFiles(t *testing.T) { - // Create test data - conffilesContent := []byte("/usr/bin/test-command\n/etc/test/config.conf\n") - - files := []pkg.DpkgFileRecord{ - { - Path: "/usr/bin/test-command", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "d41d8cd98f00b204e9800998ecf8427e", - }, - }, - { - Path: "/etc/test/config.conf", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "d41d8cd98f00b204e9800998ecf8427e", - }, - }, - { - Path: "/usr/bin/other-command", - Digest: &file.Digest{ - Algorithm: "md5", - Value: "d41d8cd98f00b204e9800998ecf8427e", - }, - }, - } - - markConfigFiles(conffilesContent, files) - - assert.True(t, files[0].IsConfigFile, "first file should be marked as config file") - assert.True(t, files[1].IsConfigFile, "second file should be marked as config file") - assert.False(t, files[2].IsConfigFile, "third file should not be marked as config file") -} - -func TestParseControlFile(t *testing.T) { - controlContent := `Package: test-package -Version: 1.2.3-4 -Architecture: amd64 -Maintainer: Test User -Installed-Size: 1234 -Depends: libc6, libtest -Description: This is a test package - More description text - And even more details -` - - metadata, err := parseControlFile(controlContent) - +func writeTarEntry(t *testing.T, tw *tar.Writer, name, content string) { 
+ t.Helper() + require.NoError(t, tw.WriteHeader(&tar.Header{ + Name: name, + Mode: 0644, + Size: int64(len(content)), + })) + _, err := tw.Write([]byte(content)) require.NoError(t, err) - assert.Equal(t, "test-package", metadata.Package) - assert.Equal(t, "1.2.3-4", metadata.Version) - assert.Equal(t, "amd64", metadata.Architecture) - assert.Equal(t, "Test User ", metadata.Maintainer) - assert.Equal(t, 1234, metadata.InstalledSize) - assert.Contains(t, metadata.Description, "This is a test package") - assert.Len(t, metadata.Depends, 2) - assert.Contains(t, metadata.Depends, "libc6") - assert.Contains(t, metadata.Depends, "libtest") } diff --git a/syft/pkg/cataloger/snap/parse_kernel_changelog.go b/syft/pkg/cataloger/snap/parse_kernel_changelog.go index 43e5b4ab0..5b5a81b4f 100644 --- a/syft/pkg/cataloger/snap/parse_kernel_changelog.go +++ b/syft/pkg/cataloger/snap/parse_kernel_changelog.go @@ -1,10 +1,10 @@ package snap import ( + "bufio" "compress/gzip" "context" "fmt" - "io" "regexp" "strings" @@ -22,16 +22,28 @@ type kernelVersionInfo struct { majorVersion string // e.g., "5.4" } -// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version +// parseKernelChangelog parses changelog files from kernel snaps to extract kernel version. +// The changelog is gzip-compressed and may be very large, so we stream it line-by-line +// rather than reading it entirely into memory. 
func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - // The file should be gzipped - lines, err := readChangelogLines(reader) + gzReader, err := gzip.NewReader(reader) if err != nil { - return nil, nil, err + return nil, nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err) + } + defer gzReader.Close() + + scanner := bufio.NewScanner(gzReader) + + // read the first line to extract kernel version + // Format: "linux (5.4.0-195.215) focal; urgency=medium" + if !scanner.Scan() { + if err := scanner.Err(); err != nil { + return nil, nil, fmt.Errorf("failed to read changelog content: %w", err) + } + return nil, nil, fmt.Errorf("changelog file is empty") } - // pull from first line - versionInfo, err := extractKernelVersion(lines[0]) + versionInfo, err := extractKernelVersion(scanner.Text()) if err != nil { return nil, nil, err } @@ -42,38 +54,21 @@ func parseKernelChangelog(_ context.Context, _ file.Resolver, _ *generic.Environ packages := createMainKernelPackage(versionInfo, snapMetadata, reader.Location) - // Check for base kernel package - basePackage := findBaseKernelPackage(lines, versionInfo, snapMetadata, reader.Location) - if basePackage != nil { - packages = append(packages, *basePackage) + // stream remaining lines looking for the base kernel entry + baseKernelEntry := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/")) + for scanner.Scan() { + line := scanner.Text() + if strings.Contains(line, baseKernelEntry) { + if basePackage := parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, reader.Location); basePackage != nil { + packages = append(packages, *basePackage) + } + break + } } return packages, nil, nil } -// readChangelogLines reads and decompresses the changelog content -func readChangelogLines(reader file.LocationReadCloser) ([]string, error) { - gzReader, err := 
gzip.NewReader(reader) - if err != nil { - return nil, fmt.Errorf("failed to create gzip reader for changelog: %w", err) - } - defer gzReader.Close() - - content, err := io.ReadAll(gzReader) - if err != nil { - return nil, fmt.Errorf("failed to read changelog content: %w", err) - } - - lines := strings.Split(string(content), "\n") - if len(lines) == 0 { - return nil, fmt.Errorf("changelog file is empty") - } - - // Parse the first line to extract kernel version information - // Format: "linux (5.4.0-195.215) focal; urgency=medium" - return lines, nil -} - // extractKernelVersion parses version information from the first changelog line func extractKernelVersion(firstLine string) (*kernelVersionInfo, error) { // Format: "linux (5.4.0-195.215) focal; urgency=medium" @@ -117,19 +112,6 @@ func createMainKernelPackage(versionInfo *kernelVersionInfo, snapMetadata pkg.Sn return []pkg.Package{kernelPkg} } -// findBaseKernelPackage searches for and creates base kernel package if present -func findBaseKernelPackage(lines []string, versionInfo *kernelVersionInfo, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package { - baseKernelEntry := fmt.Sprintf("%s/linux:", strings.ReplaceAll(versionInfo.releaseVersion, ";", "/")) - - for _, line := range lines { - if strings.Contains(line, baseKernelEntry) { - return parseBaseKernelLine(line, versionInfo.majorVersion, snapMetadata, location) - } - } - - return nil -} - // parseBaseKernelLine extracts base kernel version from a changelog line func parseBaseKernelLine(line string, majorVersion string, snapMetadata pkg.SnapEntry, location file.Location) *pkg.Package { baseKernelRegex := regexp.MustCompile(fmt.Sprintf(`(%s-[0-9]+)\.?[0-9]*`, regexp.QuoteMeta(majorVersion))) diff --git a/syft/pkg/cataloger/snap/parse_kernel_changelog_test.go b/syft/pkg/cataloger/snap/parse_kernel_changelog_test.go new file mode 100644 index 000000000..3f564234a --- /dev/null +++ b/syft/pkg/cataloger/snap/parse_kernel_changelog_test.go @@ -0,0 
+1,312 @@ +package snap + +import ( + "bytes" + "compress/gzip" + "context" + "io" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/anchore/syft/syft/file" + "github.com/anchore/syft/syft/pkg" + "github.com/anchore/syft/syft/pkg/cataloger/generic" +) + +func gzipContent(t *testing.T, content string) []byte { + t.Helper() + var buf bytes.Buffer + w := gzip.NewWriter(&buf) + _, err := w.Write([]byte(content)) + require.NoError(t, err) + require.NoError(t, w.Close()) + return buf.Bytes() +} + +func locationReadCloser(t *testing.T, data []byte) file.LocationReadCloser { + t.Helper() + return file.LocationReadCloser{ + Location: file.NewLocation("test-fixtures/changelog.Debian.gz"), + ReadCloser: io.NopCloser(bytes.NewReader(data)), + } +} + +func TestExtractKernelVersion(t *testing.T) { + tests := []struct { + name string + firstLine string + expected *kernelVersionInfo + expectError string + }{ + { + name: "standard focal kernel", + firstLine: "linux (5.4.0-195.215) focal; urgency=medium", + expected: &kernelVersionInfo{ + baseVersion: "5.4.0-195", + releaseVersion: "215", + fullVersion: "5.4.0-195.215", + majorVersion: "5.4", + }, + }, + { + name: "noble kernel 6.x", + firstLine: "linux (6.8.0-50.51) noble; urgency=medium", + expected: &kernelVersionInfo{ + baseVersion: "6.8.0-50", + releaseVersion: "51", + fullVersion: "6.8.0-50.51", + majorVersion: "6.8", + }, + }, + { + name: "jammy kernel", + firstLine: "linux (5.15.0-130.140) jammy; urgency=medium", + expected: &kernelVersionInfo{ + baseVersion: "5.15.0-130", + releaseVersion: "140", + fullVersion: "5.15.0-130.140", + majorVersion: "5.15", + }, + }, + { + name: "empty string", + firstLine: "", + expectError: "could not parse kernel version from changelog", + }, + { + name: "no version match", + firstLine: "not a valid changelog line", + expectError: "could not parse kernel version from changelog", + }, + { + name: "missing release version", 
+ firstLine: "linux (5.4.0-195) focal; urgency=medium", + expectError: "could not parse kernel version from changelog", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := extractKernelVersion(tt.firstLine) + if tt.expectError != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), tt.expectError) + return + } + require.NoError(t, err) + assert.Equal(t, tt.expected.baseVersion, result.baseVersion) + assert.Equal(t, tt.expected.releaseVersion, result.releaseVersion) + assert.Equal(t, tt.expected.fullVersion, result.fullVersion) + assert.Equal(t, tt.expected.majorVersion, result.majorVersion) + }) + } +} + +func TestCreateMainKernelPackage(t *testing.T) { + location := file.NewLocation("test-fixtures/changelog.Debian.gz") + versionInfo := &kernelVersionInfo{ + baseVersion: "5.4.0-195", + releaseVersion: "215", + fullVersion: "5.4.0-195.215", + majorVersion: "5.4", + } + snapMetadata := pkg.SnapEntry{ + SnapType: pkg.SnapTypeKernel, + } + + packages := createMainKernelPackage(versionInfo, snapMetadata, location) + + require.Len(t, packages, 1) + p := packages[0] + assert.Equal(t, "linux-image-5.4.0-195-generic", p.Name) + assert.Equal(t, "5.4.0-195.215", p.Version) + assert.Equal(t, pkg.DebPkg, p.Type) + + metadata, ok := p.Metadata.(pkg.SnapEntry) + require.True(t, ok) + assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType) +} + +func TestParseBaseKernelLine(t *testing.T) { + location := file.NewLocation("test-fixtures/changelog.Debian.gz") + snapMetadata := pkg.SnapEntry{ + SnapType: pkg.SnapTypeKernel, + } + + tests := []struct { + name string + line string + majorVersion string + expectNil bool + expectedName string + expectedVer string + }{ + { + name: "standard base kernel entry", + line: " [ Ubuntu: 5.4-100.200 ]", + majorVersion: "5.4", + expectedName: "linux-image-5.4-100-generic", + expectedVer: "5.4-100.200", + }, + { + name: "6.x base kernel entry", + line: " [ Ubuntu: 6.8-40.41 ]", + majorVersion: 
"6.8", + expectedName: "linux-image-6.8-40-generic", + expectedVer: "6.8-40.41", + }, + { + name: "no matching version", + line: " * some random changelog text here", + majorVersion: "5.4", + expectNil: true, + }, + { + name: "empty line", + line: "", + majorVersion: "5.4", + expectNil: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := parseBaseKernelLine(tt.line, tt.majorVersion, snapMetadata, location) + if tt.expectNil { + assert.Nil(t, result) + return + } + require.NotNil(t, result) + assert.Equal(t, tt.expectedName, result.Name) + assert.Equal(t, tt.expectedVer, result.Version) + assert.Equal(t, pkg.DebPkg, result.Type) + + metadata, ok := result.Metadata.(pkg.SnapEntry) + require.True(t, ok) + assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType) + }) + } +} + +func TestParseKernelChangelog(t *testing.T) { + // Realistic changelog content modeled on Ubuntu kernel changelogs. + // The first line declares the patched kernel version. + // Somewhere later a line references the base upstream kernel. 
+ fullChangelog := strings.Join([]string{ + "linux (5.4.0-195.215) focal; urgency=medium", + "", + " * focal/linux: 5.4.0-195.215 -proposed tracker (LP: #2083390)", + "", + " [ Ubuntu: 5.4-100.200 ]", + "", + " * Some other entry", + "", + " -- Ubuntu Kernel Team Mon, 01 Jan 2024 00:00:00 +0000", + "", + }, "\n") + + // Changelog where the base kernel entry line uses the release version pattern + // The code builds: fmt.Sprintf("%s/linux:", releaseVersion) → "215/linux:" + changelogWithBaseEntry := strings.Join([]string{ + "linux (5.4.0-195.215) focal; urgency=medium", + "", + " * focal/linux: 5.4.0-195.215 -proposed tracker", + "", + " 215/linux: 5.4-100.200 base entry", + "", + " -- Ubuntu Kernel Team Mon, 01 Jan 2024 00:00:00 +0000", + "", + }, "\n") + + // Changelog with only the header line and no base kernel match + minimalChangelog := "linux (6.8.0-50.51) noble; urgency=medium\n" + + tests := []struct { + name string + input []byte + expectedCount int + expectedNames []string + expectedVers []string + expectError bool + errorContains string + }{ + { + name: "full changelog with base kernel via release version pattern", + input: gzipContent(t, changelogWithBaseEntry), + expectedCount: 2, + expectedNames: []string{"linux-image-5.4.0-195-generic", "linux-image-5.4-100-generic"}, + expectedVers: []string{"5.4.0-195.215", "5.4-100.200"}, + }, + { + name: "changelog without base kernel match returns only main package", + input: gzipContent(t, minimalChangelog), + expectedCount: 1, + expectedNames: []string{"linux-image-6.8.0-50-generic"}, + expectedVers: []string{"6.8.0-50.51"}, + }, + { + name: "full changelog without matching release version pattern returns only main package", + input: gzipContent(t, fullChangelog), + expectedCount: 1, + expectedNames: []string{"linux-image-5.4.0-195-generic"}, + expectedVers: []string{"5.4.0-195.215"}, + }, + { + name: "invalid gzip data", + input: []byte("not gzip data"), + expectError: true, + errorContains: "failed to create 
gzip reader", + }, + { + // The old (slurp) implementation produces "could not parse kernel version" + // because strings.Split("", "\n") yields [""], not an empty slice. + // The new (streaming) implementation produces "changelog file is empty" + // because bufio.Scanner.Scan() returns false immediately. + // Both correctly reject empty content; only the message differs. + name: "empty gzip content", + input: gzipContent(t, ""), + expectError: true, + }, + { + name: "gzip content with unparseable first line", + input: gzipContent(t, "this is not a valid kernel changelog\n"), + expectError: true, + errorContains: "could not parse kernel version from changelog", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reader := locationReadCloser(t, tt.input) + + packages, relationships, err := parseKernelChangelog( + context.Background(), nil, &generic.Environment{}, reader, + ) + + if tt.expectError { + require.Error(t, err) + if tt.errorContains != "" { + assert.Contains(t, err.Error(), tt.errorContains) + } + return + } + + require.NoError(t, err) + assert.Nil(t, relationships) + require.Len(t, packages, tt.expectedCount) + + for i, p := range packages { + assert.Equal(t, tt.expectedNames[i], p.Name, "package %d name", i) + assert.Equal(t, tt.expectedVers[i], p.Version, "package %d version", i) + assert.Equal(t, pkg.DebPkg, p.Type, "package %d type", i) + + metadata, ok := p.Metadata.(pkg.SnapEntry) + require.True(t, ok, "package %d metadata type", i) + assert.Equal(t, pkg.SnapTypeKernel, metadata.SnapType, "package %d snap type", i) + } + }) + } +} diff --git a/syft/source/filesource/file_source.go b/syft/source/filesource/file_source.go index 3e3ca71ba..1c49e4905 100644 --- a/syft/source/filesource/file_source.go +++ b/syft/source/filesource/file_source.go @@ -76,6 +76,11 @@ func New(cfg Config) (source.Source, error) { analysisPath, cleanupFn, err := fileAnalysisPath(cfg.Path, cfg.SkipExtractArchive) if err != nil { + if cleanupFn != 
nil { + if cleanupErr := cleanupFn(); cleanupErr != nil { + log.Warnf("failed to cleanup temporary directory: %v", cleanupErr) + } + } return nil, fmt.Errorf("unable to extract file analysis path=%q: %w", cfg.Path, err) } @@ -211,7 +216,7 @@ func fileAnalysisPath(path string, skipExtractArchive bool) (string, func() erro if unarchiver, ok := envelopedUnarchiver.(archives.Extractor); err == nil && ok { analysisPath, cleanupFn, err = unarchiveToTmp(path, unarchiver) if err != nil { - return "", nil, fmt.Errorf("unable to unarchive source file: %w", err) + return "", cleanupFn, fmt.Errorf("unable to unarchive source file: %w", err) } log.Debugf("source path is an archive")