fix: java archives excluded due to incorrect license glob results (#4449)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2025-12-08 15:58:13 -05:00 committed by GitHub
parent d950ac1fae
commit 9e3150b7ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 47 additions and 128 deletions

View File

@ -1,46 +0,0 @@
package file
// GlobMatch reports whether the whole of name matches the glob pattern.
//
// Two wildcards are recognized in pattern: '?' consumes exactly one byte of
// name, and '*' consumes zero or more bytes of name. Every other byte must
// match literally. No byte is treated specially, so '*' also spans '/'.
// Matching is byte-wise rather than rune-wise.
//
// The matcher remembers the most recent '*' and, on a mismatch, retries from
// that star with one more byte of name consumed instead of recursing.
//
// Source: https://research.swtch.com/glob.go
func GlobMatch(pattern, name string) bool {
	var (
		p, n           int // current offsets into pattern and name
		retryP, retryN int // resume point recorded at the most recent '*'
	)
	patLen, nameLen := len(pattern), len(name)

	for p < patLen || n < nameLen {
		if p < patLen {
			ch := pattern[p]

			if ch == '*' {
				// First try letting the star match nothing at n; should that
				// fail later, the retry point makes it swallow one more byte.
				retryP, retryN = p, n+1
				p++
				continue
			}

			// '?' accepts any single byte; anything else must be equal.
			if n < nameLen && (ch == '?' || name[n] == ch) {
				p++
				n++
				continue
			}
		}

		// Mismatch: give up unless a star was seen and it can still grow.
		if retryN <= 0 || retryN > nameLen {
			return false
		}
		p, n = retryP, retryN
	}

	// Both pattern and name were consumed completely.
	return true
}

View File

@ -1,39 +0,0 @@
package file
import (
"strings"
"testing"
)
// TestGlobMatch runs GlobMatch over a table of pattern/input pairs and
// reports every pair whose result differs from the expected outcome.
func TestGlobMatch(t *testing.T) {
	type globCase struct {
		glob  string // pattern handed to GlobMatch
		input string // candidate string to match
		want  bool   // expected GlobMatch result
	}

	cases := []globCase{
		{glob: "", input: "", want: true},
		{glob: "x", input: "", want: false},
		{glob: "", input: "x", want: false},
		{glob: "abc", input: "abc", want: true},
		{glob: "*", input: "abc", want: true},
		{glob: "*c", input: "abc", want: true},
		{glob: "*b", input: "abc", want: false},
		{glob: "a*", input: "abc", want: true},
		{glob: "b*", input: "abc", want: false},
		{glob: "a*", input: "a", want: true},
		{glob: "*a", input: "a", want: true},
		{glob: "a*b*c*d*e*", input: "axbxcxdxe", want: true},
		{glob: "a*b*c*d*e*", input: "axbxcxdxexxx", want: true},
		{glob: "a*b?c*x", input: "abxbbxdbxebxczzx", want: true},
		{glob: "a*b?c*x", input: "abxbbxdbxebxczzy", want: false},
		// Many stars over a long run of the same byte with no possible match.
		{glob: "a*a*a*a*b", input: strings.Repeat("a", 100), want: false},
		{glob: "*x", input: "xxx", want: true},
		{glob: "/home/place/**", input: "/home/place/a/thing", want: true},
	}

	for i := range cases {
		tc := cases[i]
		if got := GlobMatch(tc.glob, tc.input); got != tc.want {
			t.Errorf("failed glob='%s' data='%s'", tc.glob, tc.input)
		}
	}
}

View File

@ -0,0 +1 @@
this file is in a subdirectory

View File

@ -7,14 +7,14 @@ import (
"path/filepath" "path/filepath"
"syscall" "syscall"
"testing" "testing"
"github.com/stretchr/testify/assert"
) )
var expectedZipArchiveEntries = []string{ var expectedZipArchiveEntries = []string{
"some-dir" + string(os.PathSeparator), "some-dir/",
filepath.Join("some-dir", "a-file.txt"), "some-dir/a-file.txt",
"b-file.txt", "b-file.txt",
"b-file/",
"b-file/in-subdir.txt",
"nested.zip", "nested.zip",
} }
@ -59,12 +59,6 @@ func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string
} }
func assertNoError(t testing.TB, fn func() error) func() {
return func() {
assert.NoError(t, fn())
}
}
// setupZipFileTest encapsulates common test setup work for zip file tests. It returns a cleanup function, // setupZipFileTest encapsulates common test setup work for zip file tests. It returns a cleanup function,
// which should be called (typically deferred) by the caller, the path of the created zip archive, and an error, // which should be called (typically deferred) by the caller, the path of the created zip archive, and an error,
// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil // which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil

View File

@ -6,6 +6,7 @@ import (
"sort" "sort"
"strings" "strings"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives" "github.com/mholt/archives"
"github.com/scylladb/go-set/strset" "github.com/scylladb/go-set/strset"
@ -44,12 +45,17 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
z[entry] = info z[entry] = info
} }
// GlobMatch returns the path keys that match the given value(s). // GlobMatch returns the path keys to files (not directories) that match the given value(s).
func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string { func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string {
uniqueMatches := strset.New() uniqueMatches := strset.New()
for _, pattern := range patterns { for _, pattern := range patterns {
for entry := range z { for entry := range z {
fileInfo := z[entry]
if fileInfo != nil && fileInfo.IsDir() {
continue
}
// We want to match globs as if entries begin with a leading slash (akin to an absolute path) // We want to match globs as if entries begin with a leading slash (akin to an absolute path)
// so that glob logic is consistent inside and outside of ZIP archives // so that glob logic is consistent inside and outside of ZIP archives
normalizedEntry := normalizeZipEntryName(caseInsensitive, entry) normalizedEntry := normalizeZipEntryName(caseInsensitive, entry)
@ -57,7 +63,13 @@ func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []s
if caseInsensitive { if caseInsensitive {
pattern = strings.ToLower(pattern) pattern = strings.ToLower(pattern)
} }
if GlobMatch(pattern, normalizedEntry) {
matches, err := doublestar.Match(pattern, normalizedEntry)
if err != nil {
log.Debugf("error with match pattern '%s', including by default: %v", pattern, err)
matches = true
}
if matches {
uniqueMatches.Add(entry) uniqueMatches.Add(entry)
} }
} }

View File

@ -9,6 +9,8 @@ import (
"os" "os"
"path" "path"
"testing" "testing"
"github.com/stretchr/testify/require"
) )
func TestNewZipFileManifest(t *testing.T) { func TestNewZipFileManifest(t *testing.T) {
@ -107,23 +109,27 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
cases := []struct { cases := []struct {
glob string glob string
expected string expected []string
}{ }{
{ {
"/b*", "/b*",
"b-file.txt", []string{"b-file.txt"},
}, },
{ {
"*/a-file.txt", "/b*/**",
"some-dir/a-file.txt", []string{"b-file.txt", "b-file/in-subdir.txt"},
}, },
{ {
"*/A-file.txt", "**/a-file.txt",
"some-dir/a-file.txt", []string{"some-dir/a-file.txt"},
},
{
"**/A-file.txt",
[]string{"some-dir/a-file.txt"},
}, },
{ {
"**/*.zip", "**/*.zip",
"nested.zip", []string{"nested.zip"},
}, },
} }
@ -133,11 +139,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
results := z.GlobMatch(true, glob) results := z.GlobMatch(true, glob)
if len(results) == 1 && results[0] == tc.expected { require.ElementsMatch(t, tc.expected, results)
return
}
t.Errorf("unexpected results for glob '%s': %+v", glob, results)
}) })
} }
} }

View File

@ -184,7 +184,7 @@ func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pk
relationships = append(relationships, nestedRelationships...) relationships = append(relationships, nestedRelationships...)
} else { } else {
// .jar and .war files are present in archives, are others? or generally just consider them top-level? // .jar and .war files are present in archives, are others? or generally just consider them top-level?
nestedArchives := j.fileManifest.GlobMatch(true, "*.jar", "*.war") nestedArchives := j.fileManifest.GlobMatch(true, "**/*.jar", "**/*.war")
if len(nestedArchives) > 0 { if len(nestedArchives) > 0 {
slices.Sort(nestedArchives) slices.Sort(nestedArchives)
errs = unknown.Appendf(errs, j.location, "nested archives not cataloged: %v", strings.Join(nestedArchives, ", ")) errs = unknown.Appendf(errs, j.location, "nested archives not cataloged: %v", strings.Join(nestedArchives, ", "))
@ -252,10 +252,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
return nil, err return nil, err
} }
name, version, lics, parsedPom, err := j.discoverNameVersionLicense(ctx, manifest) name, version, lics, parsedPom := j.discoverNameVersionLicense(ctx, manifest)
if err != nil {
return nil, err
}
var pkgPomProject *pkg.JavaPomProject var pkgPomProject *pkg.JavaPomProject
if parsedPom != nil { if parsedPom != nil {
pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project) pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project)
@ -280,7 +277,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}, nil }, nil
} }
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject, error) { func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject) {
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest // we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar // TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
lics := pkg.NewLicensesFromLocationWithContext(ctx, j.location, selectLicenses(manifest)...) lics := pkg.NewLicensesFromLocationWithContext(ctx, j.location, selectLicenses(manifest)...)
@ -300,10 +297,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
} }
if len(lics) == 0 { if len(lics) == 0 {
fileLicenses, err := j.getLicenseFromFileInArchive(ctx) fileLicenses := j.getLicenseFromFileInArchive(ctx)
if err != nil {
return "", "", nil, parsedPom, err
}
if fileLicenses != nil { if fileLicenses != nil {
lics = append(lics, fileLicenses...) lics = append(lics, fileLicenses...)
} }
@ -317,7 +311,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest) lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
} }
return artifactID, version, lics, parsedPom, nil return artifactID, version, lics, parsedPom
} }
// findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info // findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info
@ -562,7 +556,7 @@ func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Dige
return digests, nil return digests, nil
} }
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.License, error) { func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) []pkg.License {
// prefer identified licenses, fall back to unknown // prefer identified licenses, fall back to unknown
var identified []pkg.License var identified []pkg.License
var unidentified []pkg.License var unidentified []pkg.License
@ -578,7 +572,8 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
if len(licenseMatches) > 0 { if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...) contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
if err != nil { if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err) log.Debugf("unable to extract java license (%s): %w", j.location, err)
continue
} }
for _, licenseMatch := range licenseMatches { for _, licenseMatch := range licenseMatches {
@ -602,10 +597,10 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
} }
if len(identified) == 0 { if len(identified) == 0 {
return unidentified, nil return unidentified
} }
return identified, nil return identified
} }
func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) { func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {

View File

@ -11,7 +11,7 @@ import (
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
) )
const pomPropertiesGlob = "*pom.properties" const pomPropertiesGlob = "**/*pom.properties"
func parsePomProperties(path string, reader io.Reader) (*pkg.JavaPomProperties, error) { func parsePomProperties(path string, reader io.Reader) (*pkg.JavaPomProperties, error) {
var props pkg.JavaPomProperties var props pkg.JavaPomProperties

View File

@ -16,7 +16,7 @@ import (
) )
const ( const (
pomXMLGlob = "*pom.xml" pomXMLGlob = "**/*pom.xml"
pomCatalogerName = "java-pom-cataloger" pomCatalogerName = "java-pom-cataloger"
) )

View File

@ -6,9 +6,9 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/bmatcuk/doublestar/v4"
"github.com/go-viper/mapstructure/v2" "github.com/go-viper/mapstructure/v2"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg"
@ -141,7 +141,7 @@ func getFieldType(key, in string) any {
// of egg metadata (as opposed to a directory that contains more metadata // of egg metadata (as opposed to a directory that contains more metadata
// files). // files).
func isEggRegularFile(path string) bool { func isEggRegularFile(path string) bool {
return intFile.GlobMatch(eggInfoGlob, path) return doublestar.MatchUnvalidated(eggInfoGlob, path)
} }
// determineSitePackagesRootPath returns the path of the site packages root, // determineSitePackagesRootPath returns the path of the site packages root,