fix: java archives excluded due to incorrect license glob results (#4449)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2025-12-08 15:58:13 -05:00 committed by GitHub
parent d950ac1fae
commit 9e3150b7ee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 47 additions and 128 deletions

View File

@ -1,46 +0,0 @@
package file
// GlobMatch evaluates the given glob pattern against the given "name" string, indicating if there is a match or not.
// Source: https://research.swtch.com/glob.go
func GlobMatch(pattern, name string) bool {
px := 0
nx := 0
nextPx := 0
nextNx := 0
for px < len(pattern) || nx < len(name) {
if px < len(pattern) {
c := pattern[px]
switch c {
default: // ordinary character
if nx < len(name) && name[nx] == c {
px++
nx++
continue
}
case '?': // single-character wildcard
if nx < len(name) {
px++
nx++
continue
}
case '*': // zero-or-more-character wildcard
// Try to match at nx.
// If that doesn't work out,
// restart at nx+1 next.
nextPx = px
nextNx = nx + 1
px++
continue
}
}
// Mismatch. Maybe restart.
if 0 < nextNx && nextNx <= len(name) {
px = nextPx
nx = nextNx
continue
}
return false
}
// Matched all of pattern to all of name. Success.
return true
}

View File

@ -1,39 +0,0 @@
package file
import (
"strings"
"testing"
)
func TestGlobMatch(t *testing.T) {
var tests = []struct {
pattern string
data string
ok bool
}{
{"", "", true},
{"x", "", false},
{"", "x", false},
{"abc", "abc", true},
{"*", "abc", true},
{"*c", "abc", true},
{"*b", "abc", false},
{"a*", "abc", true},
{"b*", "abc", false},
{"a*", "a", true},
{"*a", "a", true},
{"a*b*c*d*e*", "axbxcxdxe", true},
{"a*b*c*d*e*", "axbxcxdxexxx", true},
{"a*b?c*x", "abxbbxdbxebxczzx", true},
{"a*b?c*x", "abxbbxdbxebxczzy", false},
{"a*a*a*a*b", strings.Repeat("a", 100), false},
{"*x", "xxx", true},
{"/home/place/**", "/home/place/a/thing", true},
}
for _, test := range tests {
if GlobMatch(test.pattern, test.data) != test.ok {
t.Errorf("failed glob='%s' data='%s'", test.pattern, test.data)
}
}
}

View File

@ -0,0 +1 @@
this file is in a subdirectory

View File

@ -7,14 +7,14 @@ import (
"path/filepath"
"syscall"
"testing"
"github.com/stretchr/testify/assert"
)
var expectedZipArchiveEntries = []string{
"some-dir" + string(os.PathSeparator),
filepath.Join("some-dir", "a-file.txt"),
"some-dir/",
"some-dir/a-file.txt",
"b-file.txt",
"b-file/",
"b-file/in-subdir.txt",
"nested.zip",
}
@ -59,12 +59,6 @@ func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string
}
func assertNoError(t testing.TB, fn func() error) func() {
return func() {
assert.NoError(t, fn())
}
}
// setupZipFileTest encapsulates common test setup work for zip file tests. It returns a cleanup function,
// which should be called (typically deferred) by the caller, the path of the created zip archive, and an error,
// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil

View File

@ -6,6 +6,7 @@ import (
"sort"
"strings"
"github.com/bmatcuk/doublestar/v4"
"github.com/mholt/archives"
"github.com/scylladb/go-set/strset"
@ -44,12 +45,17 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
z[entry] = info
}
// GlobMatch returns the path keys that match the given value(s).
// GlobMatch returns the path keys to files (not directories) that match the given value(s).
func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string {
uniqueMatches := strset.New()
for _, pattern := range patterns {
for entry := range z {
fileInfo := z[entry]
if fileInfo != nil && fileInfo.IsDir() {
continue
}
// We want to match globs as if entries begin with a leading slash (akin to an absolute path)
// so that glob logic is consistent inside and outside of ZIP archives
normalizedEntry := normalizeZipEntryName(caseInsensitive, entry)
@ -57,7 +63,13 @@ func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []s
if caseInsensitive {
pattern = strings.ToLower(pattern)
}
if GlobMatch(pattern, normalizedEntry) {
matches, err := doublestar.Match(pattern, normalizedEntry)
if err != nil {
log.Debugf("error with match pattern '%s', including by default: %v", pattern, err)
matches = true
}
if matches {
uniqueMatches.Add(entry)
}
}

View File

@ -9,6 +9,8 @@ import (
"os"
"path"
"testing"
"github.com/stretchr/testify/require"
)
func TestNewZipFileManifest(t *testing.T) {
@ -107,23 +109,27 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
cases := []struct {
glob string
expected string
expected []string
}{
{
"/b*",
"b-file.txt",
[]string{"b-file.txt"},
},
{
"*/a-file.txt",
"some-dir/a-file.txt",
"/b*/**",
[]string{"b-file.txt", "b-file/in-subdir.txt"},
},
{
"*/A-file.txt",
"some-dir/a-file.txt",
"**/a-file.txt",
[]string{"some-dir/a-file.txt"},
},
{
"**/A-file.txt",
[]string{"some-dir/a-file.txt"},
},
{
"**/*.zip",
"nested.zip",
[]string{"nested.zip"},
},
}
@ -133,11 +139,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
results := z.GlobMatch(true, glob)
if len(results) == 1 && results[0] == tc.expected {
return
}
t.Errorf("unexpected results for glob '%s': %+v", glob, results)
require.ElementsMatch(t, tc.expected, results)
})
}
}

View File

@ -184,7 +184,7 @@ func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pk
relationships = append(relationships, nestedRelationships...)
} else {
// .jar and .war files are present in archives, are others? or generally just consider them top-level?
nestedArchives := j.fileManifest.GlobMatch(true, "*.jar", "*.war")
nestedArchives := j.fileManifest.GlobMatch(true, "**/*.jar", "**/*.war")
if len(nestedArchives) > 0 {
slices.Sort(nestedArchives)
errs = unknown.Appendf(errs, j.location, "nested archives not cataloged: %v", strings.Join(nestedArchives, ", "))
@ -252,10 +252,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
return nil, err
}
name, version, lics, parsedPom, err := j.discoverNameVersionLicense(ctx, manifest)
if err != nil {
return nil, err
}
name, version, lics, parsedPom := j.discoverNameVersionLicense(ctx, manifest)
var pkgPomProject *pkg.JavaPomProject
if parsedPom != nil {
pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project)
@ -280,7 +277,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
}, nil
}
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject, error) {
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject) {
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
lics := pkg.NewLicensesFromLocationWithContext(ctx, j.location, selectLicenses(manifest)...)
@ -300,10 +297,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
}
if len(lics) == 0 {
fileLicenses, err := j.getLicenseFromFileInArchive(ctx)
if err != nil {
return "", "", nil, parsedPom, err
}
fileLicenses := j.getLicenseFromFileInArchive(ctx)
if fileLicenses != nil {
lics = append(lics, fileLicenses...)
}
@ -317,7 +311,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
}
return artifactID, version, lics, parsedPom, nil
return artifactID, version, lics, parsedPom
}
// findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info
@ -562,7 +556,7 @@ func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Dige
return digests, nil
}
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.License, error) {
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) []pkg.License {
// prefer identified licenses, fall back to unknown
var identified []pkg.License
var unidentified []pkg.License
@ -578,7 +572,8 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
if len(licenseMatches) > 0 {
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
if err != nil {
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
log.Debugf("unable to extract java license (%s): %w", j.location, err)
continue
}
for _, licenseMatch := range licenseMatches {
@ -602,10 +597,10 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
}
if len(identified) == 0 {
return unidentified, nil
return unidentified
}
return identified, nil
return identified
}
func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {

View File

@ -11,7 +11,7 @@ import (
"github.com/anchore/syft/syft/pkg"
)
const pomPropertiesGlob = "*pom.properties"
const pomPropertiesGlob = "**/*pom.properties"
func parsePomProperties(path string, reader io.Reader) (*pkg.JavaPomProperties, error) {
var props pkg.JavaPomProperties

View File

@ -16,7 +16,7 @@ import (
)
const (
pomXMLGlob = "*pom.xml"
pomXMLGlob = "**/*pom.xml"
pomCatalogerName = "java-pom-cataloger"
)

View File

@ -6,9 +6,9 @@ import (
"path/filepath"
"strings"
"github.com/bmatcuk/doublestar/v4"
"github.com/go-viper/mapstructure/v2"
intFile "github.com/anchore/syft/internal/file"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
@ -141,7 +141,7 @@ func getFieldType(key, in string) any {
// of egg metadata (as opposed to a directory that contains more metadata
// files).
func isEggRegularFile(path string) bool {
return intFile.GlobMatch(eggInfoGlob, path)
return doublestar.MatchUnvalidated(eggInfoGlob, path)
}
// determineSitePackagesRootPath returns the path of the site packages root,