mirror of
https://github.com/anchore/syft.git
synced 2026-02-12 10:36:45 +01:00
fix: java archives excluded due to incorrect license glob results (#4449)
Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
parent
d950ac1fae
commit
9e3150b7ee
@ -1,46 +0,0 @@
|
||||
package file
|
||||
|
||||
// GlobMatch evaluates the given glob pattern against the given "name" string, indicating if there is a match or not.
// The pattern may contain '?' (exactly one character) and '*' (zero or more characters); every other byte of the
// pattern must match literally. There is no escape syntax and path separators receive no special treatment.
//
// Matching is UTF-8 aware: '?' consumes one whole encoded character and '*' only retries at character boundaries,
// so a multi-byte character is never split in two. A byte that is not valid UTF-8 counts as one character.
//
// Source: https://research.swtch.com/glob.go
func GlobMatch(pattern, name string) bool {
	px, nx := 0, 0
	// Restart point recorded by the most recent '*'. nextNx == 0 means no '*' has been seen yet.
	nextPx, nextNx := 0, 0

	for px < len(pattern) || nx < len(name) {
		if px < len(pattern) {
			switch c := pattern[px]; c {
			case '?': // single-character wildcard
				if nx < len(name) {
					px++
					nx += globLeadingCharLen(name[nx:])
					continue
				}
			case '*': // zero-or-more-character wildcard
				// Try to match at nx.
				// If that doesn't work out,
				// restart one character further along next.
				nextPx = px
				nextNx = nx + 1 // past the end of name: disables any further restart
				if nx < len(name) {
					nextNx = nx + globLeadingCharLen(name[nx:])
				}
				px++
				continue
			default: // ordinary character, compared byte by byte
				if nx < len(name) && name[nx] == c {
					px++
					nx++
					continue
				}
			}
		}

		// Mismatch. Maybe restart.
		if 0 < nextNx && nextNx <= len(name) {
			px = nextPx
			nx = nextNx
			continue
		}
		return false
	}

	// Matched all of pattern to all of name. Success.
	return true
}

// globLeadingCharLen returns the number of bytes occupied by the first character of s (0 for an empty string).
// Ranging over a string yields the byte offset at which each character starts, so the second offset produced is
// the width of the first character; an invalid UTF-8 byte is reported as a one-byte character.
func globLeadingCharLen(s string) int {
	for i := range s {
		if i > 0 {
			return i
		}
	}
	return len(s)
}
|
||||
@ -1,39 +0,0 @@
|
||||
package file
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGlobMatch(t *testing.T) {
|
||||
var tests = []struct {
|
||||
pattern string
|
||||
data string
|
||||
ok bool
|
||||
}{
|
||||
{"", "", true},
|
||||
{"x", "", false},
|
||||
{"", "x", false},
|
||||
{"abc", "abc", true},
|
||||
{"*", "abc", true},
|
||||
{"*c", "abc", true},
|
||||
{"*b", "abc", false},
|
||||
{"a*", "abc", true},
|
||||
{"b*", "abc", false},
|
||||
{"a*", "a", true},
|
||||
{"*a", "a", true},
|
||||
{"a*b*c*d*e*", "axbxcxdxe", true},
|
||||
{"a*b*c*d*e*", "axbxcxdxexxx", true},
|
||||
{"a*b?c*x", "abxbbxdbxebxczzx", true},
|
||||
{"a*b?c*x", "abxbbxdbxebxczzy", false},
|
||||
{"a*a*a*a*b", strings.Repeat("a", 100), false},
|
||||
{"*x", "xxx", true},
|
||||
{"/home/place/**", "/home/place/a/thing", true},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
if GlobMatch(test.pattern, test.data) != test.ok {
|
||||
t.Errorf("failed glob='%s' data='%s'", test.pattern, test.data)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -0,0 +1 @@
|
||||
this file is in a subdirectory
|
||||
@ -7,14 +7,14 @@ import (
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
var expectedZipArchiveEntries = []string{
|
||||
"some-dir" + string(os.PathSeparator),
|
||||
filepath.Join("some-dir", "a-file.txt"),
|
||||
"some-dir/",
|
||||
"some-dir/a-file.txt",
|
||||
"b-file.txt",
|
||||
"b-file/",
|
||||
"b-file/in-subdir.txt",
|
||||
"nested.zip",
|
||||
}
|
||||
|
||||
@ -59,12 +59,6 @@ func createZipArchive(t testing.TB, sourceDirPath, destinationArchivePath string
|
||||
|
||||
}
|
||||
|
||||
func assertNoError(t testing.TB, fn func() error) func() {
|
||||
return func() {
|
||||
assert.NoError(t, fn())
|
||||
}
|
||||
}
|
||||
|
||||
// setupZipFileTest encapsulates common test setup work for zip file tests. It returns a cleanup function,
|
||||
// which should be called (typically deferred) by the caller, the path of the created zip archive, and an error,
|
||||
// which should trigger a fatal test failure in the consuming test. The returned cleanup function will never be nil
|
||||
|
||||
@ -6,6 +6,7 @@ import (
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/mholt/archives"
|
||||
"github.com/scylladb/go-set/strset"
|
||||
|
||||
@ -44,12 +45,17 @@ func (z ZipFileManifest) Add(entry string, info os.FileInfo) {
|
||||
z[entry] = info
|
||||
}
|
||||
|
||||
// GlobMatch returns the path keys that match the given value(s).
|
||||
// GlobMatch returns the path keys to files (not directories) that match the given value(s).
|
||||
func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []string {
|
||||
uniqueMatches := strset.New()
|
||||
|
||||
for _, pattern := range patterns {
|
||||
for entry := range z {
|
||||
fileInfo := z[entry]
|
||||
if fileInfo != nil && fileInfo.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// We want to match globs as if entries begin with a leading slash (akin to an absolute path)
|
||||
// so that glob logic is consistent inside and outside of ZIP archives
|
||||
normalizedEntry := normalizeZipEntryName(caseInsensitive, entry)
|
||||
@ -57,7 +63,13 @@ func (z ZipFileManifest) GlobMatch(caseInsensitive bool, patterns ...string) []s
|
||||
if caseInsensitive {
|
||||
pattern = strings.ToLower(pattern)
|
||||
}
|
||||
if GlobMatch(pattern, normalizedEntry) {
|
||||
|
||||
matches, err := doublestar.Match(pattern, normalizedEntry)
|
||||
if err != nil {
|
||||
log.Debugf("error with match pattern '%s', including by default: %v", pattern, err)
|
||||
matches = true
|
||||
}
|
||||
if matches {
|
||||
uniqueMatches.Add(entry)
|
||||
}
|
||||
}
|
||||
|
||||
@ -9,6 +9,8 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewZipFileManifest(t *testing.T) {
|
||||
@ -107,23 +109,27 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
|
||||
|
||||
cases := []struct {
|
||||
glob string
|
||||
expected string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
"/b*",
|
||||
"b-file.txt",
|
||||
[]string{"b-file.txt"},
|
||||
},
|
||||
{
|
||||
"*/a-file.txt",
|
||||
"some-dir/a-file.txt",
|
||||
"/b*/**",
|
||||
[]string{"b-file.txt", "b-file/in-subdir.txt"},
|
||||
},
|
||||
{
|
||||
"*/A-file.txt",
|
||||
"some-dir/a-file.txt",
|
||||
"**/a-file.txt",
|
||||
[]string{"some-dir/a-file.txt"},
|
||||
},
|
||||
{
|
||||
"**/A-file.txt",
|
||||
[]string{"some-dir/a-file.txt"},
|
||||
},
|
||||
{
|
||||
"**/*.zip",
|
||||
"nested.zip",
|
||||
[]string{"nested.zip"},
|
||||
},
|
||||
}
|
||||
|
||||
@ -133,11 +139,7 @@ func TestZipFileManifest_GlobMatch(t *testing.T) {
|
||||
|
||||
results := z.GlobMatch(true, glob)
|
||||
|
||||
if len(results) == 1 && results[0] == tc.expected {
|
||||
return
|
||||
}
|
||||
|
||||
t.Errorf("unexpected results for glob '%s': %+v", glob, results)
|
||||
require.ElementsMatch(t, tc.expected, results)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -184,7 +184,7 @@ func (j *archiveParser) parse(ctx context.Context, parentPkg *pkg.Package) ([]pk
|
||||
relationships = append(relationships, nestedRelationships...)
|
||||
} else {
|
||||
// .jar and .war files are present in archives, are others? or generally just consider them top-level?
|
||||
nestedArchives := j.fileManifest.GlobMatch(true, "*.jar", "*.war")
|
||||
nestedArchives := j.fileManifest.GlobMatch(true, "**/*.jar", "**/*.war")
|
||||
if len(nestedArchives) > 0 {
|
||||
slices.Sort(nestedArchives)
|
||||
errs = unknown.Appendf(errs, j.location, "nested archives not cataloged: %v", strings.Join(nestedArchives, ", "))
|
||||
@ -252,10 +252,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
|
||||
return nil, err
|
||||
}
|
||||
|
||||
name, version, lics, parsedPom, err := j.discoverNameVersionLicense(ctx, manifest)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
name, version, lics, parsedPom := j.discoverNameVersionLicense(ctx, manifest)
|
||||
var pkgPomProject *pkg.JavaPomProject
|
||||
if parsedPom != nil {
|
||||
pkgPomProject = newPomProject(ctx, j.maven, parsedPom.path, parsedPom.project)
|
||||
@ -280,7 +277,7 @@ func (j *archiveParser) discoverMainPackage(ctx context.Context) (*pkg.Package,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject, error) {
|
||||
func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest *pkg.JavaManifest) (string, string, []pkg.License, *parsedPomProject) {
|
||||
// we use j.location because we want to associate the license declaration with where we discovered the contents in the manifest
|
||||
// TODO: when we support locations of paths within archives we should start passing the specific manifest location object instead of the top jar
|
||||
lics := pkg.NewLicensesFromLocationWithContext(ctx, j.location, selectLicenses(manifest)...)
|
||||
@ -300,10 +297,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
|
||||
}
|
||||
|
||||
if len(lics) == 0 {
|
||||
fileLicenses, err := j.getLicenseFromFileInArchive(ctx)
|
||||
if err != nil {
|
||||
return "", "", nil, parsedPom, err
|
||||
}
|
||||
fileLicenses := j.getLicenseFromFileInArchive(ctx)
|
||||
if fileLicenses != nil {
|
||||
lics = append(lics, fileLicenses...)
|
||||
}
|
||||
@ -317,7 +311,7 @@ func (j *archiveParser) discoverNameVersionLicense(ctx context.Context, manifest
|
||||
lics = j.findLicenseFromJavaMetadata(ctx, groupID, artifactID, version, parsedPom, manifest)
|
||||
}
|
||||
|
||||
return artifactID, version, lics, parsedPom, nil
|
||||
return artifactID, version, lics, parsedPom
|
||||
}
|
||||
|
||||
// findLicenseFromJavaMetadata attempts to find license information from all available maven metadata properties and pom info
|
||||
@ -562,7 +556,7 @@ func getDigestsFromArchive(ctx context.Context, archivePath string) ([]file.Dige
|
||||
return digests, nil
|
||||
}
|
||||
|
||||
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.License, error) {
|
||||
func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) []pkg.License {
|
||||
// prefer identified licenses, fall back to unknown
|
||||
var identified []pkg.License
|
||||
var unidentified []pkg.License
|
||||
@ -578,7 +572,8 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
|
||||
if len(licenseMatches) > 0 {
|
||||
contents, err := intFile.ContentsFromZip(ctx, j.archivePath, licenseMatches...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to extract java license (%s): %w", j.location, err)
|
||||
// %v rather than %w: the wrapping verb is only understood by fmt.Errorf; other printf-style formatters
// render it as a bad-verb marker instead of the error text
log.Debugf("unable to extract java license (%s): %v", j.location, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, licenseMatch := range licenseMatches {
|
||||
@ -602,10 +597,10 @@ func (j *archiveParser) getLicenseFromFileInArchive(ctx context.Context) ([]pkg.
|
||||
}
|
||||
|
||||
if len(identified) == 0 {
|
||||
return unidentified, nil
|
||||
return unidentified
|
||||
}
|
||||
|
||||
return identified, nil
|
||||
return identified
|
||||
}
|
||||
|
||||
func (j *archiveParser) discoverPkgsFromNestedArchives(ctx context.Context, parentPkg *pkg.Package) ([]pkg.Package, []artifact.Relationship, error) {
|
||||
|
||||
@ -11,7 +11,7 @@ import (
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
)
|
||||
|
||||
const pomPropertiesGlob = "*pom.properties"
|
||||
const pomPropertiesGlob = "**/*pom.properties"
|
||||
|
||||
func parsePomProperties(path string, reader io.Reader) (*pkg.JavaPomProperties, error) {
|
||||
var props pkg.JavaPomProperties
|
||||
|
||||
@ -16,7 +16,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
pomXMLGlob = "*pom.xml"
|
||||
pomXMLGlob = "**/*pom.xml"
|
||||
pomCatalogerName = "java-pom-cataloger"
|
||||
)
|
||||
|
||||
|
||||
@ -6,9 +6,9 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/bmatcuk/doublestar/v4"
|
||||
"github.com/go-viper/mapstructure/v2"
|
||||
|
||||
intFile "github.com/anchore/syft/internal/file"
|
||||
"github.com/anchore/syft/internal/log"
|
||||
"github.com/anchore/syft/syft/file"
|
||||
"github.com/anchore/syft/syft/pkg"
|
||||
@ -141,7 +141,7 @@ func getFieldType(key, in string) any {
|
||||
// of egg metadata (as opposed to a directory that contains more metadata
|
||||
// files).
|
||||
func isEggRegularFile(path string) bool {
|
||||
return intFile.GlobMatch(eggInfoGlob, path)
|
||||
return doublestar.MatchUnvalidated(eggInfoGlob, path)
|
||||
}
|
||||
|
||||
// determineSitePackagesRootPath returns the path of the site packages root,
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user