fix: normalize python package names from dependency lists (#4408)

Package names in METADATA files may contain uppercase letters (for example
Werkzeug or Jinja2), whereas Syft artifacts store normalized, lowercase names
(werkzeug, jinja2). Because of this mismatch, Syft would miss emitting the
corresponding dependency relationships. Therefore, normalize dependency names
before comparing them with existing artifacts.

Signed-off-by: Will Murphy <willmurphyscode@users.noreply.github.com>
This commit is contained in:
Will Murphy 2025-11-25 10:20:21 -05:00 committed by GitHub
parent 7e02bdfe45
commit c95893209d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 161 additions and 2 deletions

View File

@ -567,6 +567,7 @@ func Test_PackageCataloger_Relationships(t *testing.T) {
"colorama @ 0.4.6 (.) [dependency-of] pygments @ 2.18.0 (.)", "colorama @ 0.4.6 (.) [dependency-of] pygments @ 2.18.0 (.)",
"colorama @ 0.4.6 (.) [dependency-of] uvicorn @ 0.29.0 (.)", // proof of uvicorn[standard] "colorama @ 0.4.6 (.) [dependency-of] uvicorn @ 0.29.0 (.)", // proof of uvicorn[standard]
"dnspython @ 2.6.1 (.) [dependency-of] email-validator @ 2.1.1 (.)", "dnspython @ 2.6.1 (.) [dependency-of] email-validator @ 2.1.1 (.)",
"email-validator @ 2.1.1 (.) [dependency-of] fastapi @ 0.111.0 (.)",
"email-validator @ 2.1.1 (.) [dependency-of] pydantic @ 2.7.1 (.)", "email-validator @ 2.1.1 (.) [dependency-of] pydantic @ 2.7.1 (.)",
"fastapi @ 0.111.0 (.) [dependency-of] fastapi-cli @ 0.0.4 (.)", "fastapi @ 0.111.0 (.) [dependency-of] fastapi-cli @ 0.0.4 (.)",
"fastapi-cli @ 0.0.4 (.) [dependency-of] fastapi @ 0.111.0 (.)", "fastapi-cli @ 0.0.4 (.) [dependency-of] fastapi @ 0.111.0 (.)",

View File

@ -181,7 +181,9 @@ func extractPackageName(s string) string {
// requests (>= 2.8.1) --> requests // requests (>= 2.8.1) --> requests
// requests ; python_version < "2.7" --> requests // requests ; python_version < "2.7" --> requests
return strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0]) name := strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0])
// normalize the name to match how packages are stored (lowercase, with hyphens instead of underscores)
return normalize(name)
} }
// extractPackageNames applies extractPackageName to each string in the slice. // extractPackageNames applies extractPackageName to each string in the slice.

View File

@ -222,7 +222,7 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) {
Variants: []dependency.ProvidesRequires{ Variants: []dependency.ProvidesRequires{
{ {
Provides: []string{"requests[socks]"}, Provides: []string{"requests[socks]"},
Requires: []string{"PySocks"}, Requires: []string{"pysocks"},
}, },
{ {
Provides: []string{"requests[use-chardet-on-py3]"}, Provides: []string{"requests[use-chardet-on-py3]"},
@ -276,6 +276,162 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) {
} }
} }
func Test_extractPackageName(t *testing.T) {
tests := []struct {
name string
input string
want string
}{
{
name: "simple package name",
input: "requests",
want: "requests",
},
{
name: "package with version constraint",
input: "requests >= 2.8.1",
want: "requests",
},
{
name: "package with parentheses version constraint",
input: "requests (>= 2.8.1)",
want: "requests",
},
{
name: "package with extras",
input: "requests[security,tests]",
want: "requests",
},
{
name: "package with extras and version",
input: "requests[security] >= 2.8.1",
want: "requests",
},
{
name: "package with environment marker",
input: "requests ; python_version < \"2.7\"",
want: "requests",
},
{
name: "package with everything",
input: "requests[security] >= 2.8.1 ; python_version < \"3\"",
want: "requests",
},
{
name: "package name with capitals (normalization test)",
input: "Werkzeug (>=0.15)",
want: "werkzeug",
},
{
name: "package name with mixed case",
input: "Jinja2 (>=2.10.1)",
want: "jinja2",
},
{
name: "package name with underscores",
input: "some_package >= 1.0",
want: "some-package",
},
{
name: "package name with mixed separators",
input: "Some_Package.Name >= 1.0",
want: "some-package-name",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := extractPackageName(tt.input)
assert.Equal(t, tt.want, got)
})
}
}
// Test_wheelEggDependencySpecifier checks the dependency.Specification that
// wheelEggDependencySpecifier produces for a package carrying
// pkg.PythonPackage metadata: the package's own name is expected under
// Provides, and the normalized name of each RequiresDist entry under Requires.
func Test_wheelEggDependencySpecifier(t *testing.T) {
	// pythonPackage builds the input package with the given RequiresDist entries.
	pythonPackage := func(name string, requiresDist []string) pkg.Package {
		return pkg.Package{
			Name:     name,
			Metadata: pkg.PythonPackage{RequiresDist: requiresDist},
		}
	}

	// specification builds the expected result; passing nil for requires
	// leaves the Requires field at its zero value.
	specification := func(provides, requires []string) dependency.Specification {
		return dependency.Specification{
			ProvidesRequires: dependency.ProvidesRequires{
				Provides: provides,
				Requires: requires,
			},
		}
	}

	cases := []struct {
		name     string
		input    pkg.Package
		expected dependency.Specification
	}{
		{
			name:     "no dependencies",
			input:    pythonPackage("foo", []string{}),
			expected: specification([]string{"foo"}, nil),
		},
		{
			name: "simple dependencies",
			input: pythonPackage("requests", []string{
				"certifi>=2017.4.17",
				"urllib3<1.27,>=1.21.1",
			}),
			expected: specification(
				[]string{"requests"},
				[]string{"certifi", "urllib3"},
			),
		},
		{
			name: "dependencies with capital letters (Flask-like)",
			input: pythonPackage("flask", []string{
				"Werkzeug (>=0.15)",
				"Jinja2 (>=2.10.1)",
				"itsdangerous (>=0.24)",
				"click (>=5.1)",
			}),
			// the expected order mirrors the order of the RequiresDist entries
			expected: specification(
				[]string{"flask"},
				[]string{"werkzeug", "jinja2", "itsdangerous", "click"},
			),
		},
		{
			name: "dependencies with extras",
			input: pythonPackage("foo", []string{
				"bar >= 1.0",
				"pytest ; extra == 'dev'",
				"sphinx ; extra == 'docs'",
			}),
			// entries guarded by an `extra == ...` marker are expected in Requires as well
			expected: specification(
				[]string{"foo"},
				[]string{"bar", "pytest", "sphinx"},
			),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			actual := wheelEggDependencySpecifier(tc.input)
			assert.Equal(t, tc.expected, actual)
		})
	}
}
func Test_pdmLockDependencySpecifier(t *testing.T) { func Test_pdmLockDependencySpecifier(t *testing.T) {
tests := []struct { tests := []struct {