From c95893209d7870bcbc7015cc96b840400162db1a Mon Sep 17 00:00:00 2001 From: Will Murphy Date: Tue, 25 Nov 2025 10:20:21 -0500 Subject: [PATCH] fix: normalize python package names from dependency lists (#4408) Because package names in METADATA files may have upper case like Werkzeug or Jinja2, but Syft artifacts have normalized names and are lower case, like werkzeug or jinja2, Syft would miss emitting dependency relationships. Therefore, normalize dependency names before comparing with existing artifacts. Signed-off-by: Will Murphy --- syft/pkg/cataloger/python/cataloger_test.go | 1 + syft/pkg/cataloger/python/dependency.go | 4 +- syft/pkg/cataloger/python/dependency_test.go | 158 ++++++++++++++++++- 3 files changed, 161 insertions(+), 2 deletions(-) diff --git a/syft/pkg/cataloger/python/cataloger_test.go b/syft/pkg/cataloger/python/cataloger_test.go index 25fa4cc7e..d64dbb56a 100644 --- a/syft/pkg/cataloger/python/cataloger_test.go +++ b/syft/pkg/cataloger/python/cataloger_test.go @@ -567,6 +567,7 @@ func Test_PackageCataloger_Relationships(t *testing.T) { "colorama @ 0.4.6 (.) [dependency-of] pygments @ 2.18.0 (.)", "colorama @ 0.4.6 (.) [dependency-of] uvicorn @ 0.29.0 (.)", // proof of uvicorn[standard] "dnspython @ 2.6.1 (.) [dependency-of] email-validator @ 2.1.1 (.)", + "email-validator @ 2.1.1 (.) [dependency-of] fastapi @ 0.111.0 (.)", "email-validator @ 2.1.1 (.) [dependency-of] pydantic @ 2.7.1 (.)", "fastapi @ 0.111.0 (.) [dependency-of] fastapi-cli @ 0.0.4 (.)", "fastapi-cli @ 0.0.4 (.) 
[dependency-of] fastapi @ 0.111.0 (.)", diff --git a/syft/pkg/cataloger/python/dependency.go b/syft/pkg/cataloger/python/dependency.go index 0e0595760..cc70159cb 100644 --- a/syft/pkg/cataloger/python/dependency.go +++ b/syft/pkg/cataloger/python/dependency.go @@ -181,7 +181,9 @@ func extractPackageName(s string) string { // requests (>= 2.8.1) --> requests // requests ; python_version < "2.7" --> requests - return strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0]) + name := strings.TrimSpace(internal.SplitAny(s, "[(<!=>~;")[0]) + // normalize the name to match how packages are stored (lowercase, with hyphens instead of underscores) + return normalize(name) } // extractPackageNames applies extractPackageName to each string in the slice. diff --git a/syft/pkg/cataloger/python/dependency_test.go b/syft/pkg/cataloger/python/dependency_test.go index 8a8780f42..1dceda546 100644 --- a/syft/pkg/cataloger/python/dependency_test.go +++ b/syft/pkg/cataloger/python/dependency_test.go @@ -222,7 +222,7 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) { Variants: []dependency.ProvidesRequires{ { Provides: []string{"requests[socks]"}, - Requires: []string{"PySocks"}, + Requires: []string{"pysocks"}, }, { Provides: []string{"requests[use-chardet-on-py3]"}, @@ -276,6 +276,162 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) { } } +func Test_extractPackageName(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "simple package name", + input: "requests", + want: "requests", + }, + { + name: "package with version constraint", + input: "requests >= 2.8.1", + want: "requests", + }, + { + name: "package with parentheses version constraint", + input: "requests (>= 2.8.1)", + want: "requests", + }, + { + name: "package with extras", + input: "requests[security,tests]", + want: "requests", + }, + { + name: "package with extras and version", + input: "requests[security] >= 2.8.1", + want: 
"requests", + }, + { + name: "package with environment marker", + input: "requests ; python_version < \"2.7\"", + want: "requests", + }, + { + name: "package with everything", + input: "requests[security] >= 2.8.1 ; python_version < \"3\"", + want: "requests", + }, + { + name: "package name with capitals (normalization test)", + input: "Werkzeug (>=0.15)", + want: "werkzeug", + }, + { + name: "package name with mixed case", + input: "Jinja2 (>=2.10.1)", + want: "jinja2", + }, + { + name: "package name with underscores", + input: "some_package >= 1.0", + want: "some-package", + }, + { + name: "package name with mixed separators", + input: "Some_Package.Name >= 1.0", + want: "some-package-name", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := extractPackageName(tt.input) + assert.Equal(t, tt.want, got) + }) + } +} + +func Test_wheelEggDependencySpecifier(t *testing.T) { + tests := []struct { + name string + p pkg.Package + want dependency.Specification + }{ + { + name: "no dependencies", + p: pkg.Package{ + Name: "foo", + Metadata: pkg.PythonPackage{ + RequiresDist: []string{}, + }, + }, + want: dependency.Specification{ + ProvidesRequires: dependency.ProvidesRequires{ + Provides: []string{"foo"}, + }, + }, + }, + { + name: "simple dependencies", + p: pkg.Package{ + Name: "requests", + Metadata: pkg.PythonPackage{ + RequiresDist: []string{ + "certifi>=2017.4.17", + "urllib3<1.27,>=1.21.1", + }, + }, + }, + want: dependency.Specification{ + ProvidesRequires: dependency.ProvidesRequires{ + Provides: []string{"requests"}, + Requires: []string{"certifi", "urllib3"}, + }, + }, + }, + { + name: "dependencies with capital letters (Flask-like)", + p: pkg.Package{ + Name: "flask", + Metadata: pkg.PythonPackage{ + RequiresDist: []string{ + "Werkzeug (>=0.15)", + "Jinja2 (>=2.10.1)", + "itsdangerous (>=0.24)", + "click (>=5.1)", + }, + }, + }, + want: dependency.Specification{ + ProvidesRequires: dependency.ProvidesRequires{ + Provides: 
[]string{"flask"}, + // Requires are returned in the order they appear in RequiresDist + Requires: []string{"werkzeug", "jinja2", "itsdangerous", "click"}, + }, + }, + }, + { + name: "dependencies with extras", + p: pkg.Package{ + Name: "foo", + Metadata: pkg.PythonPackage{ + RequiresDist: []string{ + "bar >= 1.0", + "pytest ; extra == 'dev'", + "sphinx ; extra == 'docs'", + }, + }, + }, + want: dependency.Specification{ + ProvidesRequires: dependency.ProvidesRequires{ + Provides: []string{"foo"}, + Requires: []string{"bar", "pytest", "sphinx"}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, wheelEggDependencySpecifier(tt.p)) + }) + } +} + func Test_pdmLockDependencySpecifier(t *testing.T) { tests := []struct {