From 2d1ada1d0085ec60ac15f6cdb023b2a4014547ca Mon Sep 17 00:00:00 2001 From: Hala Ali <129986297+HalaAli198@users.noreply.github.com> Date: Mon, 13 Oct 2025 15:10:42 -0400 Subject: [PATCH] fix: enhance setup.py parser to handle unquoted dependencies (#4255) * fix: add support for unquoted Python dependencies in setup.py - Add regex pattern to match unquoted package==version format - Handles common .split() pattern used in projects like mayan-edms - Maintains backward compatibility with quoted dependencies - Prevents duplicate package detection Signed-off-by: Hala Ali <alih16@vcu.edu> Signed-off-by: HalaAli198 * fix: apply gofmt formatting Signed-off-by: HalaAli198 * lint: incorporate new changes and refactor complexity Signed-off-by: Christopher Phillips --------- Signed-off-by: HalaAli198 Signed-off-by: Christopher Phillips Co-authored-by: Christopher Phillips --- syft/pkg/cataloger/python/parse_setup.go | 110 +++++++++++++----- syft/pkg/cataloger/python/parse_setup_test.go | 88 ++++++++++++++ .../test-fixtures/setup/mixed-format-setup.py | 17 +++ .../setup/multiline-split-setup.py | 23 ++++ 4 files changed, 207 insertions(+), 31 deletions(-) create mode 100644 syft/pkg/cataloger/python/test-fixtures/setup/mixed-format-setup.py create mode 100644 syft/pkg/cataloger/python/test-fixtures/setup/multiline-split-setup.py diff --git a/syft/pkg/cataloger/python/parse_setup.go b/syft/pkg/cataloger/python/parse_setup.go index 3332508ac..6a9148763 100644 --- a/syft/pkg/cataloger/python/parse_setup.go +++ b/syft/pkg/cataloger/python/parse_setup.go @@ -22,6 +22,7 @@ var _ generic.Parser = parseSetup // "mypy==v0.770", --> match(name=mypy version=v0.770) // " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770) var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`) +var unquotedPinnedDependency = regexp.MustCompile(`^\s*(\w+)\s*==\s*([\w\.\-]+)`) func parseSetup(_ context.Context, _ file.Resolver, _
*generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var packages []pkg.Package @@ -32,42 +33,89 @@ func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, read line := scanner.Text() line = strings.TrimRight(line, "\n") - for _, match := range pinnedDependency.FindAllString(line, -1) { - parts := strings.Split(match, "==") - if len(parts) != 2 { - continue - } - name := strings.Trim(parts[0], "'\"") - name = strings.TrimSpace(name) - name = strings.Trim(name, "'\"") - - version := strings.TrimSpace(parts[len(parts)-1]) - version = strings.Trim(version, "'\"") - - if hasTemplateDirective(name) || hasTemplateDirective(version) { - // this can happen in more dynamic setup.py where there is templating - continue - } - - if name == "" || version == "" { - log.WithFields("path", reader.RealPath).Debugf("unable to parse package in setup.py line: %q", line) - continue - } - - packages = append( - packages, - newPackageForIndex( - name, - version, - reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), - ), - ) - } + packages = processQuotedDependencies(line, reader, packages) + packages = processUnquotedDependency(line, reader, packages) } return packages, nil, nil } +func processQuotedDependencies(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { + for _, match := range pinnedDependency.FindAllString(line, -1) { + if p, ok := parseQuotedDependency(match, line, reader); ok { + packages = append(packages, p) + } + } + return packages +} + +func parseQuotedDependency(match, line string, reader file.LocationReadCloser) (pkg.Package, bool) { + parts := strings.Split(match, "==") + if len(parts) != 2 { + return pkg.Package{}, false + } + + name := cleanDependencyString(parts[0]) + version := cleanDependencyString(parts[len(parts)-1]) + + return validateAndCreatePackage(name, version, line, reader) +} + +// processUnquotedDependency extracts and 
processes an unquoted dependency from a line +func processUnquotedDependency(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { + matches := unquotedPinnedDependency.FindStringSubmatch(line) + if len(matches) != 3 { + return packages + } + + name := strings.TrimSpace(matches[1]) + version := strings.TrimSpace(matches[2]) + + if p, ok := validateAndCreatePackage(name, version, line, reader); ok { + if !isDuplicatePackage(p, packages) { + packages = append(packages, p) + } + } + + return packages +} + +func cleanDependencyString(s string) string { + s = strings.Trim(s, "'\"") + s = strings.TrimSpace(s) + s = strings.Trim(s, "'\"") + return s +} + +func validateAndCreatePackage(name, version, line string, reader file.LocationReadCloser) (pkg.Package, bool) { + if hasTemplateDirective(name) || hasTemplateDirective(version) { + // this can happen in more dynamic setup.py where there is templating + return pkg.Package{}, false + } + + if name == "" || version == "" { + log.WithFields("path", reader.RealPath).Debugf("unable to parse package in setup.py line: %q", line) + return pkg.Package{}, false + } + + p := newPackageForIndex( + name, + version, + reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), + ) + + return p, true +} + +func isDuplicatePackage(p pkg.Package, packages []pkg.Package) bool { + for _, existing := range packages { + if existing.Name == p.Name && existing.Version == p.Version { + return true + } + } + return false +} + func hasTemplateDirective(s string) bool { return strings.Contains(s, `%s`) || strings.Contains(s, `{`) || strings.Contains(s, `}`) } diff --git a/syft/pkg/cataloger/python/parse_setup_test.go b/syft/pkg/cataloger/python/parse_setup_test.go index 665007296..74cb604a5 100644 --- a/syft/pkg/cataloger/python/parse_setup_test.go +++ b/syft/pkg/cataloger/python/parse_setup_test.go @@ -61,6 +61,94 @@ func TestParseSetup(t *testing.T) { fixture: 
"test-fixtures/setup/dynamic-setup.py", expected: nil, }, + { + fixture: "test-fixtures/setup/multiline-split-setup.py", + expected: []pkg.Package{ + { + Name: "black", + Version: "23.12.1", + PURL: "pkg:pypi/black@23.12.1", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "cairosvg", + Version: "2.7.1", + PURL: "pkg:pypi/cairosvg@2.7.1", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "celery", + Version: "5.3.4", + PURL: "pkg:pypi/celery@5.3.4", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "django", + Version: "4.2.23", + PURL: "pkg:pypi/django@4.2.23", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "mypy", + Version: "1.7.1", + PURL: "pkg:pypi/mypy@1.7.1", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "pillow", + Version: "11.0.0", + PURL: "pkg:pypi/pillow@11.0.0", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "pytest", + Version: "7.4.3", + PURL: "pkg:pypi/pytest@7.4.3", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "requests", + Version: "2.31.0", + PURL: "pkg:pypi/requests@2.31.0", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + }, + }, + { + // Test mixed quoted and unquoted dependencies - ensure no duplicates + fixture: "test-fixtures/setup/mixed-format-setup.py", + expected: []pkg.Package{ + { + Name: "requests", + Version: "2.31.0", + PURL: "pkg:pypi/requests@2.31.0", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "django", + Version: "4.2.23", + PURL: "pkg:pypi/django@4.2.23", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + { + Name: "flask", + Version: "3.0.0", + PURL: "pkg:pypi/flask@3.0.0", + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + }, + }, } for _, tt := range tests { diff --git a/syft/pkg/cataloger/python/test-fixtures/setup/mixed-format-setup.py b/syft/pkg/cataloger/python/test-fixtures/setup/mixed-format-setup.py new file mode 100644 index 000000000..eec597bd0 --- /dev/null +++ 
b/syft/pkg/cataloger/python/test-fixtures/setup/mixed-format-setup.py @@ -0,0 +1,17 @@ +from setuptools import setup + +# Test case to ensure duplicate detection works correctly +# when same dependencies appear in both quoted and unquoted forms + +setup( + name='mixed-format-project', + version='1.0.0', + install_requires=[ + # Quoted dependencies (should be caught by pinnedDependency regex) + "requests==2.31.0", + "django==4.2.23", + ] + """ +requests==2.31.0 +flask==3.0.0 +""".split(), +) diff --git a/syft/pkg/cataloger/python/test-fixtures/setup/multiline-split-setup.py b/syft/pkg/cataloger/python/test-fixtures/setup/multiline-split-setup.py new file mode 100644 index 000000000..a14b27ae7 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/setup/multiline-split-setup.py @@ -0,0 +1,23 @@ +from setuptools import setup + +# Example setup.py using multiline string with .split() pattern +# This pattern is commonly seen in projects like mayan-edms + +setup( + name='example-project', + version='1.0.0', + install_requires=""" +django==4.2.23 +CairoSVG==2.7.1 +Pillow==11.0.0 +requests==2.31.0 +celery==5.3.4 +""".split(), + extras_require={ + 'dev': """ +pytest==7.4.3 +black==23.12.1 +mypy==1.7.1 +""".split(), + }, +)