fix: enhance setup.py parser to handle unquoted dependencies (#4255)

* fix: add support for unquoted Python dependencies in setup.py

- Add regex pattern to match unquoted package==version format
- Handles common .split() pattern used in projects like mayan-edms
- Maintains backward compatibility with quoted dependencies
- Prevents duplicate package detection
Signed-off-by: Hala Ali alih16@vcu.edu

Signed-off-by: HalaAli198 <alih16@vcu.edu>

* fix: apply gofmt formatting

Signed-off-by: HalaAli198 <alih16@vcu.edu>

* lint: incorporate new changes and refactor complexity

Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>

---------

Signed-off-by: HalaAli198 <alih16@vcu.edu>
Signed-off-by: Christopher Phillips <spiffcs@users.noreply.github.com>
Co-authored-by: Christopher Phillips <spiffcs@users.noreply.github.com>
This commit is contained in:
Hala Ali 2025-10-13 15:10:42 -04:00 committed by GitHub
parent 8ffe15c710
commit 2d1ada1d00
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 207 additions and 31 deletions

View File

@ -22,6 +22,7 @@ var _ generic.Parser = parseSetup
// "mypy==v0.770", --> match(name=mypy version=v0.770)
// " mypy2 == v0.770", ' mypy3== v0.770', --> match(name=mypy2 version=v0.770), match(name=mypy3, version=v0.770)
var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`)

// unquotedPinnedDependency matches a bare name==version pin that begins a line
// (leading whitespace allowed), e.g. the entries of a multi-line string that is
// later passed through .split():
//
//	django==4.2.23    --> match(name=django version=4.2.23)
//
// Capture group 1 is the name and is limited to \w+, so a name containing '-'
// or '.' does not match. Capture group 2 is the version and accepts word
// characters, '.' and '-'. Because the pattern is anchored with ^, it yields at
// most one match per line.
var unquotedPinnedDependency = regexp.MustCompile(`^\s*(\w+)\s*==\s*([\w\.\-]+)`)
func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package
@ -32,42 +33,89 @@ func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, read
line := scanner.Text()
line = strings.TrimRight(line, "\n")
for _, match := range pinnedDependency.FindAllString(line, -1) {
parts := strings.Split(match, "==")
if len(parts) != 2 {
continue
}
name := strings.Trim(parts[0], "'\"")
name = strings.TrimSpace(name)
name = strings.Trim(name, "'\"")
version := strings.TrimSpace(parts[len(parts)-1])
version = strings.Trim(version, "'\"")
if hasTemplateDirective(name) || hasTemplateDirective(version) {
// this can happen in more dynamic setup.py where there is templating
continue
}
if name == "" || version == "" {
log.WithFields("path", reader.RealPath).Debugf("unable to parse package in setup.py line: %q", line)
continue
}
packages = append(
packages,
newPackageForIndex(
name,
version,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),
),
)
}
packages = processQuotedDependencies(line, reader, packages)
packages = processUnquotedDependency(line, reader, packages)
}
return packages, nil, nil
}
// processQuotedDependencies scans a single setup.py line for every quoted
// name==version pin recognised by pinnedDependency and appends each pin that
// parses into a valid package to packages. The (possibly extended) slice is
// returned; candidates that fail parsing are silently skipped.
func processQuotedDependencies(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package {
	candidates := pinnedDependency.FindAllString(line, -1)
	for _, candidate := range candidates {
		parsed, ok := parseQuotedDependency(candidate, line, reader)
		if !ok {
			continue
		}
		packages = append(packages, parsed)
	}
	return packages
}
// parseQuotedDependency turns one pinnedDependency match into a package.
//
// match is the raw regex match (it may still carry a leading quote and
// surrounding whitespace); line is the full source line and is only used for
// diagnostics further down. The boolean is false when match does not contain
// exactly one "==" separator or when the cleaned name/version is rejected by
// validateAndCreatePackage.
func parseQuotedDependency(match, line string, reader file.LocationReadCloser) (pkg.Package, bool) {
	const separator = "=="

	// Exactly one separator is required: zero or several occurrences would
	// make the name/version split ambiguous.
	if strings.Count(match, separator) != 1 {
		return pkg.Package{}, false
	}

	at := strings.Index(match, separator)
	rawName := match[:at]
	rawVersion := match[at+len(separator):]

	return validateAndCreatePackage(
		cleanDependencyString(rawName),
		cleanDependencyString(rawVersion),
		line,
		reader,
	)
}
// processUnquotedDependency looks for a single unquoted name==version pin at
// the start of line (see unquotedPinnedDependency). When one is found, passes
// validation, and is not already present in packages with the same name and
// version, it is appended. The resulting slice is returned; in every other
// case packages is returned unchanged.
func processUnquotedDependency(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package {
	groups := unquotedPinnedDependency.FindStringSubmatch(line)
	// Expect the full match plus the two capture groups (name, version).
	if len(groups) != 3 {
		return packages
	}

	pkgName := strings.TrimSpace(groups[1])
	pkgVersion := strings.TrimSpace(groups[2])

	candidate, ok := validateAndCreatePackage(pkgName, pkgVersion, line, reader)
	if !ok || isDuplicatePackage(candidate, packages) {
		return packages
	}
	return append(packages, candidate)
}
// cleanDependencyString normalises one half of a name==version pin: it strips
// single/double quotes from both ends, then surrounding whitespace, then any
// quotes that the whitespace was hiding.
func cleanDependencyString(s string) string {
	const quotes = "'\""
	return strings.Trim(strings.TrimSpace(strings.Trim(s, quotes)), quotes)
}
// validateAndCreatePackage builds a package for the given name and version,
// annotated with the reader's location as primary evidence.
//
// It returns (zero value, false) without creating anything when either field
// contains a template directive, or when either field is empty; the empty
// case is additionally reported at debug level together with the offending
// line.
func validateAndCreatePackage(name, version, line string, reader file.LocationReadCloser) (pkg.Package, bool) {
	switch {
	case hasTemplateDirective(name) || hasTemplateDirective(version):
		// this can happen in more dynamic setup.py where there is templating
		return pkg.Package{}, false
	case name == "" || version == "":
		log.WithFields("path", reader.RealPath).Debugf("unable to parse package in setup.py line: %q", line)
		return pkg.Package{}, false
	}

	location := reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)
	return newPackageForIndex(name, version, location), true
}
// isDuplicatePackage reports whether packages already holds an entry whose
// Name and Version both equal those of p. Only these two fields are compared.
func isDuplicatePackage(p pkg.Package, packages []pkg.Package) bool {
	for i := range packages {
		if packages[i].Name != p.Name {
			continue
		}
		if packages[i].Version == p.Version {
			return true
		}
	}
	return false
}
// hasTemplateDirective reports whether s contains any marker that indicates
// the value is a template placeholder rather than a literal: a %s verb or an
// opening/closing curly brace.
func hasTemplateDirective(s string) bool {
	for _, marker := range []string{`%s`, `{`, `}`} {
		if strings.Contains(s, marker) {
			return true
		}
	}
	return false
}

View File

@ -61,6 +61,94 @@ func TestParseSetup(t *testing.T) {
fixture: "test-fixtures/setup/dynamic-setup.py",
expected: nil,
},
{
fixture: "test-fixtures/setup/multiline-split-setup.py",
expected: []pkg.Package{
{
Name: "black",
Version: "23.12.1",
PURL: "pkg:pypi/black@23.12.1",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "cairosvg",
Version: "2.7.1",
PURL: "pkg:pypi/cairosvg@2.7.1",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "celery",
Version: "5.3.4",
PURL: "pkg:pypi/celery@5.3.4",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "django",
Version: "4.2.23",
PURL: "pkg:pypi/django@4.2.23",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "mypy",
Version: "1.7.1",
PURL: "pkg:pypi/mypy@1.7.1",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "pillow",
Version: "11.0.0",
PURL: "pkg:pypi/pillow@11.0.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "pytest",
Version: "7.4.3",
PURL: "pkg:pypi/pytest@7.4.3",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "requests",
Version: "2.31.0",
PURL: "pkg:pypi/requests@2.31.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
},
},
{
// Test mixed quoted and unquoted dependencies - ensure no duplicates
fixture: "test-fixtures/setup/mixed-format-setup.py",
expected: []pkg.Package{
{
Name: "requests",
Version: "2.31.0",
PURL: "pkg:pypi/requests@2.31.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "django",
Version: "4.2.23",
PURL: "pkg:pypi/django@4.2.23",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
{
Name: "flask",
Version: "3.0.0",
PURL: "pkg:pypi/flask@3.0.0",
Language: pkg.Python,
Type: pkg.PythonPkg,
},
},
},
}
for _, tt := range tests {

View File

@ -0,0 +1,17 @@
from setuptools import setup
# Test case to ensure duplicate detection works correctly
# when same dependencies appear in both quoted and unquoted forms
setup(
name='mixed-format-project',
version='1.0.0',
install_requires=[
# Quoted dependencies (should be caught by pinnedDependency regex)
"requests==2.31.0",
"django==4.2.23",
# The string below holds unquoted pins, one per line, turned into a list by split().
# requests repeats the quoted pin above and is expected to be reported only once; flask is new.
] + """
requests==2.31.0
flask==3.0.0
""".split(),
)

View File

@ -0,0 +1,23 @@
from setuptools import setup
# Example setup.py using multiline string with .split() pattern
# This pattern is commonly seen in projects like mayan-edms
setup(
name='example-project',
version='1.0.0',
# Unquoted pins, one per line; split() turns the string into a list at runtime.
# CairoSVG and Pillow are written in mixed case here, while the accompanying
# parser test expects them to be reported as cairosvg and pillow.
install_requires="""
django==4.2.23
CairoSVG==2.7.1
Pillow==11.0.0
requests==2.31.0
celery==5.3.4
""".split(),
# Same unquoted split() pattern, this time nested as a dict value.
extras_require={
'dev': """
pytest==7.4.3
black==23.12.1
mypy==1.7.1
""".split(),
},
)