feat: Add license enrichment from pypi to python packages (#4295)

* feat: Add license enrichment from pypi to python packages
* Implement license caching and improve test coverage
---------
Signed-off-by: Tim Olshansky <456103+timols@users.noreply.github.com>
This commit is contained in:
Tim Olshansky 2025-11-06 13:05:08 -08:00 committed by GitHub
parent 4e06a7ab32
commit bbef262b8f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 1038 additions and 67 deletions

View File

@ -198,9 +198,10 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config {
}, },
Nix: nix.DefaultConfig(). Nix: nix.DefaultConfig().
WithCaptureOwnedFiles(cfg.Nix.CaptureOwnedFiles), WithCaptureOwnedFiles(cfg.Nix.CaptureOwnedFiles),
Python: python.CatalogerConfig{ Python: python.DefaultCatalogerConfig().
GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Python), cfg.Python.SearchRemoteLicenses)).
}, WithPypiBaseURL(cfg.Python.PypiBaseURL).
WithGuessUnpinnedRequirements(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Python), cfg.Python.GuessUnpinnedRequirements)),
JavaArchive: java.DefaultArchiveCatalogerConfig(). JavaArchive: java.DefaultArchiveCatalogerConfig().
WithUseMavenLocalRepository(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseMavenLocalRepository)). WithUseMavenLocalRepository(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseMavenLocalRepository)).
WithMavenLocalRepositoryDir(cfg.Java.MavenLocalRepositoryDir). WithMavenLocalRepositoryDir(cfg.Java.MavenLocalRepositoryDir).
@ -320,6 +321,7 @@ var publicisedEnrichmentOptions = []string{
task.Golang, task.Golang,
task.Java, task.Java,
task.JavaScript, task.JavaScript,
task.Python,
} }
func enrichmentEnabled(enrichDirectives []string, features ...string) *bool { func enrichmentEnabled(enrichDirectives []string, features ...string) *bool {

View File

@ -3,7 +3,9 @@ package options
import "github.com/anchore/clio" import "github.com/anchore/clio"
type pythonConfig struct { type pythonConfig struct {
GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
PypiBaseURL string `json:"pypi-base-url" yaml:"pypi-base-url" mapstructure:"pypi-base-url"`
GuessUnpinnedRequirements *bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
} }
var _ interface { var _ interface {
@ -11,6 +13,8 @@ var _ interface {
} = (*pythonConfig)(nil) } = (*pythonConfig)(nil)
func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) {
descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`)
descriptions.Add(&o.PypiBaseURL, `base Pypi url to use`)
descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version
(e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could (e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could
be based on the version requirements specified (e.g. "1.0.0"). When enabled the lowest expressible version be based on the version requirements specified (e.g. "1.0.0"). When enabled the lowest expressible version

View File

@ -52,6 +52,9 @@ const (
JavaScript = "javascript" JavaScript = "javascript"
Node = "node" Node = "node"
NPM = "npm" NPM = "npm"
// Python ecosystem labels
Python = "python"
) )
//nolint:funlen //nolint:funlen
@ -109,7 +112,7 @@ func DefaultPackageTaskFactories() Factories {
func(cfg CatalogingFactoryConfig) pkg.Cataloger { func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return python.NewPackageCataloger(cfg.PackagesConfig.Python) return python.NewPackageCataloger(cfg.PackagesConfig.Python)
}, },
pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "python", pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, Python,
), ),
newSimplePackageTaskFactory(ruby.NewGemFileLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem"), newSimplePackageTaskFactory(ruby.NewGemFileLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem"),
newSimplePackageTaskFactory(ruby.NewGemSpecCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem", "gemspec"), newSimplePackageTaskFactory(ruby.NewGemSpecCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem", "gemspec"),
@ -127,7 +130,7 @@ func DefaultPackageTaskFactories() Factories {
pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#", pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#",
), ),
newSimplePackageTaskFactory(dotnet.NewDotnetPackagesLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#"), newSimplePackageTaskFactory(dotnet.NewDotnetPackagesLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#"),
newSimplePackageTaskFactory(python.NewInstalledPackageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "python"), newSimplePackageTaskFactory(python.NewInstalledPackageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, Python),
newPackageTaskFactory( newPackageTaskFactory(
func(cfg CatalogingFactoryConfig) pkg.Cataloger { func(cfg CatalogingFactoryConfig) pkg.Cataloger {
return golang.NewGoModuleBinaryCataloger(cfg.PackagesConfig.Golang) return golang.NewGoModuleBinaryCataloger(cfg.PackagesConfig.Golang)

View File

@ -10,28 +10,21 @@ import (
const eggInfoGlob = "**/*.egg-info" const eggInfoGlob = "**/*.egg-info"
type CatalogerConfig struct {
// GuessUnpinnedRequirements attempts to infer package versions from version constraints when no explicit version is specified in requirements files.
// app-config: python.guess-unpinned-requirements
GuessUnpinnedRequirements bool `yaml:"guess-unpinned-requirements" json:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
}
func DefaultCatalogerConfig() CatalogerConfig {
return CatalogerConfig{
GuessUnpinnedRequirements: false,
}
}
// NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. // NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files.
func NewPackageCataloger(cfg CatalogerConfig) pkg.Cataloger { func NewPackageCataloger(cfg CatalogerConfig) pkg.Cataloger {
rqp := newRequirementsParser(cfg) poetryLockParser := newPoetryLockParser(cfg)
pipfileLockParser := newPipfileLockParser(cfg)
setupFileParser := newSetupFileParser(cfg)
uvLockParser := newUvLockParser(cfg)
pdmLockParser := newPdmLockParser(cfg)
requirementsFileParser := newRequirementsParser(cfg)
return generic.NewCataloger("python-package-cataloger"). return generic.NewCataloger("python-package-cataloger").
WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt"). WithParserByGlobs(requirementsFileParser.parseRequirementsTxt, "**/*requirements*.txt").
WithParserByGlobs(parsePoetryLock, "**/poetry.lock"). WithParserByGlobs(poetryLockParser.parsePoetryLock, "**/poetry.lock").
WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock"). WithParserByGlobs(pipfileLockParser.parsePipfileLock, "**/Pipfile.lock").
WithParserByGlobs(parseSetup, "**/setup.py"). WithParserByGlobs(setupFileParser.parseSetupFile, "**/setup.py").
WithParserByGlobs(parseUvLock, "**/uv.lock"). WithParserByGlobs(uvLockParser.parseUvLock, "**/uv.lock").
WithParserByGlobs(parsePdmLock, "**/pdm.lock") WithParserByGlobs(pdmLockParser.parsePdmLock, "**/pdm.lock")
} }
// NewInstalledPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. // NewInstalledPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories.

View File

@ -0,0 +1,40 @@
package python
// pypiBaseURL is the base URL that DefaultCatalogerConfig uses for PyPI registry lookups.
const pypiBaseURL = "https://pypi.org/pypi"
// CatalogerConfig holds the options consumed by the python package cataloger.
type CatalogerConfig struct {
// GuessUnpinnedRequirements attempts to infer package versions from version constraints when no explicit version is specified in requirements files.
// app-config: python.guess-unpinned-requirements
GuessUnpinnedRequirements bool `yaml:"guess-unpinned-requirements" json:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"`
// SearchRemoteLicenses enables querying the PyPI registry API to retrieve license information for packages.
// app-config: python.search-remote-licenses
SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"`
// PypiBaseURL specifies the base URL for the PyPI registry API used when searching for remote license information.
// app-config: python.pypi-base-url
PypiBaseURL string `json:"pypi-base-url" yaml:"pypi-base-url" mapstructure:"pypi-base-url"`
}
// DefaultCatalogerConfig returns the baseline python cataloger configuration:
// version guessing and remote license search are both off, and the registry
// base URL is the package-level pypiBaseURL.
func DefaultCatalogerConfig() CatalogerConfig {
	var defaults CatalogerConfig
	defaults.GuessUnpinnedRequirements = false
	defaults.SearchRemoteLicenses = false
	defaults.PypiBaseURL = pypiBaseURL
	return defaults
}
// WithSearchRemoteLicenses returns a copy of the config with SearchRemoteLicenses set to input.
// The receiver is a value, so the caller's config is left untouched.
func (c CatalogerConfig) WithSearchRemoteLicenses(input bool) CatalogerConfig {
c.SearchRemoteLicenses = input
return c
}
// WithGuessUnpinnedRequirements returns a copy of the config with GuessUnpinnedRequirements set to input.
// The receiver is a value, so the caller's config is left untouched.
func (c CatalogerConfig) WithGuessUnpinnedRequirements(input bool) CatalogerConfig {
c.GuessUnpinnedRequirements = input
return c
}
// WithPypiBaseURL returns a copy of the config whose PypiBaseURL is input.
// An empty input is ignored, so the previously configured base URL is kept.
func (c CatalogerConfig) WithPypiBaseURL(input string) CatalogerConfig {
	if input == "" {
		return c
	}
	c.PypiBaseURL = input
	return c
}

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"os" "os"
"testing" "testing"
@ -259,7 +260,8 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) {
fh, err := os.Open(tt.fixture) fh, err := os.Open(tt.fixture)
require.NoError(t, err) require.NoError(t, err)
pkgs, err := poetryLockPackages(file.NewLocationReadCloser(file.NewLocation(tt.fixture), fh)) plp := newPoetryLockParser(DefaultCatalogerConfig())
pkgs, err := plp.poetryLockPackages(context.TODO(), file.NewLocationReadCloser(file.NewLocation(tt.fixture), fh))
require.NoError(t, err) require.NoError(t, err)
var got []dependency.Specification var got []dependency.Specification

View File

@ -0,0 +1,131 @@
package python
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"github.com/anchore/syft/internal/cache"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/pkg"
)
// pythonLicenseResolver looks up licenses for python packages, optionally from a remote registry.
type pythonLicenseResolver struct {
// catalogerConfig supplies SearchRemoteLicenses and PypiBaseURL for remote lookups.
catalogerConfig CatalogerConfig
// licenseCache resolves and stores license lists keyed by "<name>/<version>".
licenseCache cache.Resolver[[]pkg.License]
}
// newPythonLicenseResolver builds a license resolver for the given cataloger
// config, backed by the cache resolver registered under "python" / "v1".
func newPythonLicenseResolver(config CatalogerConfig) pythonLicenseResolver {
	var resolver pythonLicenseResolver
	resolver.licenseCache = cache.GetResolverCachingErrors[[]pkg.License]("python", "v1")
	resolver.catalogerConfig = config
	return resolver
}
// getLicenses returns the license set for the given package and version.
// When remote license search is disabled the zero-value set is returned.
// Lookup failures are logged at debug level and also yield the zero-value set,
// so this never fails the caller.
func (lr *pythonLicenseResolver) getLicenses(ctx context.Context, packageName string, packageVersion string) pkg.LicenseSet {
	var resolved pkg.LicenseSet
	if !lr.catalogerConfig.SearchRemoteLicenses {
		return resolved
	}

	found, err := lr.getLicensesFromRemote(ctx, packageName, packageVersion)
	switch {
	case err != nil:
		log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err)
	case found != nil:
		resolved = pkg.NewLicenseSet(found...)
	}
	return resolved
}
// getLicensesFromRemote resolves the licenses for packageName at packageVersion
// through the license cache, keyed by "<name>/<version>". On a cache miss the
// configured PyPI base URL is queried.
//
// It returns (nil, nil) when the registry reports no license, and (nil, err)
// when the lookup fails. The error is not logged here: the caller is the single
// place that reports it, which avoids emitting the same message twice.
//
// Note that ctx is only used when building license values; the registry lookup
// helper takes no context.
func (lr *pythonLicenseResolver) getLicensesFromRemote(ctx context.Context, packageName string, packageVersion string) ([]pkg.License, error) {
	cacheKey := fmt.Sprintf("%s/%s", packageName, packageVersion)
	return lr.licenseCache.Resolve(cacheKey, func() ([]pkg.License, error) {
		license, err := getLicenseFromPypiRegistry(lr.catalogerConfig.PypiBaseURL, packageName, packageVersion)
		if err != nil {
			return nil, err
		}
		if license == "" {
			return nil, nil
		}
		return pkg.NewLicensesFromValuesWithContext(ctx, license), nil
	})
}
// formatPypiRegistryURL joins baseURL, packageName, version and the literal
// "json" into the registry request URL, e.g.
// "https://pypi.org/pypi/<packageName>/<version>/json". An empty version is
// dropped by the path join, yielding "<baseURL>/<packageName>/json".
//
// It returns an empty URL and an error when packageName is blank or when
// baseURL cannot be parsed.
func formatPypiRegistryURL(baseURL, packageName, version string) (requestURL string, err error) {
	if packageName == "" {
		return "", fmt.Errorf("unable to format pypi request for a blank package name")
	}

	requestURL, err = url.JoinPath(baseURL, packageName, version, "json")
	if err != nil {
		// separate name and version so the message stays readable
		return "", fmt.Errorf("unable to format pypi request for pkg:version %s:%s; %w", packageName, version, err)
	}
	return requestURL, nil
}
// getLicenseFromPypiRegistry requests "<baseURL>/<packageName>/<version>/json"
// and returns the license recorded under the response's "info" object. The
// "license_expression" field wins when it is non-empty; otherwise "license" is
// returned, which may itself be empty.
//
// The request is issued without a caller-supplied context and is bounded only
// by the client's 10 second timeout. Any non-200 response is reported as an
// error that includes the response status.
func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, error) {
	// e.g. https://pypi.org/pypi/<packageName>/<version>/json
	requestURL, err := formatPypiRegistryURL(baseURL, packageName, version)
	if err != nil {
		// the helper's error already says what could not be formatted;
		// wrapping it again would only repeat the same prefix
		return "", err
	}

	log.WithFields("url", requestURL).Info("downloading python package from pypi")

	pypiRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
	if err != nil {
		return "", fmt.Errorf("unable to format remote request: %w", err)
	}

	httpClient := &http.Client{
		Timeout: 10 * time.Second,
	}

	resp, err := httpClient.Do(pypiRequest)
	if err != nil {
		return "", fmt.Errorf("unable to get package from pypi registry: %w", err)
	}
	defer func() {
		if err := resp.Body.Close(); err != nil {
			log.Errorf("unable to close body: %+v", err)
		}
	}()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("unable to get package from pypi registry: unexpected status %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("unable to parse package from pypi registry: %w", err)
	}

	// only the license fields of the "info" object are of interest
	var pypiResponse struct {
		Info struct {
			License           string `json:"license"`
			LicenseExpression string `json:"license_expression"`
		} `json:"info"`
	}

	dec := json.NewDecoder(strings.NewReader(string(body)))
	if err := dec.Decode(&pypiResponse); err != nil {
		return "", fmt.Errorf("unable to parse license from pypi registry: %w", err)
	}

	// prefer the license expression field, fall back to the license field
	license := pypiResponse.Info.License
	if pypiResponse.Info.LicenseExpression != "" {
		license = pypiResponse.Info.LicenseExpression
	}
	log.Tracef("Retrieved License: %s", license)

	return license, nil
}

View File

@ -0,0 +1,177 @@
package python
import (
"fmt"
"io"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/stretchr/testify/require"
)
// TestFormatPyPiRegistryURL checks the request URL built from the default
// registry base URL for versioned, unversioned and blank-name inputs.
func TestFormatPyPiRegistryURL(t *testing.T) {
	type testCase struct {
		name          string
		version       string
		expected      string
		expectedError error
	}

	cases := []testCase{
		{name: "package1", version: "1.0", expected: "https://pypi.org/pypi/package1/1.0/json"},
		{name: "package-1", version: "", expected: "https://pypi.org/pypi/package-1/json"},
		{name: "_", version: "a", expected: "https://pypi.org/pypi/_/a/json"},
		{
			name:          "",
			version:       "a",
			expected:      "",
			expectedError: fmt.Errorf("unable to format pypi request for a blank package name"),
		},
	}

	baseURL := DefaultCatalogerConfig().PypiBaseURL

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			got, err := formatPypiRegistryURL(baseURL, tc.name, tc.version)
			require.Equal(t, tc.expected, got)

			if tc.expectedError == nil {
				require.NoError(t, err)
				return
			}
			require.ErrorContains(t, err, tc.expectedError.Error())
		})
	}
}
// TestGetLicenseFromPypiRegistry runs the registry lookup against a local mock
// server, covering a successful response, a not-found response and a response
// body that cannot be decoded.
func TestGetLicenseFromPypiRegistry(t *testing.T) {
	mux, registryURL, teardown := setupPypiRegistry()
	defer teardown()

	type testCase struct {
		name            string
		version         string
		requestHandlers []handlerPath
		expected        string
		expectedError   error
	}

	cases := []testCase{
		{
			name:    "certifi",
			version: "2025.10.5",
			requestHandlers: []handlerPath{{
				path:    "/certifi/2025.10.5/json",
				handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
			}},
			expected: "MPL-2.0",
		},
		{
			name:    "package",
			version: "1.0",
			requestHandlers: []handlerPath{{
				path:    "/package/1.0/json",
				handler: generateMockPypiRegistryHandlerWithStatus("", http.StatusNotFound),
			}},
			expectedError: fmt.Errorf("unable to get package from pypi registry"),
		},
		{
			name:    "package",
			version: "2.0",
			requestHandlers: []handlerPath{{
				path:    "/package/2.0/json",
				handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response_bad.json"),
			}},
			expectedError: fmt.Errorf("unable to parse license from pypi registry: EOF"),
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			// register this case's routes on the shared mock server
			for _, route := range tc.requestHandlers {
				mux.HandleFunc(route.path, route.handler)
			}

			got, err := getLicenseFromPypiRegistry(registryURL, tc.name, tc.version)
			require.Equal(t, tc.expected, got)

			if tc.expectedError == nil {
				require.NoError(t, err)
				return
			}
			require.ErrorContains(t, err, tc.expectedError.Error())
		})
	}
}
// handlerPath pairs a request path with the handler that tests register for it on the mock registry mux.
type handlerPath struct {
path string
handler func(w http.ResponseWriter, r *http.Request)
}
// generateMockPypiRegistryHandler returns a mock registry handler for responseFixture
// by delegating to generateMockPypiRegistryHandlerWithStatus with http.StatusOK.
func generateMockPypiRegistryHandler(responseFixture string) func(w http.ResponseWriter, r *http.Request) {
return generateMockPypiRegistryHandlerWithStatus(responseFixture, http.StatusOK)
}
// generateMockPypiRegistryHandlerWithStatus returns a handler that answers with
// mockHttpStatus. For http.StatusOK the contents of responseFixture are
// streamed as the response body; for any other status a short plain-text error
// body is sent with that status and the fixture is not read.
//
// A fixture that cannot be opened is reported as a 500.
func generateMockPypiRegistryHandlerWithStatus(responseFixture string, mockHttpStatus int) func(w http.ResponseWriter, r *http.Request) {
	return func(w http.ResponseWriter, r *http.Request) {
		if mockHttpStatus != http.StatusOK {
			// reply with the status the test asked for
			http.Error(w, "Error for status", mockHttpStatus)
			return
		}

		// open the fixture before any header is written so that a missing
		// fixture can still be reported with a 500 status
		file, err := os.Open(responseFixture)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		defer file.Close()

		w.WriteHeader(http.StatusOK)

		// copy the file's content to the response writer
		if _, err := io.Copy(w, file); err != nil {
			// the status line is already sent, so this can only append the
			// message to the partially written body
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
	}
}
// setupPypiRegistry starts a local test HTTP server that stands in for a
// package registry. Tests register their handlers on the returned mux, pass
// the returned serverURL to the code under test as the registry base URL, and
// call teardown to stop the server.
func setupPypiRegistry() (mux *http.ServeMux, serverURL string, teardown func()) {
	// mux receives the handlers registered by individual tests
	mux = http.NewServeMux()

	// the server's root handler forwards every path to mux
	root := http.NewServeMux()
	root.Handle("/", mux)

	srv := httptest.NewServer(root)
	serverURL, teardown = srv.URL, srv.Close

	return mux, serverURL, teardown
}

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"fmt" "fmt"
"regexp" "regexp"
"strings" "strings"
@ -17,12 +18,14 @@ func normalize(name string) string {
return strings.ToLower(normalized) return strings.ToLower(normalized)
} }
func newPackageForIndex(name, version string, locations ...file.Location) pkg.Package { func newPackageForIndex(ctx context.Context, lr pythonLicenseResolver, name, version string, locations ...file.Location) pkg.Package {
name = normalize(name) name = normalize(name)
licenseSet := lr.getLicenses(ctx, name, version)
p := pkg.Package{ p := pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Licenses: licenseSet,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
PURL: packageURL(name, version, nil), PURL: packageURL(name, version, nil),
Language: pkg.Python, Language: pkg.Python,
@ -34,12 +37,14 @@ func newPackageForIndex(name, version string, locations ...file.Location) pkg.Pa
return p return p
} }
func newPackageForIndexWithMetadata(name, version string, metadata interface{}, locations ...file.Location) pkg.Package { func newPackageForIndexWithMetadata(ctx context.Context, lr pythonLicenseResolver, name, version string, metadata interface{}, locations ...file.Location) pkg.Package {
name = normalize(name) name = normalize(name)
licenseSet := lr.getLicenses(ctx, name, version)
p := pkg.Package{ p := pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Licenses: licenseSet,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
PURL: packageURL(name, version, nil), PURL: packageURL(name, version, nil),
Language: pkg.Python, Language: pkg.Python,
@ -52,12 +57,14 @@ func newPackageForIndexWithMetadata(name, version string, metadata interface{},
return p return p
} }
func newPackageForRequirementsWithMetadata(name, version string, metadata pkg.PythonRequirementsEntry, locations ...file.Location) pkg.Package { func newPackageForRequirementsWithMetadata(ctx context.Context, lr pythonLicenseResolver, name, version string, metadata pkg.PythonRequirementsEntry, locations ...file.Location) pkg.Package {
name = normalize(name) name = normalize(name)
licenseSet := lr.getLicenses(ctx, name, version)
p := pkg.Package{ p := pkg.Package{
Name: name, Name: name,
Version: version, Version: version,
Licenses: licenseSet,
Locations: file.NewLocationSet(locations...), Locations: file.NewLocationSet(locations...),
PURL: packageURL(name, version, nil), PURL: packageURL(name, version, nil),
Language: pkg.Python, Language: pkg.Python,

View File

@ -39,10 +39,20 @@ type pdmLockPackageFile struct {
Hash string `toml:"hash"` Hash string `toml:"hash"`
} }
var _ generic.Parser = parsePdmLock type pdmLockParser struct {
cfg CatalogerConfig
licenseResolver pythonLicenseResolver
}
func newPdmLockParser(cfg CatalogerConfig) pdmLockParser {
return pdmLockParser{
cfg: cfg,
licenseResolver: newPythonLicenseResolver(cfg),
}
}
// parsePdmLock is a parser function for pdm.lock contents, returning python packages discovered. // parsePdmLock is a parser function for pdm.lock contents, returning python packages discovered.
func parsePdmLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (plp pdmLockParser) parsePdmLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var lock pdmLock var lock pdmLock
_, err := toml.NewDecoder(reader).Decode(&lock) _, err := toml.NewDecoder(reader).Decode(&lock)
if err != nil { if err != nil {
@ -85,6 +95,8 @@ func parsePdmLock(_ context.Context, _ file.Resolver, _ *generic.Environment, re
} }
pkgs = append(pkgs, newPackageForIndexWithMetadata( pkgs = append(pkgs, newPackageForIndexWithMetadata(
ctx,
plp.licenseResolver,
p.Name, p.Name,
p.Version, p.Version,
pythonPkgMetadata, pythonPkgMetadata,

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -352,12 +353,81 @@ func TestParsePdmLock(t *testing.T) {
}, },
} }
pkgtest.TestFileParser(t, fixture, parsePdmLock, expectedPkgs, expectedRelationships) pdmLockParser := newPdmLockParser(DefaultCatalogerConfig())
pkgtest.TestFileParser(t, fixture, pdmLockParser.parsePdmLock, expectedPkgs, expectedRelationships)
}
func TestParsePdmLockWithLicenseEnrichment(t *testing.T) {
ctx := context.TODO()
fixture := "test-fixtures/pypi-remote/pdm.lock"
locations := file.NewLocationSet(file.NewLocation(fixture))
mux, url, teardown := setupPypiRegistry()
defer teardown()
tests := []struct {
name string
fixture string
config CatalogerConfig
requestHandlers []handlerPath
expectedPackages []pkg.Package
}{
{
name: "search remote licenses returns the expected licenses when search is set to true",
config: CatalogerConfig{SearchRemoteLicenses: true},
requestHandlers: []handlerPath{
{
path: "/certifi/2025.10.5/json",
handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
},
},
expectedPackages: []pkg.Package{
{
Name: "certifi",
Version: "2025.10.5",
Locations: locations,
PURL: "pkg:pypi/certifi@2025.10.5",
Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
Language: pkg.Python,
Type: pkg.PythonPkg,
Metadata: pkg.PythonPdmLockEntry{
Summary: "Python package for providing Mozilla's CA Bundle.",
Files: []pkg.PythonFileRecord{
{
Path: "",
Digest: &pkg.PythonFileDigest{
Algorithm: "sha256",
Value: "47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43",
},
},
{
Path: "",
Digest: &pkg.PythonFileDigest{
Algorithm: "sha256",
Value: "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de",
},
},
},
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// set up the mock server
for _, handler := range tc.requestHandlers {
mux.HandleFunc(handler.path, handler.handler)
}
tc.config.PypiBaseURL = url
pdmLockParser := newPdmLockParser(tc.config)
pkgtest.TestFileParser(t, fixture, pdmLockParser.parsePdmLock, tc.expectedPackages, nil)
})
}
} }
func Test_corruptPdmLock(t *testing.T) { func Test_corruptPdmLock(t *testing.T) {
pdmLockParser := newPdmLockParser(DefaultCatalogerConfig())
pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromFile(t, "test-fixtures/glob-paths/src/pdm.lock"). FromFile(t, "test-fixtures/glob-paths/src/pdm.lock").
WithError(). WithError().
TestParser(t, parsePdmLock) TestParser(t, pdmLockParser.parsePdmLock)
} }

View File

@ -39,10 +39,20 @@ type pipfileLockDependency struct {
Index string `json:"index"` Index string `json:"index"`
} }
var _ generic.Parser = parsePipfileLock type pipfileLockParser struct {
cfg CatalogerConfig
licenseResolver pythonLicenseResolver
}
func newPipfileLockParser(cfg CatalogerConfig) pipfileLockParser {
return pipfileLockParser{
cfg: cfg,
licenseResolver: newPythonLicenseResolver(cfg),
}
}
// parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered. // parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered.
func parsePipfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (plp pipfileLockParser) parsePipfileLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pkgs := make([]pkg.Package, 0) pkgs := make([]pkg.Package, 0)
dec := json.NewDecoder(reader) dec := json.NewDecoder(reader)
@ -66,7 +76,7 @@ func parsePipfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment
index = "https://pypi.org/simple" index = "https://pypi.org/simple"
} }
version := strings.TrimPrefix(pkgMeta.Version, "==") version := strings.TrimPrefix(pkgMeta.Version, "==")
pkgs = append(pkgs, newPackageForIndexWithMetadata(name, version, pkg.PythonPipfileLockEntry{Index: index, Hashes: pkgMeta.Hashes}, reader.Location)) pkgs = append(pkgs, newPackageForIndexWithMetadata(ctx, plp.licenseResolver, name, version, pkg.PythonPipfileLockEntry{Index: index, Hashes: pkgMeta.Hashes}, reader.Location))
} }
} }

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -78,12 +79,69 @@ func TestParsePipFileLock(t *testing.T) {
// TODO: relationships are not under test // TODO: relationships are not under test
var expectedRelationships []artifact.Relationship var expectedRelationships []artifact.Relationship
pkgtest.TestFileParser(t, fixture, parsePipfileLock, expectedPkgs, expectedRelationships) pipfileLockParser := newPipfileLockParser(DefaultCatalogerConfig())
pkgtest.TestFileParser(t, fixture, pipfileLockParser.parsePipfileLock, expectedPkgs, expectedRelationships)
}
func TestParsePipfileLockWithLicenseEnrichment(t *testing.T) {
ctx := context.TODO()
fixture := "test-fixtures/pypi-remote/Pipfile.lock"
locations := file.NewLocationSet(file.NewLocation(fixture))
mux, url, teardown := setupPypiRegistry()
defer teardown()
tests := []struct {
name string
fixture string
config CatalogerConfig
requestHandlers []handlerPath
expectedPackages []pkg.Package
}{
{
name: "search remote licenses returns the expected licenses when search is set to true",
config: CatalogerConfig{SearchRemoteLicenses: true},
requestHandlers: []handlerPath{
{
path: "/certifi/2025.10.5/json",
handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
},
},
expectedPackages: []pkg.Package{
{
Name: "certifi",
Version: "2025.10.5",
Locations: locations,
PURL: "pkg:pypi/certifi@2025.10.5",
Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
Language: pkg.Python,
Type: pkg.PythonPkg,
Metadata: pkg.PythonPipfileLockEntry{
Index: "https://pypi.org/simple",
Hashes: []string{
"sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43",
"sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de",
},
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// set up the mock server
for _, handler := range tc.requestHandlers {
mux.HandleFunc(handler.path, handler.handler)
}
tc.config.PypiBaseURL = url
pipfileLockParser := newPipfileLockParser(tc.config)
pkgtest.TestFileParser(t, fixture, pipfileLockParser.parsePipfileLock, tc.expectedPackages, nil)
})
}
} }
func Test_corruptPipfileLock(t *testing.T) { func Test_corruptPipfileLock(t *testing.T) {
pipfileLockParser := newPipfileLockParser(DefaultCatalogerConfig())
pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromFile(t, "test-fixtures/glob-paths/src/Pipfile.lock"). FromFile(t, "test-fixtures/glob-paths/src/Pipfile.lock").
WithError(). WithError().
TestParser(t, parsePipfileLock) TestParser(t, pipfileLockParser.parsePipfileLock)
} }

View File

@ -16,9 +16,6 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/internal/dependency" "github.com/anchore/syft/syft/pkg/cataloger/internal/dependency"
) )
// integrity check
var _ generic.Parser = parsePoetryLock
type poetryPackageSource struct { type poetryPackageSource struct {
URL string `toml:"url"` URL string `toml:"url"`
Type string `toml:"type"` Type string `toml:"type"`
@ -48,9 +45,21 @@ type poetryPackageDependency struct {
Extras []string `toml:"extras"` Extras []string `toml:"extras"`
} }
type poetryLockParser struct {
cfg CatalogerConfig
licenseResolver pythonLicenseResolver
}
func newPoetryLockParser(cfg CatalogerConfig) poetryLockParser {
return poetryLockParser{
cfg: cfg,
licenseResolver: newPythonLicenseResolver(cfg),
}
}
// parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered. // parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered.
func parsePoetryLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (plp poetryLockParser) parsePoetryLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pkgs, err := poetryLockPackages(reader) pkgs, err := plp.poetryLockPackages(ctx, reader)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -61,7 +70,7 @@ func parsePoetryLock(_ context.Context, _ file.Resolver, _ *generic.Environment,
return pkgs, dependency.Resolve(poetryLockDependencySpecifier, pkgs), unknown.IfEmptyf(pkgs, "unable to determine packages") return pkgs, dependency.Resolve(poetryLockDependencySpecifier, pkgs), unknown.IfEmptyf(pkgs, "unable to determine packages")
} }
func poetryLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { func (plp poetryLockParser) poetryLockPackages(ctx context.Context, reader file.LocationReadCloser) ([]pkg.Package, error) {
metadata := poetryPackages{} metadata := poetryPackages{}
md, err := toml.NewDecoder(reader).Decode(&metadata) md, err := toml.NewDecoder(reader).Decode(&metadata)
if err != nil { if err != nil {
@ -96,6 +105,8 @@ func poetryLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) {
pkgs = append( pkgs = append(
pkgs, pkgs,
newPackageForIndexWithMetadata( newPackageForIndexWithMetadata(
ctx,
plp.licenseResolver,
p.Name, p.Name,
p.Version, p.Version,
newPythonPoetryLockEntry(p), newPythonPoetryLockEntry(p),

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -79,12 +80,64 @@ func TestParsePoetryLock(t *testing.T) {
var expectedRelationships []artifact.Relationship var expectedRelationships []artifact.Relationship
pkgtest.TestFileParser(t, fixture, parsePoetryLock, expectedPkgs, expectedRelationships) poetryLockParser := newPoetryLockParser(DefaultCatalogerConfig())
pkgtest.TestFileParser(t, fixture, poetryLockParser.parsePoetryLock, expectedPkgs, expectedRelationships)
} }
// TestParsePoetryLockWithLicenseEnrichment checks that packages parsed from the
// pypi-remote poetry.lock fixture carry the expected license when
// SearchRemoteLicenses is enabled and the parser is pointed at a mock PyPI
// registry.
func TestParsePoetryLockWithLicenseEnrichment(t *testing.T) {
	ctx := context.TODO()
	fixture := "test-fixtures/pypi-remote/poetry.lock"
	locations := file.NewLocationSet(file.NewLocation(fixture))

	tests := []struct {
		name             string
		config           CatalogerConfig
		requestHandlers  []handlerPath
		expectedPackages []pkg.Package
	}{
		{
			name:   "search remote licenses returns the expected licenses when search is set to true",
			config: CatalogerConfig{SearchRemoteLicenses: true},
			requestHandlers: []handlerPath{
				{
					path:    "/certifi/2025.10.5/json",
					handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
				},
			},
			expectedPackages: []pkg.Package{
				{
					Name:      "certifi",
					Version:   "2025.10.5",
					Locations: locations,
					PURL:      "pkg:pypi/certifi@2025.10.5",
					Licenses:  pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
					Language:  pkg.Python,
					Type:      pkg.PythonPkg,
					Metadata: pkg.PythonPoetryLockEntry{
						Index: "https://pypi.org/simple",
					},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Every case gets its own mock registry so that handlers registered
			// for one case cannot leak into, or collide with, those of another.
			// NOTE(review): assumes mux behaves like http.ServeMux, which panics
			// when the same pattern is registered twice — confirm against
			// setupPypiRegistry.
			mux, baseURL, teardown := setupPypiRegistry()
			defer teardown()

			for _, h := range tc.requestHandlers {
				mux.HandleFunc(h.path, h.handler)
			}

			tc.config.PypiBaseURL = baseURL
			// named "parser" to avoid shadowing the poetryLockParser type
			parser := newPoetryLockParser(tc.config)
			pkgtest.TestFileParser(t, fixture, parser.parsePoetryLock, tc.expectedPackages, nil)
		})
	}
}
func Test_corruptPoetryLock(t *testing.T) { func Test_corruptPoetryLock(t *testing.T) {
poetryLockParser := newPoetryLockParser(DefaultCatalogerConfig())
pkgtest.NewCatalogTester(). pkgtest.NewCatalogTester().
FromFile(t, "test-fixtures/glob-paths/src/poetry.lock"). FromFile(t, "test-fixtures/glob-paths/src/poetry.lock").
WithError(). WithError().
TestParser(t, parsePoetryLock) TestParser(t, poetryLockParser.parsePoetryLock)
} }

View File

@ -83,18 +83,20 @@ func newRequirement(raw string) *unprocessedRequirement {
} }
type requirementsParser struct { type requirementsParser struct {
guessUnpinnedRequirements bool cfg CatalogerConfig
licenseResolver pythonLicenseResolver
} }
func newRequirementsParser(cfg CatalogerConfig) requirementsParser { func newRequirementsParser(cfg CatalogerConfig) requirementsParser {
return requirementsParser{ return requirementsParser{
guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, cfg: cfg,
licenseResolver: newPythonLicenseResolver(cfg),
} }
} }
// parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a
// specific version. // specific version.
func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (rp requirementsParser) parseRequirementsTxt(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var errs error var errs error
var packages []pkg.Package var packages []pkg.Package
@ -133,7 +135,7 @@ func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Reso
} }
name := removeExtras(req.Name) name := removeExtras(req.Name)
version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) version := parseVersion(req.VersionConstraint, rp.cfg.GuessUnpinnedRequirements)
if version == "" { if version == "" {
log.WithFields("path", reader.RealPath, "line", line).Trace("unable to determine package version in requirements.txt line") log.WithFields("path", reader.RealPath, "line", line).Trace("unable to determine package version in requirements.txt line")
@ -144,6 +146,8 @@ func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Reso
packages = append( packages = append(
packages, packages,
newPackageForRequirementsWithMetadata( newPackageForRequirementsWithMetadata(
ctx,
rp.licenseResolver,
name, name,
version, version,
pkg.PythonRequirementsEntry{ pkg.PythonRequirementsEntry{

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -226,6 +227,58 @@ func TestParseRequirementsTxt(t *testing.T) {
} }
} }
// TestParseRequirementsTxtWithLicenseEnrichment checks that packages parsed
// from the pypi-remote requirements.txt fixture carry the expected license when
// SearchRemoteLicenses is enabled and the parser is pointed at a mock PyPI
// registry.
func TestParseRequirementsTxtWithLicenseEnrichment(t *testing.T) {
	ctx := context.TODO()
	fixture := "test-fixtures/pypi-remote/requirements.txt"
	locations := file.NewLocationSet(file.NewLocation(fixture))

	tests := []struct {
		name             string
		config           CatalogerConfig
		requestHandlers  []handlerPath
		expectedPackages []pkg.Package
	}{
		{
			name:   "search remote licenses returns the expected licenses when search is set to true",
			config: CatalogerConfig{SearchRemoteLicenses: true},
			requestHandlers: []handlerPath{
				{
					path:    "/certifi/2025.10.5/json",
					handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
				},
			},
			expectedPackages: []pkg.Package{
				{
					Name:      "certifi",
					Version:   "2025.10.5",
					Locations: locations,
					PURL:      "pkg:pypi/certifi@2025.10.5",
					Licenses:  pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
					Language:  pkg.Python,
					Type:      pkg.PythonPkg,
					Metadata: pkg.PythonRequirementsEntry{
						Name:              "certifi",
						VersionConstraint: "== 2025.10.5",
					},
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Every case gets its own mock registry so that handlers registered
			// for one case cannot leak into, or collide with, those of another.
			// NOTE(review): assumes mux behaves like http.ServeMux, which panics
			// when the same pattern is registered twice — confirm against
			// setupPypiRegistry.
			mux, baseURL, teardown := setupPypiRegistry()
			defer teardown()

			for _, h := range tc.requestHandlers {
				mux.HandleFunc(h.path, h.handler)
			}

			tc.config.PypiBaseURL = baseURL
			// named "parser" to avoid shadowing the requirementsParser type
			parser := newRequirementsParser(tc.config)
			pkgtest.TestFileParser(t, fixture, parser.parseRequirementsTxt, tc.expectedPackages, nil)
		})
	}
}
func Test_newRequirement(t *testing.T) { func Test_newRequirement(t *testing.T) {
tests := []struct { tests := []struct {

View File

@ -13,8 +13,17 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/generic" "github.com/anchore/syft/syft/pkg/cataloger/generic"
) )
// integrity check type setupFileParser struct {
var _ generic.Parser = parseSetup cfg CatalogerConfig
licenseResolver pythonLicenseResolver
}
// newSetupFileParser returns a setupFileParser that keeps the given
// configuration and a license resolver derived from it.
func newSetupFileParser(cfg CatalogerConfig) setupFileParser {
	resolver := newPythonLicenseResolver(cfg)
	return setupFileParser{cfg: cfg, licenseResolver: resolver}
}
// match examples: // match examples:
// //
@ -24,7 +33,7 @@ var _ generic.Parser = parseSetup
var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`) var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`)
var unquotedPinnedDependency = regexp.MustCompile(`^\s*(\w+)\s*==\s*([\w\.\-]+)`) var unquotedPinnedDependency = regexp.MustCompile(`^\s*(\w+)\s*==\s*([\w\.\-]+)`)
func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (sp setupFileParser) parseSetupFile(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
var packages []pkg.Package var packages []pkg.Package
scanner := bufio.NewScanner(reader) scanner := bufio.NewScanner(reader)
@ -33,23 +42,23 @@ func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, read
line := scanner.Text() line := scanner.Text()
line = strings.TrimRight(line, "\n") line = strings.TrimRight(line, "\n")
packages = processQuotedDependencies(line, reader, packages) packages = sp.processQuotedDependencies(ctx, line, reader, packages)
packages = processUnquotedDependency(line, reader, packages) packages = sp.processUnquotedDependency(ctx, line, reader, packages)
} }
return packages, nil, nil return packages, nil, nil
} }
func processQuotedDependencies(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { func (sp setupFileParser) processQuotedDependencies(ctx context.Context, line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package {
for _, match := range pinnedDependency.FindAllString(line, -1) { for _, match := range pinnedDependency.FindAllString(line, -1) {
if p, ok := parseQuotedDependency(match, line, reader); ok { if p, ok := sp.parseQuotedDependency(ctx, match, line, reader); ok {
packages = append(packages, p) packages = append(packages, p)
} }
} }
return packages return packages
} }
func parseQuotedDependency(match, line string, reader file.LocationReadCloser) (pkg.Package, bool) { func (sp setupFileParser) parseQuotedDependency(ctx context.Context, match, line string, reader file.LocationReadCloser) (pkg.Package, bool) {
parts := strings.Split(match, "==") parts := strings.Split(match, "==")
if len(parts) != 2 { if len(parts) != 2 {
return pkg.Package{}, false return pkg.Package{}, false
@ -58,11 +67,11 @@ func parseQuotedDependency(match, line string, reader file.LocationReadCloser) (
name := cleanDependencyString(parts[0]) name := cleanDependencyString(parts[0])
version := cleanDependencyString(parts[len(parts)-1]) version := cleanDependencyString(parts[len(parts)-1])
return validateAndCreatePackage(name, version, line, reader) return sp.validateAndCreatePackage(ctx, name, version, line, reader)
} }
// processUnquotedDependency extracts and processes an unquoted dependency from a line // processUnquotedDependency extracts and processes an unquoted dependency from a line
func processUnquotedDependency(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { func (sp setupFileParser) processUnquotedDependency(ctx context.Context, line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package {
matches := unquotedPinnedDependency.FindStringSubmatch(line) matches := unquotedPinnedDependency.FindStringSubmatch(line)
if len(matches) != 3 { if len(matches) != 3 {
return packages return packages
@ -71,7 +80,7 @@ func processUnquotedDependency(line string, reader file.LocationReadCloser, pack
name := strings.TrimSpace(matches[1]) name := strings.TrimSpace(matches[1])
version := strings.TrimSpace(matches[2]) version := strings.TrimSpace(matches[2])
if p, ok := validateAndCreatePackage(name, version, line, reader); ok { if p, ok := sp.validateAndCreatePackage(ctx, name, version, line, reader); ok {
if !isDuplicatePackage(p, packages) { if !isDuplicatePackage(p, packages) {
packages = append(packages, p) packages = append(packages, p)
} }
@ -87,7 +96,7 @@ func cleanDependencyString(s string) string {
return s return s
} }
func validateAndCreatePackage(name, version, line string, reader file.LocationReadCloser) (pkg.Package, bool) { func (sp setupFileParser) validateAndCreatePackage(ctx context.Context, name, version, line string, reader file.LocationReadCloser) (pkg.Package, bool) {
if hasTemplateDirective(name) || hasTemplateDirective(version) { if hasTemplateDirective(name) || hasTemplateDirective(version) {
// this can happen in more dynamic setup.py where there is templating // this can happen in more dynamic setup.py where there is templating
return pkg.Package{}, false return pkg.Package{}, false
@ -99,6 +108,8 @@ func validateAndCreatePackage(name, version, line string, reader file.LocationRe
} }
p := newPackageForIndex( p := newPackageForIndex(
ctx,
sp.licenseResolver,
name, name,
version, version,
reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation),

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -159,12 +160,60 @@ func TestParseSetup(t *testing.T) {
} }
var expectedRelationships []artifact.Relationship var expectedRelationships []artifact.Relationship
pkgtest.TestFileParser(t, tt.fixture, parseSetup, tt.expected, expectedRelationships) setupFileParser := newSetupFileParser(DefaultCatalogerConfig())
pkgtest.TestFileParser(t, tt.fixture, setupFileParser.parseSetupFile, tt.expected, expectedRelationships)
}) })
} }
} }
// TestParseSetupFileWithLicenseEnrichment checks that packages parsed from the
// pypi-remote setup.py fixture carry the expected license when
// SearchRemoteLicenses is enabled and the parser is pointed at a mock PyPI
// registry.
func TestParseSetupFileWithLicenseEnrichment(t *testing.T) {
	ctx := context.TODO()
	fixture := "test-fixtures/pypi-remote/setup.py"
	locations := file.NewLocationSet(file.NewLocation(fixture))

	tests := []struct {
		name             string
		config           CatalogerConfig
		requestHandlers  []handlerPath
		expectedPackages []pkg.Package
	}{
		{
			name:   "search remote licenses returns the expected licenses when search is set to true",
			config: CatalogerConfig{SearchRemoteLicenses: true},
			requestHandlers: []handlerPath{
				{
					path:    "/certifi/2025.10.5/json",
					handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
				},
			},
			expectedPackages: []pkg.Package{
				{
					Name:      "certifi",
					Version:   "2025.10.5",
					Locations: locations,
					PURL:      "pkg:pypi/certifi@2025.10.5",
					Licenses:  pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
					Language:  pkg.Python,
					Type:      pkg.PythonPkg,
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Every case gets its own mock registry so that handlers registered
			// for one case cannot leak into, or collide with, those of another.
			// NOTE(review): assumes mux behaves like http.ServeMux, which panics
			// when the same pattern is registered twice — confirm against
			// setupPypiRegistry.
			mux, baseURL, teardown := setupPypiRegistry()
			defer teardown()

			for _, h := range tc.requestHandlers {
				mux.HandleFunc(h.path, h.handler)
			}

			tc.config.PypiBaseURL = baseURL
			// named "parser" to avoid shadowing the setupFileParser type
			parser := newSetupFileParser(tc.config)
			pkgtest.TestFileParser(t, fixture, parser.parseSetupFile, tc.expectedPackages, nil)
		})
	}
}
func Test_hasTemplateDirective(t *testing.T) { func Test_hasTemplateDirective(t *testing.T) {
tests := []struct { tests := []struct {

View File

@ -69,9 +69,21 @@ type uvMetadata struct {
ProvidesExtras []string `toml:"provides-extras"` ProvidesExtras []string `toml:"provides-extras"`
} }
// uvLockParser parses uv.lock files. It carries the cataloger configuration
// together with the license resolver that is handed to the package
// constructors for every package it discovers.
type uvLockParser struct {
	// cfg is the cataloger configuration this parser was created with.
	cfg CatalogerConfig
	// licenseResolver is built from cfg by newPythonLicenseResolver.
	licenseResolver pythonLicenseResolver
}

// newUvLockParser returns a uvLockParser whose license resolver is derived
// from the given configuration.
func newUvLockParser(cfg CatalogerConfig) uvLockParser {
	resolver := newPythonLicenseResolver(cfg)
	return uvLockParser{cfg: cfg, licenseResolver: resolver}
}
// parseUvLock is a parser function for uv.lock contents, returning all the packages discovered // parseUvLock is a parser function for uv.lock contents, returning all the packages discovered
func parseUvLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { func (ulp uvLockParser) parseUvLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
pkgs, err := uvLockPackages(reader) pkgs, err := ulp.uvLockPackages(ctx, reader)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }
@ -127,7 +139,7 @@ func newPythonUvLockEntry(p uvPackage) pkg.PythonUvLockEntry {
} }
} }
func uvLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { func (ulp uvLockParser) uvLockPackages(ctx context.Context, reader file.LocationReadCloser) ([]pkg.Package, error) {
var parsedLockFileVersion uvLockFileVersion var parsedLockFileVersion uvLockFileVersion
// we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough // we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough
@ -167,6 +179,8 @@ func uvLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) {
for _, p := range parsedLockFile.Packages { for _, p := range parsedLockFile.Packages {
pkgs = append(pkgs, pkgs = append(pkgs,
newPackageForIndexWithMetadata( newPackageForIndexWithMetadata(
ctx,
ulp.licenseResolver,
p.Name, p.Name,
p.Version, p.Version,
newPythonUvLockEntry(p), newPythonUvLockEntry(p),

View File

@ -1,6 +1,7 @@
package python package python
import ( import (
"context"
"testing" "testing"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
@ -124,5 +125,58 @@ func TestParseUvLock(t *testing.T) {
}, },
} }
pkgtest.TestFileParser(t, fixture, parseUvLock, expectedPkgs, expectedRelationships) uvLockParser := newUvLockParser(DefaultCatalogerConfig())
pkgtest.TestFileParser(t, fixture, uvLockParser.parseUvLock, expectedPkgs, expectedRelationships)
}
func TestParseUvLockWithLicenseEnrichment(t *testing.T) {
ctx := context.TODO()
fixture := "test-fixtures/pypi-remote/uv.lock"
locations := file.NewLocationSet(file.NewLocation(fixture))
mux, url, teardown := setupPypiRegistry()
defer teardown()
tests := []struct {
name string
fixture string
config CatalogerConfig
requestHandlers []handlerPath
expectedPackages []pkg.Package
}{
{
name: "search remote licenses returns the expected licenses when search is set to true",
config: CatalogerConfig{SearchRemoteLicenses: true},
requestHandlers: []handlerPath{
{
path: "/certifi/2025.10.5/json",
handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"),
},
},
expectedPackages: []pkg.Package{
{
Name: "certifi",
Version: "2025.10.5",
Locations: locations,
PURL: "pkg:pypi/certifi@2025.10.5",
Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")),
Language: pkg.Python,
Type: pkg.PythonPkg,
Metadata: pkg.PythonUvLockEntry{
Index: "https://pypi.org/simple",
Dependencies: nil,
},
},
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// set up the mock server
for _, handler := range tc.requestHandlers {
mux.HandleFunc(handler.path, handler.handler)
}
tc.config.PypiBaseURL = url
uvLockParser := newUvLockParser(tc.config)
pkgtest.TestFileParser(t, fixture, uvLockParser.parseUvLock, tc.expectedPackages, nil)
})
}
} }

View File

@ -0,0 +1,29 @@
{
"_meta": {
"hash": {
"sha256": "a6b2dfd5367688bec81240eb04e7bde7f92b35491be5934fcb4e2e6ca9d275c0"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.7"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"certifi": {
"hashes": [
"sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43",
"sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"
],
"index": "pypi",
"version": "==2025.10.5"
}
},
"develop": {}
}

View File

@ -0,0 +1,12 @@
[[package]]
name = "certifi"
version = "2025.10.5"
requires_python = ">=3.7"
summary = "Python package for providing Mozilla's CA Bundle."
groups = ["security"]
marker = "python_version >= \"3.7\""
files = [
{url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"},
{url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"},
]

View File

@ -0,0 +1,13 @@
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "certifi"
version = "2025.10.5"
description = "Python package for providing Mozilla's CA Bundle."
optional = false
python-versions = ">=3.7"
files = [
{file = "certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"},
{file = "certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"},
]

View File

@ -0,0 +1,9 @@
[tool.poetry]
name = "testpkg"
version = "0.1.0"
description = ""
authors = ["Alex Goodman <wagoodman@users.noreply.github.com>"]
readme = "README.md"
[tool.poetry.dependencies]
certifi = "^2025.10.5"

View File

@ -0,0 +1,116 @@
{
"info": {
"author": "Kenneth Reitz",
"author_email": "me@kennethreitz.com",
"bugtrack_url": null,
"classifiers": [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
"Natural Language :: English",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: 3.14",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9"
],
"description": "Certifi: Python SSL Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. Look to upstream projects\nfor methods to use alternate trust.\n",
"description_content_type": null,
"docs_url": null,
"download_url": null,
"downloads": {
"last_day": -1,
"last_month": -1,
"last_week": -1
},
"dynamic": [
"Author",
"Author-Email",
"Classifier",
"Description",
"Home-Page",
"License",
"License-File",
"Project-Url",
"Requires-Python",
"Summary"
],
"home_page": "https://github.com/certifi/python-certifi",
"keywords": null,
"license": "MPL-2.0",
"license_expression": null,
"license_files": [
"LICENSE"
],
"maintainer": null,
"maintainer_email": null,
"name": "certifi",
"package_url": "https://pypi.org/project/certifi/",
"platform": null,
"project_url": "https://pypi.org/project/certifi/",
"project_urls": {
"Homepage": "https://github.com/certifi/python-certifi",
"Source": "https://github.com/certifi/python-certifi"
},
"provides_extra": null,
"release_url": "https://pypi.org/project/certifi/2025.10.5/",
"requires_dist": null,
"requires_python": ">=3.7",
"summary": "Python package for providing Mozilla's CA Bundle.",
"version": "2025.10.5",
"yanked": false,
"yanked_reason": null
},
"last_serial": 31620018,
"urls": [
{
"comment_text": null,
"digests": {
"blake2b_256": "e437af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed",
"md5": "7b56f7121949a196441739c539fd01be",
"sha256": "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"
},
"downloads": -1,
"filename": "certifi-2025.10.5-py3-none-any.whl",
"has_sig": false,
"md5_digest": "7b56f7121949a196441739c539fd01be",
"packagetype": "bdist_wheel",
"python_version": "py3",
"requires_python": ">=3.7",
"size": 163286,
"upload_time": "2025-10-05T04:12:14",
"upload_time_iso_8601": "2025-10-05T04:12:14.030503Z",
"url": "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl",
"yanked": false,
"yanked_reason": null
},
{
"comment_text": null,
"digests": {
"blake2b_256": "4c5bb6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91",
"md5": "7fe97b2e79933430062935f3759a09b0",
"sha256": "47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"
},
"downloads": -1,
"filename": "certifi-2025.10.5.tar.gz",
"has_sig": false,
"md5_digest": "7fe97b2e79933430062935f3759a09b0",
"packagetype": "sdist",
"python_version": "source",
"requires_python": ">=3.7",
"size": 164519,
"upload_time": "2025-10-05T04:12:15",
"upload_time_iso_8601": "2025-10-05T04:12:15.808237Z",
"url": "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz",
"yanked": false,
"yanked_reason": null
}
],
"vulnerabilities": []
}

View File

@ -0,0 +1 @@
certifi == 2025.10.5

View File

@ -0,0 +1,22 @@
from setuptools import setup
# Sample setup.py from the pytest project with added comments specific
# to the cataloger
INSTALL_REQUIRES = [
"certifi==2025.10.5",
]
def main():
setup(
use_scm_version={"write_to": "src/_pytest/_version.py"},
setup_requires=["setuptools-scm", "setuptools>=40.0"],
package_dir={"": "src"},
extras_require={},
install_requires=INSTALL_REQUIRES,
)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,11 @@
version = 1
requires-python = ">=3.7"
[[package]]
name = "certifi"
version = "2025.10.5"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286 },
]