diff --git a/cmd/syft/internal/options/catalog.go b/cmd/syft/internal/options/catalog.go index 652d6a40d..6f33f7b9e 100644 --- a/cmd/syft/internal/options/catalog.go +++ b/cmd/syft/internal/options/catalog.go @@ -198,9 +198,10 @@ func (cfg Catalog) ToPackagesConfig() pkgcataloging.Config { }, Nix: nix.DefaultConfig(). WithCaptureOwnedFiles(cfg.Nix.CaptureOwnedFiles), - Python: python.CatalogerConfig{ - GuessUnpinnedRequirements: cfg.Python.GuessUnpinnedRequirements, - }, + Python: python.DefaultCatalogerConfig(). + WithSearchRemoteLicenses(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Python), cfg.Python.SearchRemoteLicenses)). + WithPypiBaseURL(cfg.Python.PypiBaseURL). + WithGuessUnpinnedRequirements(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Python), cfg.Python.GuessUnpinnedRequirements)), JavaArchive: java.DefaultArchiveCatalogerConfig(). WithUseMavenLocalRepository(*multiLevelOption(false, enrichmentEnabled(cfg.Enrich, task.Java, task.Maven), cfg.Java.UseMavenLocalRepository)). WithMavenLocalRepositoryDir(cfg.Java.MavenLocalRepositoryDir). 
@@ -320,6 +321,7 @@ var publicisedEnrichmentOptions = []string{ task.Golang, task.Java, task.JavaScript, + task.Python, } func enrichmentEnabled(enrichDirectives []string, features ...string) *bool { diff --git a/cmd/syft/internal/options/python.go b/cmd/syft/internal/options/python.go index c645cbfcd..97c5330c1 100644 --- a/cmd/syft/internal/options/python.go +++ b/cmd/syft/internal/options/python.go @@ -3,7 +3,9 @@ package options import "github.com/anchore/clio" type pythonConfig struct { - GuessUnpinnedRequirements bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` + SearchRemoteLicenses *bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` + PypiBaseURL string `json:"pypi-base-url" yaml:"pypi-base-url" mapstructure:"pypi-base-url"` + GuessUnpinnedRequirements *bool `json:"guess-unpinned-requirements" yaml:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` } var _ interface { @@ -11,6 +13,8 @@ var _ interface { } = (*pythonConfig)(nil) func (o *pythonConfig) DescribeFields(descriptions clio.FieldDescriptionSet) { + descriptions.Add(&o.SearchRemoteLicenses, `enables Syft to use the network to fill in more detailed license information`) + descriptions.Add(&o.PypiBaseURL, `base Pypi url to use`) descriptions.Add(&o.GuessUnpinnedRequirements, `when running across entries in requirements.txt that do not specify a specific version (e.g. "sqlalchemy >= 1.0.0, <= 2.0.0, != 3.0.0, <= 3.0.0"), attempt to guess what the version could be based on the version requirements specified (e.g. "1.0.0"). 
When enabled the lowest expressible version diff --git a/internal/task/package_tasks.go b/internal/task/package_tasks.go index 6f04f015f..79b9167ba 100644 --- a/internal/task/package_tasks.go +++ b/internal/task/package_tasks.go @@ -52,6 +52,9 @@ const ( JavaScript = "javascript" Node = "node" NPM = "npm" + + // Python ecosystem labels + Python = "python" ) //nolint:funlen @@ -109,7 +112,7 @@ func DefaultPackageTaskFactories() Factories { func(cfg CatalogingFactoryConfig) pkg.Cataloger { return python.NewPackageCataloger(cfg.PackagesConfig.Python) }, - pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "python", + pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, Python, ), newSimplePackageTaskFactory(ruby.NewGemFileLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem"), newSimplePackageTaskFactory(ruby.NewGemSpecCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "ruby", "gem", "gemspec"), @@ -127,7 +130,7 @@ func DefaultPackageTaskFactories() Factories { pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#", ), newSimplePackageTaskFactory(dotnet.NewDotnetPackagesLockCataloger, pkgcataloging.DeclaredTag, pkgcataloging.ImageTag, pkgcataloging.DirectoryTag, pkgcataloging.LanguageTag, "dotnet", "c#"), - newSimplePackageTaskFactory(python.NewInstalledPackageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, "python"), + newSimplePackageTaskFactory(python.NewInstalledPackageCataloger, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, pkgcataloging.LanguageTag, Python), newPackageTaskFactory( func(cfg CatalogingFactoryConfig) pkg.Cataloger { return golang.NewGoModuleBinaryCataloger(cfg.PackagesConfig.Golang) diff --git 
a/syft/pkg/cataloger/python/cataloger.go b/syft/pkg/cataloger/python/cataloger.go index 69284dcb6..dffc7b3fb 100644 --- a/syft/pkg/cataloger/python/cataloger.go +++ b/syft/pkg/cataloger/python/cataloger.go @@ -10,28 +10,21 @@ import ( const eggInfoGlob = "**/*.egg-info" -type CatalogerConfig struct { - // GuessUnpinnedRequirements attempts to infer package versions from version constraints when no explicit version is specified in requirements files. - // app-config: python.guess-unpinned-requirements - GuessUnpinnedRequirements bool `yaml:"guess-unpinned-requirements" json:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` -} - -func DefaultCatalogerConfig() CatalogerConfig { - return CatalogerConfig{ - GuessUnpinnedRequirements: false, - } -} - // NewPackageCataloger returns a new cataloger for python packages referenced from poetry lock files, requirements.txt files, and setup.py files. func NewPackageCataloger(cfg CatalogerConfig) pkg.Cataloger { - rqp := newRequirementsParser(cfg) + poetryLockParser := newPoetryLockParser(cfg) + pipfileLockParser := newPipfileLockParser(cfg) + setupFileParser := newSetupFileParser(cfg) + uvLockParser := newUvLockParser(cfg) + pdmLockParser := newPdmLockParser(cfg) + requirementsFileParser := newRequirementsParser(cfg) return generic.NewCataloger("python-package-cataloger"). - WithParserByGlobs(rqp.parseRequirementsTxt, "**/*requirements*.txt"). - WithParserByGlobs(parsePoetryLock, "**/poetry.lock"). - WithParserByGlobs(parsePipfileLock, "**/Pipfile.lock"). - WithParserByGlobs(parseSetup, "**/setup.py"). - WithParserByGlobs(parseUvLock, "**/uv.lock"). - WithParserByGlobs(parsePdmLock, "**/pdm.lock") + WithParserByGlobs(requirementsFileParser.parseRequirementsTxt, "**/*requirements*.txt"). + WithParserByGlobs(poetryLockParser.parsePoetryLock, "**/poetry.lock"). + WithParserByGlobs(pipfileLockParser.parsePipfileLock, "**/Pipfile.lock"). + WithParserByGlobs(setupFileParser.parseSetupFile, "**/setup.py"). 
+ WithParserByGlobs(uvLockParser.parseUvLock, "**/uv.lock"). + WithParserByGlobs(pdmLockParser.parsePdmLock, "**/pdm.lock") } // NewInstalledPackageCataloger returns a new cataloger for python packages within egg or wheel installation directories. diff --git a/syft/pkg/cataloger/python/config.go b/syft/pkg/cataloger/python/config.go new file mode 100644 index 000000000..0e5aba0c9 --- /dev/null +++ b/syft/pkg/cataloger/python/config.go @@ -0,0 +1,40 @@ +package python + +const pypiBaseURL = "https://pypi.org/pypi" + +type CatalogerConfig struct { + // GuessUnpinnedRequirements attempts to infer package versions from version constraints when no explicit version is specified in requirements files. + // app-config: python.guess-unpinned-requirements + GuessUnpinnedRequirements bool `yaml:"guess-unpinned-requirements" json:"guess-unpinned-requirements" mapstructure:"guess-unpinned-requirements"` + // SearchRemoteLicenses enables querying the PyPI registry API to retrieve license information for packages that are missing license data in their local metadata. + // app-config: python.search-remote-licenses + SearchRemoteLicenses bool `json:"search-remote-licenses" yaml:"search-remote-licenses" mapstructure:"search-remote-licenses"` + // PypiBaseURL specifies the base URL for the Pypi registry API used when searching for remote license information. 
+ // app-config: python.pypi-base-url + PypiBaseURL string `json:"pypi-base-url" yaml:"pypi-base-url" mapstructure:"pypi-base-url"` +} + +func DefaultCatalogerConfig() CatalogerConfig { + return CatalogerConfig{ + GuessUnpinnedRequirements: false, + SearchRemoteLicenses: false, + PypiBaseURL: pypiBaseURL, + } +} + +func (c CatalogerConfig) WithSearchRemoteLicenses(input bool) CatalogerConfig { + c.SearchRemoteLicenses = input + return c +} + +func (c CatalogerConfig) WithGuessUnpinnedRequirements(input bool) CatalogerConfig { + c.GuessUnpinnedRequirements = input + return c +} + +func (c CatalogerConfig) WithPypiBaseURL(input string) CatalogerConfig { + if input != "" { + c.PypiBaseURL = input + } + return c +} diff --git a/syft/pkg/cataloger/python/dependency_test.go b/syft/pkg/cataloger/python/dependency_test.go index a3b520755..fc58e346c 100644 --- a/syft/pkg/cataloger/python/dependency_test.go +++ b/syft/pkg/cataloger/python/dependency_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "os" "testing" @@ -259,7 +260,8 @@ func Test_poetryLockDependencySpecifier_againstPoetryLock(t *testing.T) { fh, err := os.Open(tt.fixture) require.NoError(t, err) - pkgs, err := poetryLockPackages(file.NewLocationReadCloser(file.NewLocation(tt.fixture), fh)) + plp := newPoetryLockParser(DefaultCatalogerConfig()) + pkgs, err := plp.poetryLockPackages(context.TODO(), file.NewLocationReadCloser(file.NewLocation(tt.fixture), fh)) require.NoError(t, err) var got []dependency.Specification diff --git a/syft/pkg/cataloger/python/license.go b/syft/pkg/cataloger/python/license.go new file mode 100644 index 000000000..d9a2420fb --- /dev/null +++ b/syft/pkg/cataloger/python/license.go @@ -0,0 +1,131 @@ +package python + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "strings" + "time" + + "github.com/anchore/syft/internal/cache" + "github.com/anchore/syft/internal/log" + "github.com/anchore/syft/syft/pkg" +) + +type pythonLicenseResolver struct { 
+	// catalogerConfig supplies SearchRemoteLicenses and PypiBaseURL for remote lookups.
+	catalogerConfig CatalogerConfig
+	// licenseCache stores remote lookup results keyed by "<name>/<version>".
+	licenseCache cache.Resolver[[]pkg.License]
+}
+
+// newPythonLicenseResolver builds a resolver bound to the given cataloger configuration.
+func newPythonLicenseResolver(config CatalogerConfig) pythonLicenseResolver {
+	return pythonLicenseResolver{
+		licenseCache:    cache.GetResolverCachingErrors[[]pkg.License]("python", "v1"),
+		catalogerConfig: config,
+	}
+}
+
+// getLicenses returns the licenses found for the given package. When SearchRemoteLicenses is disabled,
+// or the remote lookup fails or yields nothing, the zero-value license set is returned.
+func (lr *pythonLicenseResolver) getLicenses(ctx context.Context, packageName string, packageVersion string) pkg.LicenseSet {
+	var licenseSet pkg.LicenseSet
+	if !lr.catalogerConfig.SearchRemoteLicenses {
+		return licenseSet
+	}
+
+	licenses, err := lr.getLicensesFromRemote(ctx, packageName, packageVersion)
+	if err != nil {
+		// enrichment is best effort: the failure is reported once, here, and cataloging continues
+		log.Debugf("unable to extract licenses from pypi registry for package %s:%s: %+v", packageName, packageVersion, err)
+		return licenseSet
+	}
+	if licenses != nil {
+		licenseSet = pkg.NewLicenseSet(licenses...)
+	}
+	return licenseSet
+}
+
+// getLicensesFromRemote resolves the licenses for a package through the license cache, querying the
+// configured registry when the cache invokes the lookup callback. A nil slice with a nil error means the
+// registry answered but declared no license.
+func (lr *pythonLicenseResolver) getLicensesFromRemote(ctx context.Context, packageName string, packageVersion string) ([]pkg.License, error) {
+	return lr.licenseCache.Resolve(fmt.Sprintf("%s/%s", packageName, packageVersion), func() ([]pkg.License, error) {
+		license, err := getLicenseFromPypiRegistry(lr.catalogerConfig.PypiBaseURL, packageName, packageVersion)
+		if err != nil {
+			// not logged here: getLicenses reports the error, so logging again would duplicate the message
+			return nil, err
+		}
+		if license == "" {
+			return nil, nil
+		}
+		return pkg.NewLicensesFromValuesWithContext(ctx, license), nil
+	})
+}
+
+// formatPypiRegistryURL joins baseURL with the package name, version and "json" to form the request URL.
+// A blank package name is rejected.
+func formatPypiRegistryURL(baseURL, packageName, version string) (requestURL string, err error) {
+	if packageName == "" {
+		return "", fmt.Errorf("unable to format pypi request for a blank package name")
+	}
+
+	urlPath := []string{packageName, version, "json"}
+	requestURL, err = url.JoinPath(baseURL, urlPath...)
+	if err != nil {
+		return "", fmt.Errorf("unable to format pypi request for pkg:version %s:%s; %w", packageName, version, err)
+	}
+	return requestURL, nil
+}
+
+// getLicenseFromPypiRegistry fetches the JSON document for packageName/version from the registry rooted at
+// baseURL and returns the license declared in its "info" object. "license_expression" is preferred when it
+// is non-empty, otherwise "license" is used. An empty string with a nil error means no license was declared.
+//
+// NOTE(review): the request is not bound to a context.Context, only to the client timeout below; threading
+// a context through would need a signature change.
+func getLicenseFromPypiRegistry(baseURL, packageName, version string) (string, error) {
+	// e.g. https://pypi.org/pypi/<packageName>/<version>/json
+	requestURL, err := formatPypiRegistryURL(baseURL, packageName, version)
+	if err != nil {
+		// formatPypiRegistryURL already describes the failure; wrapping again only repeated the same text
+		return "", err
+	}
+	log.WithFields("url", requestURL).Info("downloading python package from pypi")
+
+	pypiRequest, err := http.NewRequest(http.MethodGet, requestURL, nil)
+	if err != nil {
+		return "", fmt.Errorf("unable to format remote request: %w", err)
+	}
+
+	httpClient := &http.Client{
+		Timeout: time.Second * 10,
+	}
+
+	resp, err := httpClient.Do(pypiRequest)
+	if err != nil {
+		return "", fmt.Errorf("unable to get package from pypi registry: %w", err)
+	}
+	defer func() {
+		if err := resp.Body.Close(); err != nil {
+			log.Errorf("unable to close body: %+v", err)
+		}
+	}()
+
+	if resp.StatusCode != http.StatusOK {
+		// include the status so a missing package (404) can be told apart from a registry failure (5xx)
+		return "", fmt.Errorf("unable to get package from pypi registry: unexpected status %q", resp.Status)
+	}
+
+	body, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return "", fmt.Errorf("unable to parse package from pypi registry: %w", err)
+	}
+
+	dec := json.NewDecoder(strings.NewReader(string(body)))
+
+	// Only the license fields of the "info" object are read from the response
+	var pypiResponse struct {
+		Info struct {
+			License           string `json:"license"`
+			LicenseExpression string `json:"license_expression"`
+		} `json:"info"`
+	}
+
+	if err := dec.Decode(&pypiResponse); err != nil {
+		return "", fmt.Errorf("unable to parse license from pypi registry: %w", err)
+	}
+
+	license := pypiResponse.Info.License
+	if pypiResponse.Info.LicenseExpression != "" {
+		license = pypiResponse.Info.LicenseExpression
+	}
+	log.Tracef("Retrieved License: %s", license)
+
+	return license, nil
+}
diff --git 
a/syft/pkg/cataloger/python/license_test.go b/syft/pkg/cataloger/python/license_test.go new file mode 100644 index 000000000..afc7e7f27 --- /dev/null +++ b/syft/pkg/cataloger/python/license_test.go @@ -0,0 +1,177 @@ +package python + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFormatPyPiRegistryURL(t *testing.T) { + tests := []struct { + name string + version string + expected string + expectedError error + }{ + { + name: "package1", + version: "1.0", + expected: "https://pypi.org/pypi/package1/1.0/json", + expectedError: nil, + }, + { + name: "package-1", + version: "", + expected: "https://pypi.org/pypi/package-1/json", + expectedError: nil, + }, + { + name: "_", + version: "a", + expected: "https://pypi.org/pypi/_/a/json", + expectedError: nil, + }, + { + name: "", + version: "a", + expected: "", + expectedError: fmt.Errorf("unable to format pypi request for a blank package name"), + }, + } + + cfg := DefaultCatalogerConfig() + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got, err := formatPypiRegistryURL(cfg.PypiBaseURL, test.name, test.version) + + require.Equal(t, test.expected, got) + if test.expectedError != nil { + require.ErrorContains(t, err, test.expectedError.Error()) + } else { + require.NoError(t, err) + } + }) + } + +} + +func TestGetLicenseFromPypiRegistry(t *testing.T) { + mux, url, teardown := setupPypiRegistry() + defer teardown() + + tests := []struct { + name string + version string + requestHandlers []handlerPath + expected string + expectedError error + }{ + { + name: "certifi", + version: "2025.10.5", + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expected: "MPL-2.0", + }, + { + name: "package", + version: "1.0", + requestHandlers: []handlerPath{ + { + path: "/package/1.0/json", + handler: 
generateMockPypiRegistryHandlerWithStatus("", http.StatusNotFound),
+				},
+			},
+			expected:      "",
+			expectedError: fmt.Errorf("unable to get package from pypi registry"),
+		},
+		{
+			name:    "package",
+			version: "2.0",
+			requestHandlers: []handlerPath{
+				{
+					path:    "/package/2.0/json",
+					handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response_bad.json"),
+				},
+			},
+			expected:      "",
+			expectedError: fmt.Errorf("unable to parse license from pypi registry: EOF"),
+		},
+	}
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			// set up the mock server
+			for _, handler := range tc.requestHandlers {
+				mux.HandleFunc(handler.path, handler.handler)
+			}
+			got, err := getLicenseFromPypiRegistry(url, tc.name, tc.version)
+			require.Equal(t, tc.expected, got)
+			if tc.expectedError != nil {
+				require.ErrorContains(t, err, tc.expectedError.Error())
+			} else {
+				require.NoError(t, err)
+			}
+		})
+	}
+}
+
+// handlerPath pairs a request path with the handler to register for it on the mock registry mux.
+type handlerPath struct {
+	path    string
+	handler func(w http.ResponseWriter, r *http.Request)
+}
+
+// generateMockPypiRegistryHandler returns a handler that answers 200 with the contents of responseFixture.
+func generateMockPypiRegistryHandler(responseFixture string) func(w http.ResponseWriter, r *http.Request) {
+	return generateMockPypiRegistryHandlerWithStatus(responseFixture, http.StatusOK)
+}
+
+// generateMockPypiRegistryHandlerWithStatus returns a handler that answers with mockHttpStatus. For
+// http.StatusOK the body is the contents of responseFixture; for any other status a short error body is
+// sent and responseFixture is not read.
+func generateMockPypiRegistryHandlerWithStatus(responseFixture string, mockHttpStatus int) func(w http.ResponseWriter, r *http.Request) {
+	return func(w http.ResponseWriter, r *http.Request) {
+		if mockHttpStatus != http.StatusOK {
+			// reply with the status the test asked for (previously always 404)
+			http.Error(w, "Error for status", mockHttpStatus)
+			return
+		}
+
+		// open the fixture before committing to a 200, so a missing fixture surfaces as a 500
+		file, err := os.Open(responseFixture)
+		if err != nil {
+			http.Error(w, err.Error(), http.StatusInternalServerError)
+			return
+		}
+		defer file.Close()
+
+		w.WriteHeader(http.StatusOK)
+		// Copy the file's content to the response writer
+		if _, err = io.Copy(w, file); err != nil {
+			// the status line is already sent; nothing more can be reported to the client
+			return
+		}
+	}
+}
+
+// setup sets up a test HTTP server for mocking requests to a 
particular registry. +// The returned url is injected into the Config so the client uses the test server. +// Tests should register handlers on mux to simulate the expected request/response structure +func setupPypiRegistry() (mux *http.ServeMux, serverURL string, teardown func()) { + // mux is the HTTP request multiplexer used with the test server. + mux = http.NewServeMux() + + // We want to ensure that tests catch mistakes where the endpoint URL is + // specified as absolute rather than relative. It only makes a difference + // when there's a non-empty base URL path. So, use that. See issue #752. + apiHandler := http.NewServeMux() + apiHandler.Handle("/", mux) + // server is a test HTTP server used to provide mock API responses. + server := httptest.NewServer(apiHandler) + + return mux, server.URL, server.Close +} diff --git a/syft/pkg/cataloger/python/package.go b/syft/pkg/cataloger/python/package.go index 0c724ca81..506584fca 100644 --- a/syft/pkg/cataloger/python/package.go +++ b/syft/pkg/cataloger/python/package.go @@ -1,6 +1,7 @@ package python import ( + "context" "fmt" "regexp" "strings" @@ -17,12 +18,14 @@ func normalize(name string) string { return strings.ToLower(normalized) } -func newPackageForIndex(name, version string, locations ...file.Location) pkg.Package { +func newPackageForIndex(ctx context.Context, lr pythonLicenseResolver, name, version string, locations ...file.Location) pkg.Package { name = normalize(name) + licenseSet := lr.getLicenses(ctx, name, version) p := pkg.Package{ Name: name, Version: version, + Licenses: licenseSet, Locations: file.NewLocationSet(locations...), PURL: packageURL(name, version, nil), Language: pkg.Python, @@ -34,12 +37,14 @@ func newPackageForIndex(name, version string, locations ...file.Location) pkg.Pa return p } -func newPackageForIndexWithMetadata(name, version string, metadata interface{}, locations ...file.Location) pkg.Package { +func newPackageForIndexWithMetadata(ctx context.Context, lr 
pythonLicenseResolver, name, version string, metadata interface{}, locations ...file.Location) pkg.Package { name = normalize(name) + licenseSet := lr.getLicenses(ctx, name, version) p := pkg.Package{ Name: name, Version: version, + Licenses: licenseSet, Locations: file.NewLocationSet(locations...), PURL: packageURL(name, version, nil), Language: pkg.Python, @@ -52,12 +57,14 @@ func newPackageForIndexWithMetadata(name, version string, metadata interface{}, return p } -func newPackageForRequirementsWithMetadata(name, version string, metadata pkg.PythonRequirementsEntry, locations ...file.Location) pkg.Package { +func newPackageForRequirementsWithMetadata(ctx context.Context, lr pythonLicenseResolver, name, version string, metadata pkg.PythonRequirementsEntry, locations ...file.Location) pkg.Package { name = normalize(name) + licenseSet := lr.getLicenses(ctx, name, version) p := pkg.Package{ Name: name, Version: version, + Licenses: licenseSet, Locations: file.NewLocationSet(locations...), PURL: packageURL(name, version, nil), Language: pkg.Python, diff --git a/syft/pkg/cataloger/python/parse_pdm_lock.go b/syft/pkg/cataloger/python/parse_pdm_lock.go index 66993d515..da090ce3a 100644 --- a/syft/pkg/cataloger/python/parse_pdm_lock.go +++ b/syft/pkg/cataloger/python/parse_pdm_lock.go @@ -39,10 +39,20 @@ type pdmLockPackageFile struct { Hash string `toml:"hash"` } -var _ generic.Parser = parsePdmLock +type pdmLockParser struct { + cfg CatalogerConfig + licenseResolver pythonLicenseResolver +} + +func newPdmLockParser(cfg CatalogerConfig) pdmLockParser { + return pdmLockParser{ + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), + } +} // parsePdmLock is a parser function for pdm.lock contents, returning python packages discovered. 
-func parsePdmLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (plp pdmLockParser) parsePdmLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var lock pdmLock _, err := toml.NewDecoder(reader).Decode(&lock) if err != nil { @@ -85,6 +95,8 @@ func parsePdmLock(_ context.Context, _ file.Resolver, _ *generic.Environment, re } pkgs = append(pkgs, newPackageForIndexWithMetadata( + ctx, + plp.licenseResolver, p.Name, p.Version, pythonPkgMetadata, diff --git a/syft/pkg/cataloger/python/parse_pdm_lock_test.go b/syft/pkg/cataloger/python/parse_pdm_lock_test.go index ba1d6c0dc..31762016a 100644 --- a/syft/pkg/cataloger/python/parse_pdm_lock_test.go +++ b/syft/pkg/cataloger/python/parse_pdm_lock_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/anchore/syft/syft/artifact" @@ -352,12 +353,81 @@ func TestParsePdmLock(t *testing.T) { }, } - pkgtest.TestFileParser(t, fixture, parsePdmLock, expectedPkgs, expectedRelationships) + pdmLockParser := newPdmLockParser(DefaultCatalogerConfig()) + pkgtest.TestFileParser(t, fixture, pdmLockParser.parsePdmLock, expectedPkgs, expectedRelationships) +} + +func TestParsePdmLockWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/pdm.lock" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: 
generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + Metadata: pkg.PythonPdmLockEntry{ + Summary: "Python package for providing Mozilla's CA Bundle.", + Files: []pkg.PythonFileRecord{ + { + Path: "", + Digest: &pkg.PythonFileDigest{ + Algorithm: "sha256", + Value: "47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", + }, + }, + { + Path: "", + Digest: &pkg.PythonFileDigest{ + Algorithm: "sha256", + Value: "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", + }, + }, + }, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + pdmLockParser := newPdmLockParser(tc.config) + pkgtest.TestFileParser(t, fixture, pdmLockParser.parsePdmLock, tc.expectedPackages, nil) + }) + } } func Test_corruptPdmLock(t *testing.T) { + pdmLockParser := newPdmLockParser(DefaultCatalogerConfig()) pkgtest.NewCatalogTester(). FromFile(t, "test-fixtures/glob-paths/src/pdm.lock"). WithError(). 
- TestParser(t, parsePdmLock) + TestParser(t, pdmLockParser.parsePdmLock) } diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock.go b/syft/pkg/cataloger/python/parse_pipfile_lock.go index b2100060a..6da14cd02 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock.go +++ b/syft/pkg/cataloger/python/parse_pipfile_lock.go @@ -39,10 +39,20 @@ type pipfileLockDependency struct { Index string `json:"index"` } -var _ generic.Parser = parsePipfileLock +type pipfileLockParser struct { + cfg CatalogerConfig + licenseResolver pythonLicenseResolver +} + +func newPipfileLockParser(cfg CatalogerConfig) pipfileLockParser { + return pipfileLockParser{ + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), + } +} // parsePipfileLock is a parser function for Pipfile.lock contents, returning "Default" python packages discovered. -func parsePipfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (plp pipfileLockParser) parsePipfileLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { pkgs := make([]pkg.Package, 0) dec := json.NewDecoder(reader) @@ -66,7 +76,7 @@ func parsePipfileLock(_ context.Context, _ file.Resolver, _ *generic.Environment index = "https://pypi.org/simple" } version := strings.TrimPrefix(pkgMeta.Version, "==") - pkgs = append(pkgs, newPackageForIndexWithMetadata(name, version, pkg.PythonPipfileLockEntry{Index: index, Hashes: pkgMeta.Hashes}, reader.Location)) + pkgs = append(pkgs, newPackageForIndexWithMetadata(ctx, plp.licenseResolver, name, version, pkg.PythonPipfileLockEntry{Index: index, Hashes: pkgMeta.Hashes}, reader.Location)) } } diff --git a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go index 051bfd78a..7ad5a4e7f 100644 --- a/syft/pkg/cataloger/python/parse_pipfile_lock_test.go +++ 
b/syft/pkg/cataloger/python/parse_pipfile_lock_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/anchore/syft/syft/artifact" @@ -78,12 +79,69 @@ func TestParsePipFileLock(t *testing.T) { // TODO: relationships are not under test var expectedRelationships []artifact.Relationship - pkgtest.TestFileParser(t, fixture, parsePipfileLock, expectedPkgs, expectedRelationships) + pipfileLockParser := newPipfileLockParser(DefaultCatalogerConfig()) + pkgtest.TestFileParser(t, fixture, pipfileLockParser.parsePipfileLock, expectedPkgs, expectedRelationships) +} + +func TestParsePipfileLockWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/Pipfile.lock" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + Metadata: pkg.PythonPipfileLockEntry{ + Index: "https://pypi.org/simple", + Hashes: []string{ + "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", + "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", + }, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range 
tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + pipfileLockParser := newPipfileLockParser(tc.config) + pkgtest.TestFileParser(t, fixture, pipfileLockParser.parsePipfileLock, tc.expectedPackages, nil) + }) + } } func Test_corruptPipfileLock(t *testing.T) { + pipfileLockParser := newPipfileLockParser(DefaultCatalogerConfig()) pkgtest.NewCatalogTester(). FromFile(t, "test-fixtures/glob-paths/src/Pipfile.lock"). WithError(). - TestParser(t, parsePipfileLock) + TestParser(t, pipfileLockParser.parsePipfileLock) } diff --git a/syft/pkg/cataloger/python/parse_poetry_lock.go b/syft/pkg/cataloger/python/parse_poetry_lock.go index 48b4a5afa..7fdac48e7 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock.go @@ -16,9 +16,6 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/internal/dependency" ) -// integrity check -var _ generic.Parser = parsePoetryLock - type poetryPackageSource struct { URL string `toml:"url"` Type string `toml:"type"` @@ -48,9 +45,21 @@ type poetryPackageDependency struct { Extras []string `toml:"extras"` } +type poetryLockParser struct { + cfg CatalogerConfig + licenseResolver pythonLicenseResolver +} + +func newPoetryLockParser(cfg CatalogerConfig) poetryLockParser { + return poetryLockParser{ + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), + } +} + // parsePoetryLock is a parser function for poetry.lock contents, returning all python packages discovered. 
-func parsePoetryLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - pkgs, err := poetryLockPackages(reader) +func (plp poetryLockParser) parsePoetryLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + pkgs, err := plp.poetryLockPackages(ctx, reader) if err != nil { return nil, nil, err } @@ -61,7 +70,7 @@ func parsePoetryLock(_ context.Context, _ file.Resolver, _ *generic.Environment, return pkgs, dependency.Resolve(poetryLockDependencySpecifier, pkgs), unknown.IfEmptyf(pkgs, "unable to determine packages") } -func poetryLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { +func (plp poetryLockParser) poetryLockPackages(ctx context.Context, reader file.LocationReadCloser) ([]pkg.Package, error) { metadata := poetryPackages{} md, err := toml.NewDecoder(reader).Decode(&metadata) if err != nil { @@ -96,6 +105,8 @@ func poetryLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { pkgs = append( pkgs, newPackageForIndexWithMetadata( + ctx, + plp.licenseResolver, p.Name, p.Version, newPythonPoetryLockEntry(p), diff --git a/syft/pkg/cataloger/python/parse_poetry_lock_test.go b/syft/pkg/cataloger/python/parse_poetry_lock_test.go index 0cb0030a2..b350a34a1 100644 --- a/syft/pkg/cataloger/python/parse_poetry_lock_test.go +++ b/syft/pkg/cataloger/python/parse_poetry_lock_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/anchore/syft/syft/artifact" @@ -79,12 +80,64 @@ func TestParsePoetryLock(t *testing.T) { var expectedRelationships []artifact.Relationship - pkgtest.TestFileParser(t, fixture, parsePoetryLock, expectedPkgs, expectedRelationships) + poetryLockParser := newPoetryLockParser(DefaultCatalogerConfig()) + pkgtest.TestFileParser(t, fixture, poetryLockParser.parsePoetryLock, expectedPkgs, expectedRelationships) } 
+func TestParsePoetryLockWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/poetry.lock" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + Metadata: pkg.PythonPoetryLockEntry{ + Index: "https://pypi.org/simple", + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + poetryLockParser := newPoetryLockParser(tc.config) + pkgtest.TestFileParser(t, fixture, poetryLockParser.parsePoetryLock, tc.expectedPackages, nil) + }) + } +} func Test_corruptPoetryLock(t *testing.T) { + poetryLockParser := newPoetryLockParser(DefaultCatalogerConfig()) pkgtest.NewCatalogTester(). FromFile(t, "test-fixtures/glob-paths/src/poetry.lock"). WithError(). 
- TestParser(t, parsePoetryLock) + TestParser(t, poetryLockParser.parsePoetryLock) } diff --git a/syft/pkg/cataloger/python/parse_requirements.go b/syft/pkg/cataloger/python/parse_requirements.go index bbc761530..21c69cc74 100644 --- a/syft/pkg/cataloger/python/parse_requirements.go +++ b/syft/pkg/cataloger/python/parse_requirements.go @@ -83,18 +83,20 @@ func newRequirement(raw string) *unprocessedRequirement { } type requirementsParser struct { - guessUnpinnedRequirements bool + cfg CatalogerConfig + licenseResolver pythonLicenseResolver } func newRequirementsParser(cfg CatalogerConfig) requirementsParser { return requirementsParser{ - guessUnpinnedRequirements: cfg.GuessUnpinnedRequirements, + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), } } // parseRequirementsTxt takes a Python requirements.txt file, returning all Python packages that are locked to a // specific version. -func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (rp requirementsParser) parseRequirementsTxt(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var errs error var packages []pkg.Package @@ -133,7 +135,7 @@ func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Reso } name := removeExtras(req.Name) - version := parseVersion(req.VersionConstraint, rp.guessUnpinnedRequirements) + version := parseVersion(req.VersionConstraint, rp.cfg.GuessUnpinnedRequirements) if version == "" { log.WithFields("path", reader.RealPath, "line", line).Trace("unable to determine package version in requirements.txt line") @@ -144,6 +146,8 @@ func (rp requirementsParser) parseRequirementsTxt(_ context.Context, _ file.Reso packages = append( packages, newPackageForRequirementsWithMetadata( + ctx, + rp.licenseResolver, name, version, 
pkg.PythonRequirementsEntry{ diff --git a/syft/pkg/cataloger/python/parse_requirements_test.go b/syft/pkg/cataloger/python/parse_requirements_test.go index 6573b13d6..e1cbfd864 100644 --- a/syft/pkg/cataloger/python/parse_requirements_test.go +++ b/syft/pkg/cataloger/python/parse_requirements_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/stretchr/testify/assert" @@ -226,6 +227,58 @@ func TestParseRequirementsTxt(t *testing.T) { } } +func TestParseRequirementsTxtWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/requirements.txt" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + Metadata: pkg.PythonRequirementsEntry{ + Name: "certifi", + VersionConstraint: "== 2025.10.5", + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + requirementsParser := newRequirementsParser(tc.config) + pkgtest.TestFileParser(t, fixture, requirementsParser.parseRequirementsTxt, tc.expectedPackages, nil) + }) 
+ } +} + func Test_newRequirement(t *testing.T) { tests := []struct { diff --git a/syft/pkg/cataloger/python/parse_setup.go b/syft/pkg/cataloger/python/parse_setup.go index 6a9148763..a42c785ac 100644 --- a/syft/pkg/cataloger/python/parse_setup.go +++ b/syft/pkg/cataloger/python/parse_setup.go @@ -13,8 +13,17 @@ import ( "github.com/anchore/syft/syft/pkg/cataloger/generic" ) -// integrity check -var _ generic.Parser = parseSetup +type setupFileParser struct { + cfg CatalogerConfig + licenseResolver pythonLicenseResolver +} + +func newSetupFileParser(cfg CatalogerConfig) setupFileParser { + return setupFileParser{ + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), + } +} // match examples: // @@ -24,7 +33,7 @@ var _ generic.Parser = parseSetup var pinnedDependency = regexp.MustCompile(`['"]\W?(\w+\W?==\W?[\w.]*)`) var unquotedPinnedDependency = regexp.MustCompile(`^\s*(\w+)\s*==\s*([\w\.\-]+)`) -func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { +func (sp setupFileParser) parseSetupFile(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { var packages []pkg.Package scanner := bufio.NewScanner(reader) @@ -33,23 +42,23 @@ func parseSetup(_ context.Context, _ file.Resolver, _ *generic.Environment, read line := scanner.Text() line = strings.TrimRight(line, "\n") - packages = processQuotedDependencies(line, reader, packages) - packages = processUnquotedDependency(line, reader, packages) + packages = sp.processQuotedDependencies(ctx, line, reader, packages) + packages = sp.processUnquotedDependency(ctx, line, reader, packages) } return packages, nil, nil } -func processQuotedDependencies(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { +func (sp setupFileParser) processQuotedDependencies(ctx context.Context, line string, reader 
file.LocationReadCloser, packages []pkg.Package) []pkg.Package { for _, match := range pinnedDependency.FindAllString(line, -1) { - if p, ok := parseQuotedDependency(match, line, reader); ok { + if p, ok := sp.parseQuotedDependency(ctx, match, line, reader); ok { packages = append(packages, p) } } return packages } -func parseQuotedDependency(match, line string, reader file.LocationReadCloser) (pkg.Package, bool) { +func (sp setupFileParser) parseQuotedDependency(ctx context.Context, match, line string, reader file.LocationReadCloser) (pkg.Package, bool) { parts := strings.Split(match, "==") if len(parts) != 2 { return pkg.Package{}, false @@ -58,11 +67,11 @@ func parseQuotedDependency(match, line string, reader file.LocationReadCloser) ( name := cleanDependencyString(parts[0]) version := cleanDependencyString(parts[len(parts)-1]) - return validateAndCreatePackage(name, version, line, reader) + return sp.validateAndCreatePackage(ctx, name, version, line, reader) } // processUnquotedDependency extracts and processes an unquoted dependency from a line -func processUnquotedDependency(line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { +func (sp setupFileParser) processUnquotedDependency(ctx context.Context, line string, reader file.LocationReadCloser, packages []pkg.Package) []pkg.Package { matches := unquotedPinnedDependency.FindStringSubmatch(line) if len(matches) != 3 { return packages @@ -71,7 +80,7 @@ func processUnquotedDependency(line string, reader file.LocationReadCloser, pack name := strings.TrimSpace(matches[1]) version := strings.TrimSpace(matches[2]) - if p, ok := validateAndCreatePackage(name, version, line, reader); ok { + if p, ok := sp.validateAndCreatePackage(ctx, name, version, line, reader); ok { if !isDuplicatePackage(p, packages) { packages = append(packages, p) } @@ -87,7 +96,7 @@ func cleanDependencyString(s string) string { return s } -func validateAndCreatePackage(name, version, line string, reader 
file.LocationReadCloser) (pkg.Package, bool) { +func (sp setupFileParser) validateAndCreatePackage(ctx context.Context, name, version, line string, reader file.LocationReadCloser) (pkg.Package, bool) { if hasTemplateDirective(name) || hasTemplateDirective(version) { // this can happen in more dynamic setup.py where there is templating return pkg.Package{}, false @@ -99,6 +108,8 @@ func validateAndCreatePackage(name, version, line string, reader file.LocationRe } p := newPackageForIndex( + ctx, + sp.licenseResolver, name, version, reader.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation), diff --git a/syft/pkg/cataloger/python/parse_setup_test.go b/syft/pkg/cataloger/python/parse_setup_test.go index 74cb604a5..a2c57bb16 100644 --- a/syft/pkg/cataloger/python/parse_setup_test.go +++ b/syft/pkg/cataloger/python/parse_setup_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/stretchr/testify/assert" @@ -159,12 +160,60 @@ func TestParseSetup(t *testing.T) { } var expectedRelationships []artifact.Relationship - pkgtest.TestFileParser(t, tt.fixture, parseSetup, tt.expected, expectedRelationships) + setupFileParser := newSetupFileParser(DefaultCatalogerConfig()) + pkgtest.TestFileParser(t, tt.fixture, setupFileParser.parseSetupFile, tt.expected, expectedRelationships) }) } } +func TestParseSetupFileWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/setup.py" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: 
generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + setupFileParser := newSetupFileParser(tc.config) + pkgtest.TestFileParser(t, fixture, setupFileParser.parseSetupFile, tc.expectedPackages, nil) + }) + } +} func Test_hasTemplateDirective(t *testing.T) { tests := []struct { diff --git a/syft/pkg/cataloger/python/parse_uv_lock.go b/syft/pkg/cataloger/python/parse_uv_lock.go index 0890969ab..75747e9a8 100644 --- a/syft/pkg/cataloger/python/parse_uv_lock.go +++ b/syft/pkg/cataloger/python/parse_uv_lock.go @@ -69,9 +69,21 @@ type uvMetadata struct { ProvidesExtras []string `toml:"provides-extras"` } +type uvLockParser struct { + cfg CatalogerConfig + licenseResolver pythonLicenseResolver +} + +func newUvLockParser(cfg CatalogerConfig) uvLockParser { + return uvLockParser{ + cfg: cfg, + licenseResolver: newPythonLicenseResolver(cfg), + } +} + // parseUvLock is a parser function for uv.lock contents, returning all the pakcages discovered -func parseUvLock(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { - pkgs, err := uvLockPackages(reader) +func (ulp uvLockParser) parseUvLock(ctx context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) { + pkgs, err := ulp.uvLockPackages(ctx, reader) if err != nil { return nil, nil, err } @@ -127,7 +139,7 @@ func 
newPythonUvLockEntry(p uvPackage) pkg.PythonUvLockEntry { } } -func uvLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { +func (ulp uvLockParser) uvLockPackages(ctx context.Context, reader file.LocationReadCloser) ([]pkg.Package, error) { var parsedLockFileVersion uvLockFileVersion // we cannot use the reader twice, so we read the contents first --uv.lock files tend to be small enough @@ -167,6 +179,8 @@ func uvLockPackages(reader file.LocationReadCloser) ([]pkg.Package, error) { for _, p := range parsedLockFile.Packages { pkgs = append(pkgs, newPackageForIndexWithMetadata( + ctx, + ulp.licenseResolver, p.Name, p.Version, newPythonUvLockEntry(p), diff --git a/syft/pkg/cataloger/python/parse_uv_lock_test.go b/syft/pkg/cataloger/python/parse_uv_lock_test.go index 506b9a722..7dd19366e 100644 --- a/syft/pkg/cataloger/python/parse_uv_lock_test.go +++ b/syft/pkg/cataloger/python/parse_uv_lock_test.go @@ -1,6 +1,7 @@ package python import ( + "context" "testing" "github.com/anchore/syft/syft/artifact" @@ -124,5 +125,58 @@ func TestParseUvLock(t *testing.T) { }, } - pkgtest.TestFileParser(t, fixture, parseUvLock, expectedPkgs, expectedRelationships) + uvLockParser := newUvLockParser(DefaultCatalogerConfig()) + pkgtest.TestFileParser(t, fixture, uvLockParser.parseUvLock, expectedPkgs, expectedRelationships) +} + +func TestParseUvLockWithLicenseEnrichment(t *testing.T) { + ctx := context.TODO() + fixture := "test-fixtures/pypi-remote/uv.lock" + locations := file.NewLocationSet(file.NewLocation(fixture)) + mux, url, teardown := setupPypiRegistry() + defer teardown() + tests := []struct { + name string + fixture string + config CatalogerConfig + requestHandlers []handlerPath + expectedPackages []pkg.Package + }{ + { + name: "search remote licenses returns the expected licenses when search is set to true", + config: CatalogerConfig{SearchRemoteLicenses: true}, + requestHandlers: []handlerPath{ + { + path: "/certifi/2025.10.5/json", + handler: 
generateMockPypiRegistryHandler("test-fixtures/pypi-remote/registry_response.json"), + }, + }, + expectedPackages: []pkg.Package{ + { + Name: "certifi", + Version: "2025.10.5", + Locations: locations, + PURL: "pkg:pypi/certifi@2025.10.5", + Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, "MPL-2.0")), + Language: pkg.Python, + Type: pkg.PythonPkg, + Metadata: pkg.PythonUvLockEntry{ + Index: "https://pypi.org/simple", + Dependencies: nil, + }, + }, + }, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // set up the mock server + for _, handler := range tc.requestHandlers { + mux.HandleFunc(handler.path, handler.handler) + } + tc.config.PypiBaseURL = url + uvLockParser := newUvLockParser(tc.config) + pkgtest.TestFileParser(t, fixture, uvLockParser.parseUvLock, tc.expectedPackages, nil) + }) + } } diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/Pipfile.lock b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/Pipfile.lock new file mode 100644 index 000000000..f81595e5c --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/Pipfile.lock @@ -0,0 +1,29 @@ +{ + "_meta": { + "hash": { + "sha256": "a6b2dfd5367688bec81240eb04e7bde7f92b35491be5934fcb4e2e6ca9d275c0" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "certifi": { + "hashes": [ + "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", + "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de" + ], + "index": "pypi", + "version": "==2025.10.5" + } + }, + "develop": {} +} \ No newline at end of file diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pdm.lock b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pdm.lock new file mode 100644 index 000000000..06566ef68 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pdm.lock @@ 
-0,0 +1,12 @@ +[[package]] +name = "certifi" +version = "2025.10.5" +requires_python = ">=3.7" +summary = "Python package for providing Mozilla's CA Bundle." +groups = ["security"] +marker = "python_version >= \"3.7\"" +files = [ + {url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"}, + {url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"}, +] + diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/poetry.lock b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/poetry.lock new file mode 100644 index 000000000..1f9eee138 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/poetry.lock @@ -0,0 +1,13 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + +[[package]] +name = "certifi" +version = "2025.10.5" +description = "Python package for providing Mozilla's CA Bundle." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de"}, + {file = "certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43"}, +] + diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pyproject.toml b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pyproject.toml new file mode 100644 index 000000000..56b3dec12 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/pyproject.toml @@ -0,0 +1,9 @@ +[tool.poetry] +name = "testpkg" +version = "0.1.0" +description = "" +authors = ["Alex Goodman "] +readme = "README.md" + +[tool.poetry.dependencies] +certifi = "^2025.10.5" diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/registry_response.json b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/registry_response.json new file mode 100644 index 000000000..d8bef2928 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/registry_response.json @@ -0,0 +1,116 @@ +{ + "info": { + "author": "Kenneth Reitz", + "author_email": "me@kennethreitz.com", + "bugtrack_url": null, + "classifiers": [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9" + ], + "description": "Certifi: Python SSL 
Certificates\n================================\n\nCertifi provides Mozilla's carefully curated collection of Root Certificates for\nvalidating the trustworthiness of SSL certificates while verifying the identity\nof TLS hosts. It has been extracted from the `Requests`_ project.\n\nInstallation\n------------\n\n``certifi`` is available on PyPI. Simply install it with ``pip``::\n\n $ pip install certifi\n\nUsage\n-----\n\nTo reference the installed certificate authority (CA) bundle, you can use the\nbuilt-in function::\n\n >>> import certifi\n\n >>> certifi.where()\n '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'\n\nOr from the command line::\n\n $ python -m certifi\n /usr/local/lib/python3.7/site-packages/certifi/cacert.pem\n\nEnjoy!\n\n.. _`Requests`: https://requests.readthedocs.io/en/master/\n\nAddition/Removal of Certificates\n--------------------------------\n\nCertifi does not support any addition/removal or other modification of the\nCA trust store content. This project is intended to provide a reliable and\nhighly portable root of trust to python deployments. 
Look to upstream projects\nfor methods to use alternate trust.\n", + "description_content_type": null, + "docs_url": null, + "download_url": null, + "downloads": { + "last_day": -1, + "last_month": -1, + "last_week": -1 + }, + "dynamic": [ + "Author", + "Author-Email", + "Classifier", + "Description", + "Home-Page", + "License", + "License-File", + "Project-Url", + "Requires-Python", + "Summary" + ], + "home_page": "https://github.com/certifi/python-certifi", + "keywords": null, + "license": "MPL-2.0", + "license_expression": null, + "license_files": [ + "LICENSE" + ], + "maintainer": null, + "maintainer_email": null, + "name": "certifi", + "package_url": "https://pypi.org/project/certifi/", + "platform": null, + "project_url": "https://pypi.org/project/certifi/", + "project_urls": { + "Homepage": "https://github.com/certifi/python-certifi", + "Source": "https://github.com/certifi/python-certifi" + }, + "provides_extra": null, + "release_url": "https://pypi.org/project/certifi/2025.10.5/", + "requires_dist": null, + "requires_python": ">=3.7", + "summary": "Python package for providing Mozilla's CA Bundle.", + "version": "2025.10.5", + "yanked": false, + "yanked_reason": null + }, + "last_serial": 31620018, + "urls": [ + { + "comment_text": null, + "digests": { + "blake2b_256": "e437af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed", + "md5": "7b56f7121949a196441739c539fd01be", + "sha256": "0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de" + }, + "downloads": -1, + "filename": "certifi-2025.10.5-py3-none-any.whl", + "has_sig": false, + "md5_digest": "7b56f7121949a196441739c539fd01be", + "packagetype": "bdist_wheel", + "python_version": "py3", + "requires_python": ">=3.7", + "size": 163286, + "upload_time": "2025-10-05T04:12:14", + "upload_time_iso_8601": "2025-10-05T04:12:14.030503Z", + "url": "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", + 
"yanked": false, + "yanked_reason": null + }, + { + "comment_text": null, + "digests": { + "blake2b_256": "4c5bb6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91", + "md5": "7fe97b2e79933430062935f3759a09b0", + "sha256": "47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43" + }, + "downloads": -1, + "filename": "certifi-2025.10.5.tar.gz", + "has_sig": false, + "md5_digest": "7fe97b2e79933430062935f3759a09b0", + "packagetype": "sdist", + "python_version": "source", + "requires_python": ">=3.7", + "size": 164519, + "upload_time": "2025-10-05T04:12:15", + "upload_time_iso_8601": "2025-10-05T04:12:15.808237Z", + "url": "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", + "yanked": false, + "yanked_reason": null + } + ], + "vulnerabilities": [] +} \ No newline at end of file diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/registry_response_bad.json b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/registry_response_bad.json new file mode 100644 index 000000000..e69de29bb diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/requirements.txt b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/requirements.txt new file mode 100644 index 000000000..2602d9775 --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/requirements.txt @@ -0,0 +1 @@ +certifi == 2025.10.5 \ No newline at end of file diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/setup.py b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/setup.py new file mode 100644 index 000000000..2b68d954e --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup + +# Sample setup.py from the pytest project with added comments specific +# to the cataloger + +INSTALL_REQUIRES = [ + "certifi==2025.10.5", +] + + +def main(): + setup( + use_scm_version={"write_to": 
"src/_pytest/_version.py"}, + setup_requires=["setuptools-scm", "setuptools>=40.0"], + package_dir={"": "src"}, + extras_require={}, + install_requires=INSTALL_REQUIRES, + ) + + +if __name__ == "__main__": + main() diff --git a/syft/pkg/cataloger/python/test-fixtures/pypi-remote/uv.lock b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/uv.lock new file mode 100644 index 000000000..82369b40b --- /dev/null +++ b/syft/pkg/cataloger/python/test-fixtures/pypi-remote/uv.lock @@ -0,0 +1,11 @@ +version = 1 +requires-python = ">=3.7" + +[[package]] +name = "certifi" +version = "2025.10.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/5b/b6ce21586237c77ce67d01dc5507039d444b630dd76611bbca2d8e5dcd91/certifi-2025.10.5.tar.gz", hash = "sha256:47c09d31ccf2acf0be3f701ea53595ee7e0b8fa08801c6624be771df09ae7b43", size = 164519 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/37/af0d2ef3967ac0d6113837b44a4f0bfe1328c2b9763bd5b1744520e5cfed/certifi-2025.10.5-py3-none-any.whl", hash = "sha256:0f212c2744a9bb6de0c56639a6f68afe01ecd92d91f14ae897c4fe7bbeeef0de", size = 163286 }, +]