diff --git a/syft/pkg/cataloger/ruby/parse_gemspec.go b/syft/pkg/cataloger/ruby/parse_gemspec.go index b14cee704..92b586987 100644 --- a/syft/pkg/cataloger/ruby/parse_gemspec.go +++ b/syft/pkg/cataloger/ruby/parse_gemspec.go @@ -29,6 +29,12 @@ type gemData struct { // match example: Al\u003Ex ---> 003E var unicodePattern = regexp.MustCompile(`\\u(?P[0-9A-F]{4})`) +// match the common Ruby string-interpolation forms gemspec authors use to build +// fields from the gem's own name/version: #{s.name}, #{gem.name}, #{spec.version}, +// bare #{name}, and the same with surrounding whitespace. The optional receiver +// (s./gem./spec./...) is discarded; only the trailing attribute is captured. +var rubyInterpolationPattern = regexp.MustCompile(`#\{\s*(?:\w+\.)?(name|version)\s*\}`) + var patterns = map[string]*regexp.Regexp{ // match example: name = "railties".freeze ---> railties "name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P.*)["']{1} *`), @@ -96,6 +102,8 @@ func parseGemSpecEntries(ctx context.Context, resolver file.Resolver, _ *generic } } + resolveRubyInterpolationsInFields(fields) + if fields["name"] != "" && fields["version"] != "" { var metadata gemData if err := mapstructure.Decode(fields, &metadata); err != nil { @@ -116,6 +124,61 @@ func parseGemSpecEntries(ctx context.Context, resolver file.Resolver, _ *generic return pkgs, nil, nil } +// resolveRubyInterpolationsInFields substitutes a handful of well-known +// Ruby string interpolation placeholders (#{s.name}, #{s.version}, and +// the equivalent #{gem.*} forms) in captured gemspec string fields using +// values already captured from the same file. Gemspec authors routinely +// write things like +// +// s.homepage = "https://github.com/foo/#{s.name}" +// +// which Ruby evaluates before loading the gem. Syft reads the gemspec as +// plain text, so without this pass the literal #{s.name} would leak into +// the SBOM and in particular break CycloneDX schema validation because +// '{' and '}' are not valid IRI characters (see anchore/syft#4720). +// +// We only resolve interpolations pointing at name/version (the values syft has +// already captured from the same file). Both the substitution and the drop +// below are driven by the same field list, so adding a URL-like field here +// keeps it protected from leaking unresolved interpolation. +func resolveRubyInterpolationsInFields(fields map[string]any) { + name, _ := fields["name"].(string) + version, _ := fields["version"].(string) + + // homepage is currently the only captured string field that flows into a + // schema-validated URL slot (CycloneDX externalReferences, SPDX homepage). + for _, key := range []string{"homepage"} { + v, ok := fields[key].(string) + if !ok || v == "" { + continue + } + + v = rubyInterpolationPattern.ReplaceAllStringFunc(v, func(match string) string { + switch rubyInterpolationPattern.FindStringSubmatch(match)[1] { + case "name": + if name != "" { + return name + } + case "version": + if version != "" { + return version + } + } + return match // leave unresolved; the field is dropped below + }) + + // anything still containing a '#{' is an unresolvable Ruby expression. + // Drop the field rather than emit a URL with '{'/'}', which fails + // CycloneDX IRI validation (see anchore/syft#4720); a missing homepage + // is preferable to a BOM downstream tools reject. + if strings.Contains(v, "#{") { + delete(fields, key) + continue + } + fields[key] = v + } +} + // renderUtf8 takes any string escaped string subsections from the ruby string and replaces those sections with the UTF8 runes. func renderUtf8(s string) string { fullReplacement := unicodePattern.ReplaceAllStringFunc(s, func(unicodeSection string) string { diff --git a/syft/pkg/cataloger/ruby/parse_gemspec_test.go b/syft/pkg/cataloger/ruby/parse_gemspec_test.go index 8ed52b834..51496aded 100644 --- a/syft/pkg/cataloger/ruby/parse_gemspec_test.go +++ b/syft/pkg/cataloger/ruby/parse_gemspec_test.go @@ -4,6 +4,8 @@ import ( "context" "testing" + "github.com/stretchr/testify/assert" + "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/pkg" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" @@ -35,3 +37,108 @@ func TestParseGemspec(t *testing.T) { pkgtest.TestFileParser(t, fixture, parseGemSpecEntries, []pkg.Package{expectedPkg}, nil) } + +func TestResolveRubyInterpolationsInFields(t *testing.T) { + tests := []struct { + name string + fields map[string]any + wantHomepage string // "" with wantDropped=true means the key should be absent + wantDropped bool + }{ + { + name: "resolves #{s.name}", + fields: map[string]any{"name": "formatador", "homepage": "https://github.com/geemus/#{s.name}"}, + wantHomepage: "https://github.com/geemus/formatador", + }, + { + name: "resolves #{s.version}", + fields: map[string]any{"version": "1.1.0", "homepage": "https://example.com/v/#{s.version}"}, + wantHomepage: "https://example.com/v/1.1.0", + }, + { + name: "resolves #{gem.name}", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{gem.name}"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves #{spec.version}", + fields: map[string]any{"version": "2.0", "homepage": "https://x/#{spec.version}"}, + wantHomepage: "https://x/2.0", + }, + { + name: "resolves bare #{name}", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{name}"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves with surrounding whitespace", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{ s.name }"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves multiple interpolations in one field", + fields: map[string]any{"name": "foo", "version": "1.2", "homepage": "https://x/#{s.name}/#{s.version}"}, + wantHomepage: "https://x/foo/1.2", + }, + { + name: "drops field on unresolvable expression", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{Time.now}"}, + wantDropped: true, + }, + { + name: "drops field when referenced value was not captured", + fields: map[string]any{"homepage": "https://x/#{s.name}"}, + wantDropped: true, + }, + { + name: "leaves plain field untouched", + fields: map[string]any{"name": "foo", "homepage": "https://bundler.io"}, + wantHomepage: "https://bundler.io", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resolveRubyInterpolationsInFields(tt.fields) + got, present := tt.fields["homepage"].(string) + if tt.wantDropped { + assert.False(t, present, "expected homepage to be dropped, got %q", got) + return + } + assert.Equal(t, tt.wantHomepage, got) + }) + } +} + +// Regression test for https://github.com/anchore/syft/issues/4720: +// gemspecs routinely build URL fields from Ruby string interpolation +// (e.g. "https://github.com/geemus/#{s.name}"), and syft used to pass +// those interpolations through into the emitted SBOM, producing URLs +// containing `{` and `}` that fail CycloneDX IRI validation. +func TestParseGemspec_ResolvesRubyInterpolation(t *testing.T) { + fixture := "testdata/formatador.gemspec" + ctx := context.TODO() + locations := file.NewLocationSet(file.NewLocation(fixture)) + + expectedPkg := pkg.Package{ + Name: "formatador", + Version: "1.1.0", + PURL: "pkg:gem/formatador@1.1.0", + Locations: locations, + Type: pkg.GemPkg, + Licenses: pkg.NewLicenseSet( + pkg.NewLicenseFromLocationsWithContext(ctx, "MIT", file.NewLocation(fixture)), + ), + Language: pkg.Ruby, + Metadata: pkg.RubyGemspec{ + Name: "formatador", + Version: "1.1.0", + Files: []string{"lib/formatador.rb"}, + Authors: []string{"geemus (Wesley Beary)"}, + // #{s.name} should have been resolved to the captured name. + Homepage: "https://github.com/geemus/formatador", + }, + } + + pkgtest.TestFileParser(t, fixture, parseGemSpecEntries, []pkg.Package{expectedPkg}, nil) +} diff --git a/syft/pkg/cataloger/ruby/testdata/formatador.gemspec b/syft/pkg/cataloger/ruby/testdata/formatador.gemspec new file mode 100644 index 000000000..36aceba0e --- /dev/null +++ b/syft/pkg/cataloger/ruby/testdata/formatador.gemspec @@ -0,0 +1,18 @@ +# -*- encoding: utf-8 -*- +# stub: formatador 1.1.0 ruby lib + +Gem::Specification.new do |s| + s.name = "formatador".freeze + s.version = "1.1.0".freeze + + s.require_paths = ["lib".freeze] + s.authors = ["geemus (Wesley Beary)".freeze] + s.date = "2026-04-17".freeze + s.email = "geemus@gmail.com".freeze + s.files = ["lib/formatador.rb".freeze] + s.homepage = "https://github.com/geemus/#{s.name}".freeze + s.licenses = ["MIT".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 0".freeze) + s.rubygems_version = "3.5.22".freeze + s.summary = "Ruby STDOUT text progress bar library".freeze +end