From 956858fc11ceb6f7bdeab01cc9393969dd875a38 Mon Sep 17 00:00:00 2001 From: Sai Asish Y Date: Mon, 29 Jun 2026 08:00:48 -0700 Subject: [PATCH] ruby/gemspec: resolve simple #{s.name}/#{s.version} interpolation (#4782) * ruby/gemspec: resolve simple #{s.name}/#{s.version} interpolation Reported in anchore/syft#4720: scanning projects that depend on gems like formatador leaks literal Ruby interpolation into the emitted SBOM, e.g. "externalReferences": [ { "url": "https://github.com/geemus/#{s.name}", "type": "website" } ] because formatador.gemspec uses s.homepage = "https://github.com/geemus/#{s.name}" and parseGemSpecEntries reads the file as plain text instead of evaluating it. The interpolation leaks through the captured homepage field and on into any externalReferences entry the cataloger produces. Dependency Track then rejects the whole BOM because '{' and '}' are not valid IRI-reference characters (RFC 3987). Add a post-parse pass that substitutes the common interpolation forms (#{s.name}, #{gem.name}, #{name}, and the matching #{*.version} variants) in captured string fields using values already parsed from the same gemspec. Anything still containing '#{' after best-effort substitution is an unresolvable Ruby expression, and for URL-like fields (currently just homepage) we drop the field entirely so the SBOM is always schema-valid; callers would rather miss a homepage URL than emit one that breaks downstream tools. Adds testdata/formatador.gemspec, a minimal real-world gemspec using the #{s.name} pattern, plus a new parser test asserting that the homepage field comes out fully resolved. 
Fixes #4720 Signed-off-by: Sai Asish Y Signed-off-by: Alex Goodman * improve test cases Signed-off-by: Alex Goodman --------- Signed-off-by: Sai Asish Y Signed-off-by: Alex Goodman Co-authored-by: Alex Goodman --- syft/pkg/cataloger/ruby/parse_gemspec.go | 63 +++++++++++ syft/pkg/cataloger/ruby/parse_gemspec_test.go | 107 ++++++++++++++++++ .../ruby/testdata/formatador.gemspec | 18 +++ 3 files changed, 188 insertions(+) create mode 100644 syft/pkg/cataloger/ruby/testdata/formatador.gemspec diff --git a/syft/pkg/cataloger/ruby/parse_gemspec.go b/syft/pkg/cataloger/ruby/parse_gemspec.go index b14cee704..92b586987 100644 --- a/syft/pkg/cataloger/ruby/parse_gemspec.go +++ b/syft/pkg/cataloger/ruby/parse_gemspec.go @@ -29,6 +29,12 @@ type gemData struct { // match example: Al\u003Ex ---> 003E var unicodePattern = regexp.MustCompile(`\\u(?P[0-9A-F]{4})`) +// match the common Ruby string-interpolation forms gemspec authors use to build +// fields from the gem's own name/version: #{s.name}, #{gem.name}, #{spec.version}, +// bare #{name}, and the same with surrounding whitespace. The optional receiver +// (s./gem./spec./...) is discarded; only the trailing attribute is captured. 
// rubyInterpolationPattern matches the simple Ruby string-interpolation forms
// gemspec authors use to build fields from the gem's own name/version:
// #{s.name}, #{gem.name}, #{spec.version}, bare #{name}, and the same with
// whitespace inside the braces. Only the trailing attribute (name or version)
// is captured; the receiver, when present, is discarded.
//
// The optional receiver must look like a Ruby local variable (leading lowercase
// letter or underscore), because that is the only shape a specification block
// parameter can take. A capitalized receiver is a constant (for example
// #{Bundler.version} or #{File.name}) whose value is unrelated to this gem, so
// it is deliberately left unmatched and ends up treated as an unresolvable
// expression by resolveRubyInterpolationsInFields.
var rubyInterpolationPattern = regexp.MustCompile(`#\{\s*(?:[a-z_]\w*\.)?(name|version)\s*\}`)

// resolveRubyInterpolationsInFields substitutes a handful of well-known Ruby
// string-interpolation placeholders (#{s.name}, #{s.version}, the equivalent
// #{gem.*} / #{spec.*} forms, and bare #{name} / #{version}) in captured
// gemspec string fields, using the "name" and "version" values already present
// in fields. Gemspec authors routinely write things like
//
//	s.homepage = "https://github.com/foo/#{s.name}"
//
// which Ruby evaluates when the gemspec is loaded. The gemspec is read here as
// plain text, so without this pass the literal #{s.name} would leak into the
// emitted SBOM and break CycloneDX schema validation, because '{' and '}' are
// not valid IRI characters (see anchore/syft#4720).
//
// fields is modified in place:
//   - a field whose placeholders all resolve is overwritten with the result;
//   - a field that still contains "#{" afterwards (an arbitrary Ruby
//     expression, a constant receiver, or a reference to a name/version that
//     was not captured) is deleted from the map;
//   - a field that is missing, empty, or not a string is left untouched.
//
// Both the substitution and the drop are driven by the same key list, so
// adding another URL-like field to that list also protects it from leaking
// unresolved interpolation.
func resolveRubyInterpolationsInFields(fields map[string]any) {
	// A failed type assertion yields "", which is handled below exactly like a
	// value that was never captured.
	name, _ := fields["name"].(string)
	version, _ := fields["version"].(string)

	// homepage is currently the only captured string field that flows into a
	// schema-validated URL slot (CycloneDX externalReferences, SPDX homepage).
	for _, key := range []string{"homepage"} {
		v, ok := fields[key].(string)
		if !ok || v == "" {
			continue
		}

		v = rubyInterpolationPattern.ReplaceAllStringFunc(v, func(match string) string {
			// match is guaranteed to satisfy the pattern, so the submatch
			// slice always has the attribute at index 1.
			switch rubyInterpolationPattern.FindStringSubmatch(match)[1] {
			case "name":
				if name != "" {
					return name
				}
			case "version":
				if version != "" {
					return version
				}
			}
			return match // leave unresolved; the field is dropped below
		})

		// Anything still containing "#{" is an unresolvable Ruby expression.
		// Drop the field rather than emit a URL with '{'/'}': a missing
		// homepage is preferable to a BOM that downstream tools reject.
		if strings.Contains(v, "#{") {
			delete(fields, key)
			continue
		}
		fields[key] = v
	}
}
"homepage": "https://github.com/geemus/#{s.name}"}, + wantHomepage: "https://github.com/geemus/formatador", + }, + { + name: "resolves #{s.version}", + fields: map[string]any{"version": "1.1.0", "homepage": "https://example.com/v/#{s.version}"}, + wantHomepage: "https://example.com/v/1.1.0", + }, + { + name: "resolves #{gem.name}", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{gem.name}"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves #{spec.version}", + fields: map[string]any{"version": "2.0", "homepage": "https://x/#{spec.version}"}, + wantHomepage: "https://x/2.0", + }, + { + name: "resolves bare #{name}", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{name}"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves with surrounding whitespace", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{ s.name }"}, + wantHomepage: "https://x/foo", + }, + { + name: "resolves multiple interpolations in one field", + fields: map[string]any{"name": "foo", "version": "1.2", "homepage": "https://x/#{s.name}/#{s.version}"}, + wantHomepage: "https://x/foo/1.2", + }, + { + name: "drops field on unresolvable expression", + fields: map[string]any{"name": "foo", "homepage": "https://x/#{Time.now}"}, + wantDropped: true, + }, + { + name: "drops field when referenced value was not captured", + fields: map[string]any{"homepage": "https://x/#{s.name}"}, + wantDropped: true, + }, + { + name: "leaves plain field untouched", + fields: map[string]any{"name": "foo", "homepage": "https://bundler.io"}, + wantHomepage: "https://bundler.io", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + resolveRubyInterpolationsInFields(tt.fields) + got, present := tt.fields["homepage"].(string) + if tt.wantDropped { + assert.False(t, present, "expected homepage to be dropped, got %q", got) + return + } + assert.Equal(t, tt.wantHomepage, got) + }) + } +} + +// Regression test for 
https://github.com/anchore/syft/issues/4720: +// gemspecs routinely build URL fields from Ruby string interpolation +// (e.g. "https://github.com/geemus/#{s.name}"), and syft used to pass +// those interpolations through into the emitted SBOM, producing URLs +// containing `{` and `}` that fail CycloneDX IRI validation. +func TestParseGemspec_ResolvesRubyInterpolation(t *testing.T) { + fixture := "testdata/formatador.gemspec" + ctx := context.TODO() + locations := file.NewLocationSet(file.NewLocation(fixture)) + + expectedPkg := pkg.Package{ + Name: "formatador", + Version: "1.1.0", + PURL: "pkg:gem/formatador@1.1.0", + Locations: locations, + Type: pkg.GemPkg, + Licenses: pkg.NewLicenseSet( + pkg.NewLicenseFromLocationsWithContext(ctx, "MIT", file.NewLocation(fixture)), + ), + Language: pkg.Ruby, + Metadata: pkg.RubyGemspec{ + Name: "formatador", + Version: "1.1.0", + Files: []string{"lib/formatador.rb"}, + Authors: []string{"geemus (Wesley Beary)"}, + // #{s.name} should have been resolved to the captured name. 
+ Homepage: "https://github.com/geemus/formatador", + }, + } + + pkgtest.TestFileParser(t, fixture, parseGemSpecEntries, []pkg.Package{expectedPkg}, nil) +} diff --git a/syft/pkg/cataloger/ruby/testdata/formatador.gemspec b/syft/pkg/cataloger/ruby/testdata/formatador.gemspec new file mode 100644 index 000000000..36aceba0e --- /dev/null +++ b/syft/pkg/cataloger/ruby/testdata/formatador.gemspec @@ -0,0 +1,18 @@ +# -*- encoding: utf-8 -*- +# stub: formatador 1.1.0 ruby lib + +Gem::Specification.new do |s| + s.name = "formatador".freeze + s.version = "1.1.0".freeze + + s.require_paths = ["lib".freeze] + s.authors = ["geemus (Wesley Beary)".freeze] + s.date = "2026-04-17".freeze + s.email = "geemus@gmail.com".freeze + s.files = ["lib/formatador.rb".freeze] + s.homepage = "https://github.com/geemus/#{s.name}".freeze + s.licenses = ["MIT".freeze] + s.required_ruby_version = Gem::Requirement.new(">= 0".freeze) + s.rubygems_version = "3.5.22".freeze + s.summary = "Ruby STDOUT text progress bar library".freeze +end