ruby/gemspec: resolve simple #{s.name}/#{s.version} interpolation (#4782)

* ruby/gemspec: resolve simple #{s.name}/#{s.version} interpolation

Reported in anchore/syft#4720: scanning projects that depend on gems
like formatador leaks literal Ruby interpolation into the emitted
SBOM, e.g.

    "externalReferences": [
      { "url": "https://github.com/geemus/#{s.name}", "type": "website" }
    ]

because formatador.gemspec uses

    s.homepage = "https://github.com/geemus/#{s.name}"

and parseGemSpecEntries reads the file as plain text instead of
evaluating it. The interpolation leaks through the captured homepage
field and on into any externalReferences entry the cataloger produces.
Dependency Track then rejects the whole BOM because '{' and '}' are
not valid IRI-reference characters (RFC 3987).

Add a post-parse pass that substitutes the common interpolation forms
(#{s.name}, #{gem.name}, #{name}, and the matching #{*.version}
variants) in captured string fields using values already parsed from
the same gemspec. Anything still containing '#{' after best-effort
substitution is an unresolvable Ruby expression, and for URL-like
fields (currently just homepage) we drop the field entirely so the
SBOM is always schema-valid; callers would rather miss a homepage URL
than emit one that breaks downstream tools.

Adds testdata/formatador.gemspec, a minimal real-world gemspec using
the #{s.name} pattern, plus a new parser test asserting that the
homepage field comes out fully resolved.

Fixes #4720

Signed-off-by: Sai Asish Y <say.apm35@gmail.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

* improve test cases

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>

---------

Signed-off-by: Sai Asish Y <say.apm35@gmail.com>
Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
Co-authored-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Sai Asish Y 2026-06-29 08:00:48 -07:00 committed by GitHub
parent 37fee88b5c
commit 956858fc11
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 188 additions and 0 deletions

View File

@ -29,6 +29,12 @@ type gemData struct {
// unicodePattern finds escaped unicode sequences left in gemspec strings: a
// literal `\u` followed by exactly four characters from 0-9 / A-F (uppercase
// only). The four characters are captured in the named group "unicode".
// match example: Al\u003Ex ---> 003E
var unicodePattern = regexp.MustCompile(`\\u(?P<unicode>[0-9A-F]{4})`)
// rubyInterpolationPattern matches the common Ruby string-interpolation forms
// gemspec authors use to build fields from the gem's own name/version:
// #{s.name}, #{gem.name}, #{spec.version}, bare #{name}, and the same with
// whitespace just inside the braces. The optional receiver (s./gem./spec./...)
// is discarded; only the trailing attribute ("name" or "version") is captured,
// as submatch 1.
//
// Note that the receiver is matched as any single `\w+.` prefix, so the pattern
// does not check that the receiver is actually the gem specification object.
// Matching is case-sensitive and limited to exactly these two attributes; any
// other expression (method chains, constants, calls) does not match.
var rubyInterpolationPattern = regexp.MustCompile(`#\{\s*(?:\w+\.)?(name|version)\s*\}`)
var patterns = map[string]*regexp.Regexp{
// match example: name = "railties".freeze ---> railties
"name": regexp.MustCompile(`.*\.name\s*=\s*["']{1}(?P<name>.*)["']{1} *`),
@ -96,6 +102,8 @@ func parseGemSpecEntries(ctx context.Context, resolver file.Resolver, _ *generic
}
}
resolveRubyInterpolationsInFields(fields)
if fields["name"] != "" && fields["version"] != "" {
var metadata gemData
if err := mapstructure.Decode(fields, &metadata); err != nil {
@ -116,6 +124,61 @@ func parseGemSpecEntries(ctx context.Context, resolver file.Resolver, _ *generic
return pkgs, nil, nil
}
// resolveRubyInterpolationsInFields rewrites, in place, the URL-like string
// fields of a parsed gemspec so that simple Ruby interpolation placeholders are
// replaced with values captured from the same file. A gemspec line such as
//
//	s.homepage = "https://github.com/foo/#{s.name}"
//
// is evaluated by Ruby, but syft only reads the gemspec as text, so the literal
// "#{s.name}" would otherwise end up in the SBOM, where '{' and '}' are not
// valid IRI characters and break CycloneDX validation (see anchore/syft#4720).
//
// Only placeholders recognized by rubyInterpolationPattern (name / version,
// with an optional receiver) are substituted, and only when the corresponding
// entry in fields is a non-empty string. If a field still contains "#{" after
// substitution, the key is deleted from fields instead of being emitted.
//
// Substitution and removal share one list of field names, so a URL-like field
// added to that list gets both behaviors.
func resolveRubyInterpolationsInFields(fields map[string]any) {
	// Values a placeholder may refer to, keyed by the attribute the pattern
	// captures. Entries that are missing or not strings are simply absent, and
	// an absent entry reads back as "" below.
	captured := make(map[string]string, 2)
	for _, attr := range []string{"name", "version"} {
		if s, isString := fields[attr].(string); isString {
			captured[attr] = s
		}
	}

	// homepage is currently the only captured string field that flows into a
	// schema-validated URL slot (CycloneDX externalReferences, SPDX homepage).
	urlFields := []string{"homepage"}

	for _, field := range urlFields {
		raw, isString := fields[field].(string)
		if !isString || raw == "" {
			continue
		}

		resolved := rubyInterpolationPattern.ReplaceAllStringFunc(raw, func(placeholder string) string {
			groups := rubyInterpolationPattern.FindStringSubmatch(placeholder)
			if value := captured[groups[1]]; value != "" {
				return value
			}
			// nothing to substitute; keep the placeholder so the field is
			// recognized as unresolved and removed below
			return placeholder
		})

		if !strings.Contains(resolved, "#{") {
			fields[field] = resolved
			continue
		}

		// A remaining "#{" means a Ruby expression we could not resolve. A
		// missing homepage is preferable to a URL containing '{'/'}', which
		// downstream tools reject (see anchore/syft#4720).
		delete(fields, field)
	}
}
// renderUtf8 takes any string escaped string subsections from the ruby string and replaces those sections with the UTF8 runes.
func renderUtf8(s string) string {
fullReplacement := unicodePattern.ReplaceAllStringFunc(s, func(unicodeSection string) string {

View File

@ -4,6 +4,8 @@ import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
@ -35,3 +37,108 @@ func TestParseGemspec(t *testing.T) {
pkgtest.TestFileParser(t, fixture, parseGemSpecEntries, []pkg.Package{expectedPkg}, nil)
}
// TestResolveRubyInterpolationsInFields drives resolveRubyInterpolationsInFields
// with hand-built field maps and checks the resulting "homepage" entry: either
// the fully substituted URL, or no string value at all when the field is
// expected to be dropped.
func TestResolveRubyInterpolationsInFields(t *testing.T) {
	type testCase struct {
		name         string
		fields       map[string]any
		wantHomepage string // only checked when wantDropped is false
		wantDropped  bool   // true: "homepage" must no longer hold a string
	}

	cases := []testCase{
		{
			name: "resolves #{s.name}",
			fields: map[string]any{
				"name":     "formatador",
				"homepage": "https://github.com/geemus/#{s.name}",
			},
			wantHomepage: "https://github.com/geemus/formatador",
		},
		{
			name: "resolves #{s.version}",
			fields: map[string]any{
				"version":  "1.1.0",
				"homepage": "https://example.com/v/#{s.version}",
			},
			wantHomepage: "https://example.com/v/1.1.0",
		},
		{
			name: "resolves #{gem.name}",
			fields: map[string]any{
				"name":     "foo",
				"homepage": "https://x/#{gem.name}",
			},
			wantHomepage: "https://x/foo",
		},
		{
			name: "resolves #{spec.version}",
			fields: map[string]any{
				"version":  "2.0",
				"homepage": "https://x/#{spec.version}",
			},
			wantHomepage: "https://x/2.0",
		},
		{
			name: "resolves bare #{name}",
			fields: map[string]any{
				"name":     "foo",
				"homepage": "https://x/#{name}",
			},
			wantHomepage: "https://x/foo",
		},
		{
			name: "resolves with surrounding whitespace",
			fields: map[string]any{
				"name":     "foo",
				"homepage": "https://x/#{ s.name }",
			},
			wantHomepage: "https://x/foo",
		},
		{
			name: "resolves multiple interpolations in one field",
			fields: map[string]any{
				"name":     "foo",
				"version":  "1.2",
				"homepage": "https://x/#{s.name}/#{s.version}",
			},
			wantHomepage: "https://x/foo/1.2",
		},
		{
			name: "drops field on unresolvable expression",
			fields: map[string]any{
				"name":     "foo",
				"homepage": "https://x/#{Time.now}",
			},
			wantDropped: true,
		},
		{
			name: "drops field when referenced value was not captured",
			fields: map[string]any{
				"homepage": "https://x/#{s.name}",
			},
			wantDropped: true,
		},
		{
			name: "leaves plain field untouched",
			fields: map[string]any{
				"name":     "foo",
				"homepage": "https://bundler.io",
			},
			wantHomepage: "https://bundler.io",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// the function mutates the map in place, so inspect it afterwards
			resolveRubyInterpolationsInFields(tc.fields)

			homepage, isString := tc.fields["homepage"].(string)
			if !tc.wantDropped {
				assert.Equal(t, tc.wantHomepage, homepage)
				return
			}
			assert.False(t, isString, "expected homepage to be dropped, got %q", homepage)
		})
	}
}
// TestParseGemspec_ResolvesRubyInterpolation is the end-to-end regression test
// for https://github.com/anchore/syft/issues/4720. Gemspecs routinely build URL
// fields with Ruby string interpolation (e.g.
// "https://github.com/geemus/#{s.name}"); syft used to copy the placeholder
// verbatim into the SBOM, yielding URLs with `{` and `}` that fail CycloneDX
// IRI validation. The fixture's homepage uses #{s.name}, and the parsed package
// is expected to carry the substituted URL.
func TestParseGemspec_ResolvesRubyInterpolation(t *testing.T) {
	const fixture = "testdata/formatador.gemspec"

	ctx := context.TODO()
	locations := file.NewLocationSet(file.NewLocation(fixture))
	licenses := pkg.NewLicenseSet(
		pkg.NewLicenseFromLocationsWithContext(ctx, "MIT", file.NewLocation(fixture)),
	)

	metadata := pkg.RubyGemspec{
		Name:    "formatador",
		Version: "1.1.0",
		Files:   []string{"lib/formatador.rb"},
		Authors: []string{"geemus (Wesley Beary)"},
		// #{s.name} should have been resolved to the captured name.
		Homepage: "https://github.com/geemus/formatador",
	}

	want := pkg.Package{
		Name:      "formatador",
		Version:   "1.1.0",
		PURL:      "pkg:gem/formatador@1.1.0",
		Locations: locations,
		Type:      pkg.GemPkg,
		Licenses:  licenses,
		Language:  pkg.Ruby,
		Metadata:  metadata,
	}

	pkgtest.TestFileParser(t, fixture, parseGemSpecEntries, []pkg.Package{want}, nil)
}

View File

@ -0,0 +1,18 @@
# -*- encoding: utf-8 -*-
# stub: formatador 1.1.0 ruby lib
# Test fixture: a minimal gemspec whose homepage is built with Ruby string
# interpolation, used by the gemspec parser regression test.
Gem::Specification.new do |s|
s.name = "formatador".freeze
s.version = "1.1.0".freeze
s.require_paths = ["lib".freeze]
s.authors = ["geemus (Wesley Beary)".freeze]
s.date = "2026-04-17".freeze
s.email = "geemus@gmail.com".freeze
s.files = ["lib/formatador.rb".freeze]
# The homepage deliberately interpolates the gem name; the parser test expects
# the placeholder to be replaced with "formatador".
s.homepage = "https://github.com/geemus/#{s.name}".freeze
s.licenses = ["MIT".freeze]
s.required_ruby_version = Gem::Requirement.new(">= 0".freeze)
s.rubygems_version = "3.5.22".freeze
s.summary = "Ruby STDOUT text progress bar library".freeze
end