fix: improve dotnet portable executable identification (#2133)

Signed-off-by: Keith Zantow <kzantow@gmail.com>
This commit is contained in:
Keith Zantow 2023-11-29 12:51:24 -05:00 committed by GitHub
parent 5c8dd4c3a7
commit ef5c1651ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 320 additions and 47 deletions

View File

@ -58,23 +58,14 @@ func parseDotnetPortableExecutable(_ file.Resolver, _ *generic.Environment, f fi
func buildDotNetPackage(versionResources map[string]string, f file.LocationReadCloser) (dnpkg pkg.Package, err error) {
name := findName(versionResources)
if name == "" {
return dnpkg, fmt.Errorf("unable to find FileDescription, or ProductName in PE file: %s", f.RealPath)
return dnpkg, fmt.Errorf("unable to find PE name in file: %s", f.RealPath)
}
version := findVersion(versionResources)
if strings.TrimSpace(version) == "" {
return dnpkg, fmt.Errorf("unable to find FileVersion in PE file: %s", f.RealPath)
if version == "" {
return dnpkg, fmt.Errorf("unable to find PE version in file: %s", f.RealPath)
}
purl := packageurl.NewPackageURL(
packageurl.TypeNuget, // See explanation in syft/pkg/cataloger/dotnet/package.go as to why this was chosen.
"",
name,
version,
nil,
"",
).ToString()
metadata := pkg.DotnetPortableExecutableEntry{
AssemblyVersion: versionResources["Assembly Version"],
LegalCopyright: versionResources["LegalCopyright"],
@ -91,7 +82,7 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC
Locations: file.NewLocationSet(f.Location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
Type: pkg.DotnetPkg,
Language: pkg.Dotnet,
PURL: purl,
PURL: portableExecutablePackageURL(name, version),
Metadata: metadata,
}
@ -100,30 +91,128 @@ func buildDotNetPackage(versionResources map[string]string, f file.LocationReadC
return dnpkg, nil
}
func findVersion(versionResources map[string]string) string {
for _, key := range []string{"FileVersion"} {
if version, ok := versionResources[key]; ok {
if strings.TrimSpace(version) == "" {
continue
}
fields := strings.Fields(version)
if len(fields) > 0 {
return fields[0]
}
// portableExecutablePackageURL renders the package URL string used for a portable
// executable with the given name and version. Only the type, name, and version are
// populated; the remaining arguments are passed as empty values.
func portableExecutablePackageURL(name, version string) string {
	// See explanation in syft/pkg/cataloger/dotnet/package.go as to why the nuget type was chosen.
	purl := packageurl.NewPackageURL(packageurl.TypeNuget, "", name, version, nil, "")
	return purl.ToString()
}
// extractVersion reduces a raw version resource value to the leading part that looks
// like a version.
//
// Some example raw values are: "1, 0, 0, 0", "Release 73" or
// "4.7.4076.0 built by: NET472REL1LAST_B". The value is split on whitespace and fields
// are kept from the start; once a kept field has contained a digit, the first field
// without a digit ends the version. Kept fields are re-joined with single spaces.
func extractVersion(version string) string {
	var kept []string
	sawNumber := false
	for _, field := range strings.Fields(strings.TrimSpace(version)) {
		hasNumber := containsNumber(field)
		// a number was already captured and this field has none: the version part is over
		if sawNumber && !hasNumber {
			break
		}
		kept = append(kept, field)
		sawNumber = sawNumber || hasNumber
	}
	return strings.Join(kept, " ")
}
// findVersion chooses the version to report from the "ProductVersion" and "FileVersion"
// resources, after both have been passed through extractVersion.
//
// When there is no product version the file version is returned as-is. Otherwise the
// candidates are compared by whether they contain a digit and by their punctuation
// count (runs of '.' or ','), which serves as a measure of how detailed a version is.
func findVersion(versionResources map[string]string) string {
	productVer := extractVersion(versionResources["ProductVersion"])
	fileVer := extractVersion(versionResources["FileVersion"])
	if productVer == "" {
		return fileVer
	}

	productDetail, fileDetail := punctuationCount(productVer), punctuationCount(fileVer)
	productNumeric, fileNumeric := containsNumber(productVer), containsNumber(fileVer)

	switch {
	case productNumeric && productDetail >= fileDetail:
		// numeric product version that is at least as detailed as the file version
		return productVer
	case fileNumeric && fileDetail > 0:
		// the file version is numeric and has some punctuation
		return fileVer
	case productNumeric:
		return productVer
	case fileNumeric:
		return fileVer
	default:
		// neither candidate contains a digit; fall back to the (non-empty) product version
		return productVer
	}
}
// containsNumber reports whether numberRegex matches anywhere in s.
func containsNumber(s string) bool {
	hasDigit := numberRegex.MatchString(s)
	return hasDigit
}
// punctuationCount returns how many non-overlapping matches of versionPunctuationRegex
// are found in s.
func punctuationCount(s string) int {
	runs := versionPunctuationRegex.FindAllString(s, -1)
	return len(runs)
}
var (
	// spaceRegex matches one or more consecutive whitespace characters; the non-breaking
	// space (nbsp, U+00A0, decimal 160) is included and considered to be a space character
	spaceRegex = regexp.MustCompile(`[\s\xa0]+`)
	// numberRegex matches a single decimal digit
	numberRegex = regexp.MustCompile(`\d`)
	// versionPunctuationRegex matches one or more consecutive '.' or ',' characters
	versionPunctuationRegex = regexp.MustCompile(`[.,]+`)
)
// findName returns the first non-empty, space-normalized value among the name-bearing
// version resource fields, or "" when none of them has a usable value. The order in
// which fields are tried depends on whether the resources look Microsoft-authored.
func findName(versionResources map[string]string) string {
	// PE files found in the wild _not_ authored by Microsoft seem to use ProductName as a clear
	// identifier of the software
	candidates := []string{"ProductName", "FileDescription", "InternalName", "OriginalFilename"}
	if isMicrosoft(versionResources) {
		// Microsoft seems to be consistent using the FileDescription, with a few that are blank;
		// ProductName is tried last, as this is often something very broad like "Microsoft Windows"
		candidates = []string{"FileDescription", "InternalName", "OriginalFilename", "ProductName"}
	}

	for _, key := range candidates {
		if name := spaceNormalize(versionResources[key]); name != "" {
			return name
		}
	}
	return ""
}
func findName(versionResources map[string]string) string {
for _, key := range []string{"FileDescription", "ProductName"} {
if name, ok := versionResources[key]; ok {
if strings.TrimSpace(name) == "" {
continue
}
trimmed := strings.TrimSpace(name)
return regexp.MustCompile(`[^a-zA-Z0-9.]+`).ReplaceAllString(trimmed, "")
}
// spaceNormalize normalizes a string to a trimmed version with all contiguous whitespace
// (including nbsp) collapsed to a single space character. Invalid UTF-8 sequences and
// control characters in the range 0x00-0x1f are removed. An empty or whitespace-only
// input yields "".
func spaceNormalize(value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	// ensure valid utf8 text
	value = strings.ToValidUTF8(value, "")
	// consolidate all space characters
	value = spaceRegex.ReplaceAllString(value, " ")
	// remove other non-space, non-printable characters (0x00-0x1f); done with strings.Map
	// so that no regular expression has to be compiled on every call
	value = strings.Map(func(r rune) rune {
		if r <= 0x1f {
			return -1
		}
		return r
	}, value)
	// consolidate all space characters again in case other non-printables were in-between
	value = spaceRegex.ReplaceAllString(value, " ")
	// finally, remove any remaining surrounding whitespace
	return strings.TrimSpace(value)
}
// isMicrosoft reports whether the "CompanyName" or "ProductName" version resource
// contains the text "microsoft", compared case-insensitively.
func isMicrosoft(versionResources map[string]string) bool {
	for _, field := range []string{"CompanyName", "ProductName"} {
		lowered := strings.ToLower(versionResources[field])
		if strings.Contains(lowered, "microsoft") {
			return true
		}
	}
	return false
}

View File

@ -32,12 +32,8 @@ func TestParseDotnetPortableExecutable(t *testing.T) {
"Assembly Version": "3.14.2.11",
},
expectedPackage: pkg.Package{
Name: "ActiveDirectoryAuthenticationLibrary",
Version: "3.14.40721.0918",
Locations: file.NewLocationSet(file.NewLocation("").WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
Type: pkg.DotnetPkg,
Language: pkg.Dotnet,
PURL: "pkg:nuget/ActiveDirectoryAuthenticationLibrary@3.14.40721.0918",
Name: "Active Directory Authentication Library",
Version: "3.14.40721.0918",
Metadata: pkg.DotnetPortableExecutableEntry{
AssemblyVersion: "3.14.2.11",
LegalCopyright: "Copyright (c) Microsoft Corporation. All rights reserved.",
@ -52,7 +48,7 @@ func TestParseDotnetPortableExecutable(t *testing.T) {
name: "dotnet package with malformed field and extended version",
versionResources: map[string]string{
"CompanyName": "Microsoft Corporation",
"FileDescription": "äbFileVersion",
"FileDescription": "äbFile\xa0\xa1Versi on",
"FileVersion": "4.6.25512.01 built by: dlab-DDVSOWINAGE016. Commit Hash: d0d5c7b49271cadb6d97de26d8e623e98abdc8db",
"InternalName": "äbFileVersion",
"LegalCopyright": "© Microsoft Corporation. All rights reserved.",
@ -61,13 +57,9 @@ func TestParseDotnetPortableExecutable(t *testing.T) {
"ProductVersion": "4.6.25512.01 built by: dlab-DDVSOWINAGE016. Commit Hash: d0d5c7b49271cadb6d97de26d8e623e98abdc8db",
},
expectedPackage: pkg.Package{
Name: "bFileVersion",
Name: "äbFileVersi on",
Version: "4.6.25512.01",
Locations: file.NewLocationSet(
file.NewLocation("").WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)),
Type: pkg.DotnetPkg,
Language: pkg.Dotnet,
PURL: "pkg:nuget/bFileVersion@4.6.25512.01",
PURL: "pkg:nuget/%C3%A4bFileVersi%20on@4.6.25512.01",
Metadata: pkg.DotnetPortableExecutableEntry{
LegalCopyright: "© Microsoft Corporation. All rights reserved.",
InternalName: "äb\x01FileVersion",
@ -77,16 +69,208 @@ func TestParseDotnetPortableExecutable(t *testing.T) {
},
},
},
{
name: "System.Data.Linq.dll",
versionResources: map[string]string{
"CompanyName": "Microsoft Corporation",
"FileDescription": "System.Data.Linq.dll",
"FileVersion": "4.7.3190.0 built by: NET472REL1LAST_C",
"InternalName": "System.Data.Linq.dll",
"LegalCopyright": "© Microsoft Corporation. All rights reserved.",
"OriginalFilename": "System.Data.Linq.dll",
"ProductName": "Microsoft® .NET Framework",
"ProductVersion": "4.7.3190.0",
},
expectedPackage: pkg.Package{
Name: "System.Data.Linq.dll",
Version: "4.7.3190.0",
},
},
{
name: "curl",
versionResources: map[string]string{
"CompanyName": "curl, https://curl.se/",
"FileDescription": "The curl executable",
"FileVersion": "8.4.0",
"InternalName": "curl",
"LegalCopyright": "© Daniel Stenberg, <daniel@haxx.se>.",
"OriginalFilename": "curl.exe",
"ProductName": "The curl executable",
"ProductVersion": "8.4.0",
},
expectedPackage: pkg.Package{
Name: "The curl executable",
Version: "8.4.0",
},
},
{
name: "Prometheus",
versionResources: map[string]string{
"AssemblyVersion": "8.0.0.0",
"CompanyName": "",
"FileDescription": "",
"FileVersion": "8.0.1",
"InternalName": "Prometheus.AspNetCore.dll",
"OriginalFilename": "Prometheus.AspNetCore.dll",
"ProductName": "",
"ProductVersion": "8.0.1",
},
expectedPackage: pkg.Package{
Name: "Prometheus.AspNetCore.dll",
Version: "8.0.1",
},
},
{
name: "Hidden Input",
versionResources: map[string]string{
"FileDescription": "Reads from stdin without leaking info to the terminal and outputs back to stdout",
"FileVersion": "1, 0, 0, 0",
"InternalName": "hiddeninput",
"LegalCopyright": "Jordi Boggiano - 2012",
"OriginalFilename": "hiddeninput.exe",
"ProductName": "Hidden Input",
"ProductVersion": "1, 0, 0, 0",
},
expectedPackage: pkg.Package{
Name: "Hidden Input",
Version: "1, 0, 0, 0",
},
},
{
name: "SQLite3",
versionResources: map[string]string{
"CompanyName": "SQLite Development Team",
"FileDescription": "SQLite is a software library that implements a self-contained, serverless, zero-configuration, transactional SQL database engine.",
"FileVersion": "3.23.2",
"InternalName": "sqlite3",
"LegalCopyright": "http://www.sqlite.org/copyright.html",
"ProductName": "SQLite",
"ProductVersion": "3.23.2",
},
expectedPackage: pkg.Package{
Name: "SQLite",
Version: "3.23.2",
},
},
{
name: "Brave Browser",
versionResources: map[string]string{
"CompanyName": "Brave Software, Inc.",
"FileDescription": "Brave Browser",
"FileVersion": "80.1.7.92",
"InternalName": "chrome_exe",
"LegalCopyright": "Copyright 2016 The Brave Authors. All rights reserved.",
"OriginalFilename": "chrome.exe",
"ProductName": "Brave Browser",
"ProductVersion": "80.1.7.92",
},
expectedPackage: pkg.Package{
Name: "Brave Browser",
Version: "80.1.7.92",
},
},
{
name: "Better product version",
versionResources: map[string]string{
"FileDescription": "Better version",
"FileVersion": "80.1.7",
"ProductVersion": "80.1.7.92",
},
expectedPackage: pkg.Package{
Name: "Better version",
Version: "80.1.7.92",
},
},
{
name: "Better file version",
versionResources: map[string]string{
"FileDescription": "Better version",
"FileVersion": "80.1.7.92",
"ProductVersion": "80.1.7",
},
expectedPackage: pkg.Package{
Name: "Better version",
Version: "80.1.7.92",
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
location := file.NewLocation("")
f := file.LocationReadCloser{
Location: file.NewLocation(""),
Location: location,
}
got, err := buildDotNetPackage(tc.versionResources, f)
assert.NoErrorf(t, err, "failed to build package from version resources: %+v", tc.versionResources)
// ignore certain metadata
if tc.expectedPackage.Metadata == nil {
got.Metadata = nil
}
// set known defaults
if tc.expectedPackage.Type == "" {
tc.expectedPackage.Type = pkg.DotnetPkg
}
if tc.expectedPackage.Language == "" {
tc.expectedPackage.Language = pkg.Dotnet
}
if tc.expectedPackage.PURL == "" {
tc.expectedPackage.PURL = portableExecutablePackageURL(tc.expectedPackage.Name, tc.expectedPackage.Version)
}
tc.expectedPackage.Locations = file.NewLocationSet(location.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation))
pkgtest.AssertPackagesEqual(t, tc.expectedPackage, got)
})
}
}
func Test_extractVersion(t *testing.T) {
tests := []struct {
input string
expected string
}{
{
input: "1, 0, 0, 0",
expected: "1, 0, 0, 0",
},
{
input: "Release 73",
expected: "Release 73",
},
{
input: "4.7.4076.0 built by: NET472REL1LAST_B",
expected: "4.7.4076.0",
},
}
for _, test := range tests {
t.Run(test.input, func(t *testing.T) {
got := extractVersion(test.input)
assert.Equal(t, test.expected, got)
})
}
}
func Test_spaceNormalize(t *testing.T) {
tests := []struct {
input string
expected string
}{
{
expected: "some spaces apart",
input: " some spaces\n\t\t \n\rapart\n",
},
{
expected: "söme ¡nvalid characters",
input: "\rsöme \u0001¡nvalid\t characters\n",
},
}
for _, test := range tests {
t.Run(test.expected, func(t *testing.T) {
got := spaceNormalize(test.input)
assert.Equal(t, test.expected, got)
})
}
}