add PE binary cataloger (#3911)

Signed-off-by: Alex Goodman <wagoodman@users.noreply.github.com>
This commit is contained in:
Alex Goodman 2025-05-19 14:17:09 -04:00 committed by GitHub
parent b4ca04001c
commit e23ca43a83
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
26 changed files with 4161 additions and 734 deletions

View File

@ -3,5 +3,5 @@ package internal
const ( const (
// JSONSchemaVersion is the current schema version output by the JSON encoder // JSONSchemaVersion is the current schema version output by the JSON encoder
// This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment. // This is roughly following the "SchemaVer" guidelines for versioning the JSON schema. Please see schema/json/README.md for details on how to increment.
JSONSchemaVersion = "16.0.33" JSONSchemaVersion = "16.0.34"
) )

View File

@ -154,7 +154,8 @@ func DefaultPackageTaskFactories() Factories {
}, },
pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary",
), ),
newSimplePackageTaskFactory(binary.NewELFPackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", "elf-package"), newSimplePackageTaskFactory(binary.NewELFPackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", "elf-package", "elf"),
newSimplePackageTaskFactory(binary.NewPEPackageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "binary", "pe-package", "pe", "dll", "exe"),
newSimplePackageTaskFactory(githubactions.NewActionUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"), newSimplePackageTaskFactory(githubactions.NewActionUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(githubactions.NewWorkflowUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"), newSimplePackageTaskFactory(githubactions.NewWorkflowUsageCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, "github", "github-actions"),
newSimplePackageTaskFactory(java.NewJvmDistributionCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "java", "jvm", "jdk", "jre"), newSimplePackageTaskFactory(java.NewJvmDistributionCataloger, pkgcataloging.DeclaredTag, pkgcataloging.DirectoryTag, pkgcataloging.InstalledTag, pkgcataloging.ImageTag, "java", "jvm", "jdk", "jre"),

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"$schema": "https://json-schema.org/draft/2020-12/schema", "$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "anchore.io/schema/syft/json/16.0.33/document", "$id": "anchore.io/schema/syft/json/16.0.34/document",
"$ref": "#/$defs/Document", "$ref": "#/$defs/Document",
"$defs": { "$defs": {
"AlpmDbEntry": { "AlpmDbEntry": {
@ -1971,6 +1971,9 @@
{ {
"$ref": "#/$defs/OpamPackage" "$ref": "#/$defs/OpamPackage"
}, },
{
"$ref": "#/$defs/PeBinary"
},
{ {
"$ref": "#/$defs/PhpComposerInstalledEntry" "$ref": "#/$defs/PhpComposerInstalledEntry"
}, },
@ -2045,6 +2048,17 @@
"purl" "purl"
] ]
}, },
"PeBinary": {
"properties": {
"VersionResources": {
"$ref": "#/$defs/KeyValues"
}
},
"type": "object",
"required": [
"VersionResources"
]
},
"PhpComposerAuthors": { "PhpComposerAuthors": {
"properties": { "properties": {
"name": { "name": {

View File

@ -50,6 +50,14 @@ func Originator(p pkg.Package) (typ string, author string) { //nolint: gocyclo,f
case pkg.DotnetPortableExecutableEntry: case pkg.DotnetPortableExecutableEntry:
typ = orgType typ = orgType
author = metadata.CompanyName author = metadata.CompanyName
case pkg.PEBinary:
// this is a known common keyword used in version resources
// for more info see: https://learn.microsoft.com/en-us/windows/win32/menurc/versioninfo-resource
val, ok := metadata.VersionResources.Get("CompanyName")
if ok {
typ = orgType
author = val
}
case pkg.DpkgDBEntry: case pkg.DpkgDBEntry:
author = metadata.Maintainer author = metadata.Maintainer

View File

@ -112,6 +112,21 @@ func Test_OriginatorSupplier(t *testing.T) {
originator: "Organization: Microsoft Corporation", originator: "Organization: Microsoft Corporation",
supplier: "Organization: Microsoft Corporation", supplier: "Organization: Microsoft Corporation",
}, },
{
name: "from PE binary",
input: pkg.Package{
Metadata: pkg.PEBinary{
VersionResources: pkg.KeyValues{
{
Key: "CompanyName",
Value: "Microsoft Corporation",
},
},
},
},
originator: "Organization: Microsoft Corporation",
supplier: "Organization: Microsoft Corporation",
},
{ {
name: "from dpkg DB", name: "from dpkg DB",
input: pkg.Package{ input: pkg.Package{

View File

@ -42,6 +42,7 @@ func AllTypes() []any {
pkg.NpmPackage{}, pkg.NpmPackage{},
pkg.NpmPackageLockEntry{}, pkg.NpmPackageLockEntry{},
pkg.OpamPackage{}, pkg.OpamPackage{},
pkg.PEBinary{},
pkg.PhpComposerInstalledEntry{}, pkg.PhpComposerInstalledEntry{},
pkg.PhpComposerLockEntry{}, pkg.PhpComposerLockEntry{},
pkg.PhpPearEntry{}, pkg.PhpPearEntry{},

View File

@ -94,6 +94,7 @@ var jsonTypes = makeJSONTypes(
jsonNames(pkg.NpmPackage{}, "javascript-npm-package", "NpmPackageJsonMetadata"), jsonNames(pkg.NpmPackage{}, "javascript-npm-package", "NpmPackageJsonMetadata"),
jsonNames(pkg.NpmPackageLockEntry{}, "javascript-npm-package-lock-entry", "NpmPackageLockJsonMetadata"), jsonNames(pkg.NpmPackageLockEntry{}, "javascript-npm-package-lock-entry", "NpmPackageLockJsonMetadata"),
jsonNames(pkg.YarnLockEntry{}, "javascript-yarn-lock-entry", "YarnLockJsonMetadata"), jsonNames(pkg.YarnLockEntry{}, "javascript-yarn-lock-entry", "YarnLockJsonMetadata"),
jsonNames(pkg.PEBinary{}, "pe-binary"),
jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"), jsonNames(pkg.PhpComposerLockEntry{}, "php-composer-lock-entry", "PhpComposerJsonMetadata"),
jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, "php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred jsonNamesWithoutLookup(pkg.PhpComposerInstalledEntry{}, "php-composer-installed-entry", "PhpComposerJsonMetadata"), // the legacy value is split into two types, where the other is preferred
jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"), jsonNames(pkg.PhpPeclEntry{}, "php-pecl-entry", "PhpPeclMetadata"),

View File

@ -46,3 +46,8 @@ type ELFBinaryPackageNoteJSONPayload struct {
// Commit is the commit hash of the source repository for which the binary was built from // Commit is the commit hash of the source repository for which the binary was built from
Commit string `json:"commit,omitempty"` Commit string `json:"commit,omitempty"`
} }
// PEBinary represents metadata captured from a Portable Executable formatted binary (dll, exe, etc.)
type PEBinary struct {
// VersionResources holds the key/value pairs extracted from the binary's version resources
// (for example "CompanyName", "ProductName", "FileVersion").
VersionResources KeyValues
}

View File

@ -15,7 +15,7 @@ func newELFPackage(ctx context.Context, metadata elfBinaryPackageNotes, location
Name: metadata.Name, Name: metadata.Name,
Version: metadata.Version, Version: metadata.Version,
Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, metadata.License)), Licenses: pkg.NewLicenseSet(pkg.NewLicenseWithContext(ctx, metadata.License)),
PURL: packageURL(metadata), PURL: elfPackageURL(metadata),
Type: pkgType(metadata.Type), Type: pkgType(metadata.Type),
Locations: locations, Locations: locations,
Metadata: metadata.ELFBinaryPackageNoteJSONPayload, Metadata: metadata.ELFBinaryPackageNoteJSONPayload,
@ -26,7 +26,7 @@ func newELFPackage(ctx context.Context, metadata elfBinaryPackageNotes, location
return p return p
} }
func packageURL(metadata elfBinaryPackageNotes) string { func elfPackageURL(metadata elfBinaryPackageNotes) string {
var qualifiers []packageurl.Qualifier var qualifiers []packageurl.Qualifier
os, osVersion := osNameAndVersionFromMetadata(metadata) os, osVersion := osNameAndVersionFromMetadata(metadata)

View File

@ -10,7 +10,7 @@ import (
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
) )
func Test_ELF_Package_Cataloger(t *testing.T) { func Test_ELFPackageCataloger(t *testing.T) {
cases := []struct { cases := []struct {
name string name string

View File

@ -130,7 +130,7 @@ func Test_packageURL(t *testing.T) {
for _, test := range tests { for _, test := range tests {
t.Run(test.name, func(t *testing.T) { t.Run(test.name, func(t *testing.T) {
assert.Equal(t, test.want, packageURL(test.metadata)) assert.Equal(t, test.want, elfPackageURL(test.metadata))
}) })
} }
} }

View File

@ -0,0 +1,134 @@
package binary
import (
"path"
"regexp"
"sort"
"strings"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
)
var (
// spaceRegex matches one or more consecutive whitespace characters; the non-breaking space
// (U+00A0, decimal 160) is also considered to be a space character
spaceRegex = regexp.MustCompile(`[\s\xa0]+`)
// numberRegex matches a single digit anywhere in a string
numberRegex = regexp.MustCompile(`\d`)
)
// newPEPackage builds a binary package describing the PE file found at the given location.
// The package name comes from the version resources when one can be found there; otherwise it
// is derived from the file's base name. The version is always taken from the version resources.
func newPEPackage(versionResources map[string]string, f file.Location) pkg.Package {
	pkgName := findNameFromVR(versionResources)
	if pkgName == "" {
		// the version resources may be empty: use the file name with a trailing ".exe" and then
		// a trailing ".dll" removed
		base := strings.TrimSuffix(path.Base(f.RealPath), ".exe")
		pkgName = strings.TrimSuffix(base, ".dll")
	}

	// the PE file itself is the primary evidence for this package
	evidence := f.WithAnnotation(pkg.EvidenceAnnotationKey, pkg.PrimaryEvidenceAnnotation)

	result := pkg.Package{
		Name:      pkgName,
		Version:   findVersionFromVR(versionResources),
		Locations: file.NewLocationSet(evidence),
		Type:      pkg.BinaryPkg,
		Metadata:  newPEBinaryVersionResourcesFromMap(versionResources),
	}
	result.SetID()

	return result
}
// newPEBinaryVersionResourcesFromMap converts a map of version resource keys and values into
// PE binary metadata. Values are whitespace-normalized, entries whose value is empty after
// normalization are dropped, and the result is sorted by key so the output is deterministic
// regardless of map iteration order.
func newPEBinaryVersionResourcesFromMap(vr map[string]string) pkg.PEBinary {
	var kvs pkg.KeyValues
	for k, v := range vr {
		// normalize before the emptiness check so whitespace-only values are skipped
		// instead of being recorded as empty strings
		value := spaceNormalize(v)
		if value == "" {
			continue
		}
		kvs = append(kvs, pkg.KeyValue{
			Key:   k,
			Value: value,
		})
	}

	sort.Slice(kvs, func(i, j int) bool {
		return kvs[i].Key < kvs[j].Key
	})

	return pkg.PEBinary{
		VersionResources: kvs,
	}
}
// findNameFromVR picks a package name from the version resources. Candidate fields are tried
// in priority order and the first one with a non-empty, whitespace-normalized value wins.
// An empty string is returned when no candidate field has a usable value.
func findNameFromVR(versionResources map[string]string) string {
	// PE files not authored by Microsoft tend to use ProductName as an identifier.
	candidates := []string{"ProductName", "FileDescription", "InternalName", "OriginalFilename"}
	if isMicrosoftVR(versionResources) {
		// for Microsoft files, prioritize FileDescription.
		candidates = []string{"FileDescription", "InternalName", "OriginalFilename", "ProductName"}
	}

	for _, key := range candidates {
		if normalized := spaceNormalize(versionResources[key]); normalized != "" {
			return normalized
		}
	}

	return ""
}
// isMicrosoftVR reports whether the version resources look like they belong to a Microsoft
// authored file: either the CompanyName or the ProductName value contains "microsoft"
// (compared case-insensitively).
func isMicrosoftVR(versionResources map[string]string) bool {
	for _, key := range []string{"CompanyName", "ProductName"} {
		if strings.Contains(strings.ToLower(versionResources[key]), "microsoft") {
			return true
		}
	}
	return false
}
// spaceNormalize trims and normalizes whitespace in a string.
//
// The value is trimmed, invalid UTF-8 sequences are dropped, every run of whitespace (including
// non-breaking spaces) is collapsed to a single space, and control characters (U+0000-U+001F)
// are removed. An empty string is returned when nothing remains.
func spaceNormalize(value string) string {
	value = strings.TrimSpace(value)
	if value == "" {
		return ""
	}
	// ensure valid UTF-8.
	value = strings.ToValidUTF8(value, "")
	// consolidate all whitespace.
	value = spaceRegex.ReplaceAllString(value, " ")
	// remove non-printable characters; strings.Map drops a rune when the mapping returns a
	// negative value, which avoids compiling a regular expression on every call.
	value = strings.Map(func(r rune) rune {
		if r < 0x20 {
			return -1
		}
		return r
	}, value)
	// consolidate again and trim: removing control characters may have left adjacent spaces.
	value = spaceRegex.ReplaceAllString(value, " ")
	return strings.TrimSpace(value)
}
// findVersionFromVR selects the package version from the version resources, preferring the
// value extracted from ProductVersion and falling back to the one extracted from FileVersion.
func findVersionFromVR(versionResources map[string]string) string {
	if fromProduct := extractVersionFromResourcesValue(versionResources["ProductVersion"]); fromProduct != "" {
		return fromProduct
	}
	return extractVersionFromResourcesValue(versionResources["FileVersion"])
}
// extractVersionFromResourcesValue pulls the leading version-like portion out of a version
// resource value. The value is split on whitespace and fields are kept from the start; once
// any kept field contains a digit, the first following field without a digit ends the version.
// The kept fields are joined with single spaces.
func extractVersionFromResourcesValue(version string) string {
	fields := strings.Fields(version)

	seenDigit := false
	for i, field := range fields {
		fieldHasDigit := containsNumber(field)
		if seenDigit && !fieldHasDigit {
			// everything from here on is trailing commentary, not part of the version
			return strings.Join(fields[:i], " ")
		}
		seenDigit = seenDigit || fieldHasDigit
	}

	return strings.Join(fields, " ")
}
// containsNumber reports whether s contains at least one digit (as matched by numberRegex).
func containsNumber(s string) bool {
return numberRegex.MatchString(s)
}

View File

@ -0,0 +1,39 @@
package binary
import (
"context"
"fmt"
"github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/generic"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pe"
)
// NewPEPackageCataloger returns a cataloger that interprets packages from DLL and EXE files.
// The cataloger is named "pe-binary-package-cataloger" and hands every file matching the
// "**/*.dll" or "**/*.exe" globs to parsePE.
func NewPEPackageCataloger() pkg.Cataloger {
return generic.NewCataloger("pe-binary-package-cataloger").
WithParserByGlobs(parsePE, "**/*.dll", "**/*.exe")
}
// parsePE reads a single PE file and returns at most one package describing it.
//
// A read failure is returned as an error, a file that yields no PE information is reported as
// an unknown, and files showing evidence of the CLR are skipped without error. No relationships
// are ever returned.
func parsePE(_ context.Context, _ file.Resolver, _ *generic.Environment, reader file.LocationReadCloser) ([]pkg.Package, []artifact.Relationship, error) {
	peFile, err := pe.Read(reader)
	switch {
	case err != nil:
		return nil, nil, fmt.Errorf("failed to parse PE file %q: %w", reader.RealPath, err)
	case peFile == nil:
		return nil, nil, unknown.Newf(reader, "unable to determine packages")
	case peFile.CLR.HasEvidenceOfCLR():
		// this is for a .NET application, which is covered by other catalogers already
		return nil, nil, nil
	}

	return []pkg.Package{newPEPackage(peFile.VersionResources, peFile.Location)}, nil, nil
}

View File

@ -0,0 +1,64 @@
package binary
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/pkg"
"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
)
// Test_PEPackageCataloger runs the PE package cataloger against image fixtures and compares the
// discovered packages with the expected ones: a native (non-.NET) executable must produce one
// binary package, while a .NET application must produce none.
func Test_PEPackageCataloger(t *testing.T) {
	type testCase struct {
		name     string
		fixture  string
		expected []pkg.Package
		// NOTE(review): wantErr is never set or read by this test; confirm whether it should be
		// wired into the catalog tester.
		wantErr require.ErrorAssertionFunc
	}

	jruby := pkg.Package{
		Name:    "JRuby",
		Version: "9.3.15.0",
		Type:    pkg.BinaryPkg,
		Locations: file.NewLocationSet(
			file.NewLocation("/jruby_windows_9_3_15_0.exe"),
		),
		FoundBy: "pe-binary-package-cataloger",
		Metadata: pkg.PEBinary{
			VersionResources: pkg.KeyValues{
				{Key: "CompanyName", Value: "JRuby Dev Team"},
				{Key: "FileDescription", Value: "JRuby"},
				{Key: "FileVersion", Value: "9.3.15.0"},
				{Key: "InternalName", Value: "jruby"},
				{Key: "LegalCopyright", Value: "JRuby Dev Team"},
				{Key: "OriginalFilename", Value: "jruby_windows-x32_9_3_15_0.exe"},
				{Key: "ProductName", Value: "JRuby"},
				{Key: "ProductVersion", Value: "9.3.15.0"},
			},
		},
	}

	tests := []testCase{
		{
			name:     "non-.NET package",
			fixture:  "image-jruby",
			expected: []pkg.Package{jruby},
		},
		{
			name:     "ignore .NET packages",
			fixture:  "image-dotnet-app",
			expected: nil, // expect nothing!
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			pkgtest.NewCatalogTester().
				WithImageResolver(t, tc.fixture).
				IgnoreLocationLayer(). // this fixture can be rebuilt, thus the layer ID will change
				Expects(tc.expected, nil).
				TestCataloger(t, NewPEPackageCataloger())
		})
	}
}

View File

@ -0,0 +1,10 @@
# builder stage: download the minver-cli 4.3.0 NuGet package and unpack it with unzip
FROM alpine:latest AS builder
RUN apk add --no-cache wget unzip
RUN mkdir -p /app
RUN wget -O /app/minver.nupkg https://www.nuget.org/api/v2/package/minver-cli/4.3.0
RUN unzip /app/minver.nupkg -d /app/minver
# final stage: keep only the unpacked package contents
# (instruction keyword upper-cased to match the casing used by every other instruction)
FROM busybox:latest
COPY --from=builder /app/minver /minver

View File

@ -0,0 +1,7 @@
# builder stage: download the JRuby 9.3.15.0 Windows executable into the root of the filesystem
FROM --platform=linux/amd64 alpine:latest AS builder
RUN wget -O jruby_windows_9_3_15_0.exe https://s3.amazonaws.com/jruby.org/downloads/9.3.15.0/jruby_windows_9_3_15_0.exe
# final stage: an otherwise empty image containing only the downloaded executable
FROM scratch
COPY --from=builder /jruby_windows_9_3_15_0.exe /jruby_windows_9_3_15_0.exe

View File

@ -473,7 +473,7 @@ func readPEFile(resolver file.Resolver, loc file.Location) (*logicalPE, error) {
} }
defer internal.CloseAndLogError(reader, loc.RealPath) defer internal.CloseAndLogError(reader, loc.RealPath)
ldpe, err := getLogicalDotnetPE(file.NewLocationReadCloser(loc, reader)) ldpe, err := readLogicalPE(file.NewLocationReadCloser(loc, reader))
if err != nil { if err != nil {
return nil, unknown.New(loc, fmt.Errorf("unable to parse PE file: %w", err)) return nil, unknown.New(loc, fmt.Errorf("unable to parse PE file: %w", err))
} }
@ -482,7 +482,7 @@ func readPEFile(resolver file.Resolver, loc file.Location) (*logicalPE, error) {
return nil, nil return nil, nil
} }
if !ldpe.CLR.hasEvidenceOfCLR() { if !ldpe.CLR.HasEvidenceOfCLR() {
// this is not a .NET binary // this is not a .NET binary
return nil, nil return nil, nil
} }

View File

@ -150,7 +150,7 @@ func newDotnetDepsEntry(lp logicalDepsJSONPackage) pkg.DotnetDepsEntry {
} }
} }
// newDotnetPortableExecutableEntry creates a portable executable entry from a logicalPE. // newDotnetPortableExecutableEntry creates a portable executable entry from a File.
func newDotnetPortableExecutableEntry(pe logicalPE) pkg.DotnetPortableExecutableEntry { func newDotnetPortableExecutableEntry(pe logicalPE) pkg.DotnetPortableExecutableEntry {
return newDotnetPortableExecutableEntryFromMap(pe.VersionResources) return newDotnetPortableExecutableEntryFromMap(pe.VersionResources)
} }

View File

@ -1,740 +1,30 @@
package dotnet package dotnet
import ( import (
"bytes"
"debug/pe"
"encoding/binary"
"errors"
"fmt"
"io"
"unicode/utf16"
"github.com/scylladb/go-set/strset"
"github.com/scylladb/go-set/u32set"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader" "github.com/anchore/syft/syft/pkg/cataloger/internal/pe"
) )
const peMaxAllowedDirectoryEntries = 0x1000 // logicalPE represents a PE file within the context of a .NET project (considering the deps.json file).
var imageDirectoryEntryIndexes = []int{
pe.IMAGE_DIRECTORY_ENTRY_RESOURCE, // where version resources are stored
pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR, // where info about the CLR is stored
}
// logicalPE does not directly represent a binary shape to be parsed, instead it represents the
// information of interest extracted from a PE file.
type logicalPE struct { type logicalPE struct {
// Location is where the PE file was found pe.File
Location file.Location
// TargetPath is the path is the deps.json target entry. This is not present in the PE file // TargetPath is the path is the deps.json target entry. This is not present in the PE file
// but instead is used in downstream processing to track associations between the PE file and the deps.json file. // but instead is used in downstream processing to track associations between the PE file and the deps.json file.
TargetPath string TargetPath string
// CLR is the information about the CLR (common language runtime) version found in the PE file which helps
// understand if this executable is even a .NET application.
CLR *clrEvidence
// VersionResources is a map of version resource keys to their values found in the VERSIONINFO resource directory.
VersionResources map[string]string
} }
// clrEvidence is basic info about the CLR (common language runtime) version from the COM descriptor. func readLogicalPE(reader file.LocationReadCloser) (*logicalPE, error) {
// This is not a complete representation of the CLR version, but rather a subset of the information that is peFile, err := pe.Read(reader)
// useful to us.
type clrEvidence struct {
// HasClrResourceNames is true if there are CLR resource names found in the PE file (e.g. "CLRDEBUGINFO").
HasClrResourceNames bool
// MajorVersion is the minimum supported major version of the CLR.
// It stays zero when no COM descriptor data was available to read.
MajorVersion uint16
// MinorVersion is the minimum supported minor version of the CLR.
// It stays zero when no COM descriptor data was available to read.
MinorVersion uint16
}
// hasEvidenceOfCLR returns true if the PE file has evidence of a CLR (common language runtime) version.
//
// Evidence is either a non-zero runtime version or the presence of CLR resource names. Either
// version component being non-zero counts: a runtime version such as 2.0 has a zero minor
// component and must still be recognized. A nil receiver has no evidence.
func (c *clrEvidence) hasEvidenceOfCLR() bool {
	if c == nil {
		return false
	}
	hasRuntimeVersion := c.MajorVersion != 0 || c.MinorVersion != 0
	return hasRuntimeVersion || c.HasClrResourceNames
}
// peDosHeader is the header read from the very start of a PE file. Only the magic value and the
// offset of the PE header are used; the bytes in between are skipped over.
type peDosHeader struct {
Magic [2]byte // "MZ"
Unused [58]byte
AddressOfNewEXEHeader uint32 // offset to PE header
}
// peImageCore20 represents the .NET Core 2.0 header structure.
// Source: https://github.com/dotnet/msbuild/blob/9fa9d800dabce3bfcf8365f651f3a713e01f8a85/src/Tasks/NativeMethods.cs#L761-L775
// Only the leading fields are declared; the runtime version fields are what parseCLR copies out.
type peImageCore20 struct {
Cb uint32
MajorRuntimeVersion uint16
MinorRuntimeVersion uint16
}
// peImageResourceDirectory represents the resource directory structure.
// The two entry counts together give the number of directory entries that follow this header.
type peImageResourceDirectory struct {
Characteristics uint32
TimeDateStamp uint32
MajorVersion uint16
MinorVersion uint16
NumberOfNamedEntries uint16
NumberOfIDEntries uint16
}
// peImageResourceDirectoryEntry represents an entry in the resource directory entries.
type peImageResourceDirectoryEntry struct {
// Name: when the high bit is set, the remaining bits are the offset of a string name
Name uint32
// OffsetToData: when the high bit is set the entry refers to a subdirectory, otherwise to a data entry;
// the remaining bits are an offset relative to the beginning of the resource section
OffsetToData uint32
}
// peImageResourceDataEntry is the unit of raw data in the Resource Data area.
type peImageResourceDataEntry struct {
// OffsetToData is treated as an RVA by the reader (the section base RVA is subtracted before seeking)
OffsetToData uint32
// Size is the number of bytes of resource data
Size uint32
CodePage uint32
Reserved uint32
}
// peVsFixedFileInfo represents the fixed file information structure.
type peVsFixedFileInfo struct {
Signature uint32
StructVersion uint32
FileVersionMS uint32
FileVersionLS uint32
ProductVersionMS uint32
ProductVersionLS uint32
FileFlagsMask uint32
FileFlags uint32
FileOS uint32
FileType uint32
FileSubtype uint32
FileDateMS uint32
FileDateLS uint32
}
// The following version-info structures all share the same leading header shape (peLenValLenType).
type peVsVersionInfo peLenValLenType
type peStringFileInfo peLenValLenType
type peStringTable peLenValLenType
type peString peLenValLenType
// peLenValLenType is the common (length, value length, type) header shared by the types above.
type peLenValLenType struct {
Length uint16
ValueLength uint16
Type uint16
}
// extractedSection describes a data directory of interest and, once loaded, gives access to its bytes.
type extractedSection struct {
// RVA is the relative virtual address of the directory (or of a subdirectory when recursing)
RVA uint32
// BaseRVA is the RVA of the top-level section a subdirectory belongs to; zero means RVA is itself the base
BaseRVA uint32
// Size is the size in bytes taken from the data directory entry
Size uint32
// Reader provides the section's raw bytes; it is nil until the data has been read
Reader *bytes.Reader
}
// exists reports whether the section has both a non-zero RVA and a non-zero size.
func (s extractedSection) exists() bool {
return s.RVA != 0 && s.Size != 0
}
// directoryName returns a human readable name for a PE data directory index, for use in error
// messages. Indexes other than the resource and COM descriptor directories are reported as
// "Unknown (<index>)".
func directoryName(i int) string {
	if i == pe.IMAGE_DIRECTORY_ENTRY_RESOURCE {
		return "Resource"
	}
	if i == pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR {
		return "COM Descriptor"
	}
	return fmt.Sprintf("Unknown (%d)", i)
}
func getLogicalDotnetPE(f file.LocationReadCloser) (*logicalPE, error) {
r, err := unionreader.GetUnionReader(f)
if err != nil { if err != nil {
return nil, err return nil, err
} }
sections, _, err := parsePEFile(r) if peFile == nil {
if err != nil { return nil, nil
return nil, fmt.Errorf("unable to parse PE sections: %w", err)
}
dirs := u32set.New() // keep track of the RVAs we have already parsed (prevent infinite recursion edge cases)
versionResources := make(map[string]string) // map of version resource keys to their values
resourceNames := strset.New() // set of resource names found in the PE file
err = parseResourceDirectory(sections[pe.IMAGE_DIRECTORY_ENTRY_RESOURCE], dirs, versionResources, resourceNames)
if err != nil {
return nil, err
}
c, err := parseCLR(sections[pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR], resourceNames)
if err != nil {
return nil, fmt.Errorf("unable to parse PE CLR directory: %w", err)
} }
return &logicalPE{ return &logicalPE{
Location: f.Location, File: *peFile,
CLR: c,
VersionResources: versionResources,
}, nil }, nil
} }
// parsePEFile parses the PE headers and loads the raw bytes of each data directory of interest,
// attaching a reader to every section that exists. It returns those sections keyed by data
// directory index together with all section headers, for use by downstream processing.
func parsePEFile(file unionreader.UnionReader) (map[int]*extractedSection, []pe.SectionHeader32, error) {
	fileHeader, magic, err := parsePEHeader(file)
	if err != nil {
		return nil, nil, fmt.Errorf("error parsing PE header: %w", err)
	}

	sectionsOfInterest, sectionHeaders, err := parseSectionHeaders(file, magic, fileHeader.NumberOfSections)
	if err != nil {
		return nil, nil, fmt.Errorf("error parsing section headers: %w", err)
	}

	for dirIndex, section := range sectionsOfInterest {
		if section.exists() {
			contents, readErr := readDataFromRVA(file, section.RVA, section.Size, sectionHeaders)
			if readErr != nil {
				return nil, nil, fmt.Errorf("error reading %q section data: %w", directoryName(dirIndex), readErr)
			}
			// the map holds pointers, so this update is visible to the caller
			section.Reader = contents
		}
	}

	return sectionsOfInterest, sectionHeaders, nil
}
// parsePEHeader reads the beginning of a PE formatted file, returning the file header and "magic" indicator
// for downstream logic to determine 32/64 bit parsing.
//
// The DOS header is read from the reader's current position, so the reader must be positioned at the
// start of the file. On success the reader is left at the first byte of the optional header: the magic
// value is read and then the position is moved back over it, so the caller can decode the whole
// optional header next. An error is returned when any read or seek fails or when either the DOS magic
// ("MZ") or the PE signature ("PE\0\0") does not match.
func parsePEHeader(file unionreader.UnionReader) (*pe.FileHeader, uint16, error) {
var dosHeader peDosHeader
if err := binary.Read(file, binary.LittleEndian, &dosHeader); err != nil {
return nil, 0, fmt.Errorf("error reading DOS header: %w", err)
}
if string(dosHeader.Magic[:]) != "MZ" {
return nil, 0, fmt.Errorf("invalid DOS header magic")
}
// the DOS header records where the PE header starts
peOffset := int64(dosHeader.AddressOfNewEXEHeader)
if _, err := file.Seek(peOffset, io.SeekStart); err != nil {
return nil, 0, fmt.Errorf("error seeking to PE header: %w", err)
}
var signature [4]byte
if err := binary.Read(file, binary.LittleEndian, &signature); err != nil {
return nil, 0, fmt.Errorf("error reading PE signature: %w", err)
}
if !bytes.Equal(signature[:], []byte("PE\x00\x00")) {
return nil, 0, fmt.Errorf("invalid PE signature")
}
// the file header immediately follows the signature
var fileHeader pe.FileHeader
if err := binary.Read(file, binary.LittleEndian, &fileHeader); err != nil {
return nil, 0, fmt.Errorf("error reading file header: %w", err)
}
// peek at the first field of the optional header to learn whether this is PE32 or PE32+
var magic uint16
if err := binary.Read(file, binary.LittleEndian, &magic); err != nil {
return nil, 0, fmt.Errorf("error reading optional header magic: %w", err)
}
// seek back to before reading magic (since that value is in the header)
if _, err := file.Seek(-2, io.SeekCurrent); err != nil {
return nil, 0, fmt.Errorf("error seeking back to before reading magic: %w", err)
}
return &fileHeader, magic, nil
}
// parseSectionHeaders reads the section headers from the PE file and extracts the virtual addresses + section size
// information for the sections of interest. Additionally, all section headers are returned to aid in downstream processing.
//
// The reader must be positioned at the start of the optional header. The magic value selects whether the
// optional header is decoded as PE32 (0x10B) or PE32+ (0x20B); any other value is an error. Data directories
// of interest that have a zero size are left out of the returned map. The numberOfSections section headers
// are then read from the position directly after the decoded optional header.
//
// NOTE(review): the optional header is decoded at its fixed struct size; the size recorded in the file
// header is not consulted here — confirm this is acceptable for files with a non-standard optional header size.
func parseSectionHeaders(file unionreader.UnionReader, magic uint16, numberOfSections uint16) (map[int]*extractedSection, []pe.SectionHeader32, error) {
soi := make(map[int]*extractedSection)
switch magic {
case 0x10B: // PE32
var optHeader pe.OptionalHeader32
if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
return nil, nil, fmt.Errorf("error reading optional header (PE32): %w", err)
}
for _, i := range imageDirectoryEntryIndexes {
sectionHeader := optHeader.DataDirectory[i]
// an empty data directory means the file does not carry this kind of data
if sectionHeader.Size == 0 {
continue
}
soi[i] = &extractedSection{
RVA: sectionHeader.VirtualAddress,
Size: sectionHeader.Size,
}
}
case 0x20B: // PE32+ (64 bit)
var optHeader pe.OptionalHeader64
if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
return nil, nil, fmt.Errorf("error reading optional header (PE32+): %w", err)
}
for _, i := range imageDirectoryEntryIndexes {
sectionHeader := optHeader.DataDirectory[i]
// an empty data directory means the file does not carry this kind of data
if sectionHeader.Size == 0 {
continue
}
soi[i] = &extractedSection{
RVA: sectionHeader.VirtualAddress,
Size: sectionHeader.Size,
}
}
default:
return nil, nil, fmt.Errorf("unknown optional header magic: 0x%x", magic)
}
// read section headers
headers := make([]pe.SectionHeader32, numberOfSections)
for i := 0; i < int(numberOfSections); i++ {
if err := binary.Read(file, binary.LittleEndian, &headers[i]); err != nil {
return nil, nil, fmt.Errorf("error reading section header: %w", err)
}
}
return soi, headers, nil
}
// parseCLR gathers evidence that a PE file targets the CLR (common language runtime).
// The presence of the "CLRDEBUGINFO" resource name is always recorded. When COM descriptor
// data is available, the major and minor runtime versions are read from its header as well;
// otherwise both versions are left at zero.
func parseCLR(sec *extractedSection, resourceNames *strset.Set) (*clrEvidence, error) {
	evidence := &clrEvidence{
		HasClrResourceNames: resourceNames.HasAny("CLRDEBUGINFO"),
	}

	// without COM descriptor data the resource names are the only evidence available
	if sec == nil || sec.Reader == nil {
		return evidence, nil
	}

	var header peImageCore20
	if err := binary.Read(sec.Reader, binary.LittleEndian, &header); err != nil {
		return nil, fmt.Errorf("error reading CLR header: %w", err)
	}

	evidence.MajorVersion = header.MajorRuntimeVersion
	evidence.MinorVersion = header.MinorRuntimeVersion

	return evidence, nil
}
// rvaToFileOffset is a helper function to convert RVA to file offset using section headers.
//
// The first section whose virtual range [VirtualAddress, VirtualAddress+VirtualSize) contains
// the RVA is used. The containment test compares the distance from the section start against
// the section size, so a section whose end address exceeds the uint32 range is still matched
// correctly instead of wrapping around. An error is returned when no section contains the RVA.
func rvaToFileOffset(rva uint32, sections []pe.SectionHeader32) (uint32, error) {
	for _, section := range sections {
		if rva < section.VirtualAddress {
			continue
		}
		// safe from underflow because rva >= VirtualAddress was established above
		if delta := rva - section.VirtualAddress; delta < section.VirtualSize {
			return section.PointerToRawData + delta, nil
		}
	}
	return 0, fmt.Errorf("RVA 0x%x not found in any section", rva)
}
// readDataFromRVA loads size bytes located at the given RVA and returns them wrapped in an
// in-memory reader. The RVA is translated to a file offset using the section headers. A zero
// size, an RVA outside every section, a failed seek, or a short read all result in an error.
func readDataFromRVA(file io.ReadSeeker, rva, size uint32, sections []pe.SectionHeader32) (*bytes.Reader, error) {
	if size == 0 {
		return nil, fmt.Errorf("zero size specified")
	}

	fileOffset, err := rvaToFileOffset(rva, sections)
	if err != nil {
		return nil, err
	}

	if _, seekErr := file.Seek(int64(fileOffset), io.SeekStart); seekErr != nil {
		return nil, fmt.Errorf("error seeking to data: %w", seekErr)
	}

	// ReadFull fails unless exactly size bytes are available
	contents := make([]byte, size)
	if _, readErr := io.ReadFull(file, contents); readErr != nil {
		return nil, fmt.Errorf("error reading data: %w", readErr)
	}

	return bytes.NewReader(contents), nil
}
// parseResourceDirectory recursively parses a PE resource directory. This takes a relative virtual address (offset of
// a piece of data or code relative to the base address), the size of the resource directory, the set of RVAs already
// parsed, and the map to populate discovered version resource values.
//
//	.rsrc Section
//	+------------------------------+
//	| Resource Directory Table     |
//	+------------------------------+
//	| Resource Directory Entries   |
//	|  +------------------------+  |
//	|  | Subdirectory or Data   |  |
//	|  +------------------------+  |
//	+------------------------------+
//	| Resource Data Entries        |
//	|  +------------------------+  |
//	|  | Resource Data          |  |
//	|  +------------------------+  |
//	+------------------------------+
//	| Actual Resource Data         |
//	+------------------------------+
//
// A nil or zero-sized section is not an error (there is simply nothing to parse). A directory
// header that cannot be read, or that declares no entries or too many entries, is an error.
// Failures on individual entries are logged at trace level and skipped so that one bad entry
// does not prevent the remaining entries from being processed.
//
// sources:
//   - https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-rsrc-section
//   - https://learn.microsoft.com/en-us/previous-versions/ms809762(v=msdn.10)#pe-file-resources
func parseResourceDirectory(sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
	if sec == nil || sec.Size == 0 {
		return nil
	}

	if sec.Reader == nil {
		return errors.New("resource section not found")
	}

	// a zero BaseRVA means this is the top-level directory, which is its own base
	baseRVA := sec.BaseRVA
	if baseRVA == 0 {
		baseRVA = sec.RVA
	}

	offset := int64(sec.RVA - baseRVA)
	if _, err := sec.Reader.Seek(offset, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to directory offset: %w", err)
	}

	var directoryHeader peImageResourceDirectory
	if err := readIntoStruct(sec.Reader, &directoryHeader); err != nil {
		return fmt.Errorf("error reading directory header: %w", err)
	}

	// widen each count before adding: both are uint16, so adding them first would wrap at 65536
	// and let an oversized directory slip past the limit below
	numEntries := int(directoryHeader.NumberOfNamedEntries) + int(directoryHeader.NumberOfIDEntries)
	switch {
	case numEntries > peMaxAllowedDirectoryEntries:
		return fmt.Errorf("too many entries in resource directory: %d", numEntries)
	case numEntries == 0:
		return fmt.Errorf("no entries in resource directory")
	}

	headerSize := int64(binary.Size(directoryHeader))
	entrySize := int64(binary.Size(peImageResourceDirectoryEntry{}))

	for i := 0; i < numEntries; i++ {
		var entry peImageResourceDirectoryEntry

		// entries are laid out back to back directly after the directory header; seek explicitly
		// because processing the previous entry may have moved the reader
		entryOffset := offset + headerSize + int64(i)*entrySize
		if _, err := sec.Reader.Seek(entryOffset, io.SeekStart); err != nil {
			log.Tracef("error seeking to PE entry offset: %v", err)
			continue
		}

		if err := readIntoStruct(sec.Reader, &entry); err != nil {
			log.Tracef("error reading PE resource directory entry: %v", err)
			continue
		}

		if err := processResourceEntry(entry, baseRVA, sec, dirs, fields, names); err != nil {
			log.Tracef("error processing resource entry: %v", err)
			continue
		}
	}

	return nil
}
// processResourceEntry handles a single resource directory entry. If the entry has a string
// name, the name is recorded in names. A subdirectory entry is then parsed recursively, while
// a data entry is handed to parseResourceDataEntry, which populates fields.
//
// Subdirectories that have already been visited are rejected with an error so that
// self-referencing resource trees cannot cause unbounded recursion.
func processResourceEntry(entry peImageResourceDirectoryEntry, baseRVA uint32, sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
	// if the high bit is set, this is a directory entry, otherwise it is a data entry
	isDirectory := entry.OffsetToData&0x80000000 != 0

	// note: the offset is relative to the beginning of the resource section, not an RVA
	entryOffsetToData := entry.OffsetToData & 0x7FFFFFFF

	nameIsString := entry.Name&0x80000000 != 0
	nameOffset := entry.Name & 0x7FFFFFFF

	// read the string name of the resource directory
	if nameIsString {
		currentPos, err := sec.Reader.Seek(0, io.SeekCurrent)
		if err != nil {
			return fmt.Errorf("error getting current reader position: %w", err)
		}

		if _, err := sec.Reader.Seek(int64(nameOffset), io.SeekStart); err != nil {
			return fmt.Errorf("error seeking to resource name: %w", err)
		}

		// best effort: an unreadable name is not fatal, the entry itself is still processed
		name, err := readUTF16WithLength(sec.Reader)
		if err == nil {
			names.Add(name)
		}

		if _, err := sec.Reader.Seek(currentPos, io.SeekStart); err != nil {
			return fmt.Errorf("error restoring reader position: %w", err)
		}
	}

	if !isDirectory {
		return parseResourceDataEntry(sec.Reader, baseRVA, baseRVA+entryOffsetToData, sec.Size, fields)
	}

	subRVA := baseRVA + entryOffsetToData
	if dirs.Has(subRVA) {
		// some malware uses recursive PE references to evade analysis
		return fmt.Errorf("recursive PE reference detected; skipping directory at baseRVA=0x%x subRVA=0x%x", baseRVA, subRVA)
	}

	dirs.Add(subRVA)

	return parseResourceDirectory(
		&extractedSection{
			RVA:     subRVA,
			BaseRVA: baseRVA,
			Size:    sec.Size - (sec.RVA - baseRVA),
			Reader:  sec.Reader,
		},
		dirs, fields, names)
}
// parseResourceDataEntry reads the resource data entry found at rva, loads the bytes the entry points at, and
// hands them to parseVersionResourceSection so any version strings end up in fields.
//
// baseRVA is subtracted from every RVA to get a position within reader. remainingSize is the upper bound
// accepted for the size the entry declares.
func parseResourceDataEntry(reader *bytes.Reader, baseRVA, rva, remainingSize uint32, fields map[string]string) error {
	entryPos := int64(rva - baseRVA)
	if _, err := reader.Seek(entryPos, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to data entry offset: %w", err)
	}

	var entry peImageResourceDataEntry
	if err := readIntoStruct(reader, &entry); err != nil {
		return fmt.Errorf("error reading resource data entry: %w", err)
	}

	// the declared size comes straight from the file; refuse anything larger than the caller allows
	if entry.Size > remainingSize {
		return fmt.Errorf("resource data entry size exceeds remaining size")
	}

	payload := make([]byte, entry.Size)
	payloadPos := int64(entry.OffsetToData - baseRVA)
	if _, err := reader.Seek(payloadPos, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to resource data: %w", err)
	}
	if _, err := reader.Read(payload); err != nil {
		return fmt.Errorf("error reading resource data: %w", err)
	}

	return parseVersionResourceSection(bytes.NewReader(payload), fields)
}
// parseVersionResourceSection parses a PE version resource section from within a resource directory.
//
// "The main structure in a version resource is the VS_FIXEDFILEINFO structure. Additional structures include the
// VarFileInfo structure to store language information data, and StringFileInfo for user-defined string information.
// All strings in a version resource are in Unicode format. Each block of information is aligned on a DWORD boundary."
//
// "VS_VERSIONINFO" (utf16)
// +---------------------------------------------------+
// | wLength (2 bytes) |
// | wValueLength (2 bytes) |
// | wType (2 bytes) |
// | szKey ("VS_VERSION_INFO") (utf16) |
// | Padding (to DWORD) |
// +---------------------------------------------------+
// | VS_FIXEDFILEINFO (52 bytes) |
// +---------------------------------------------------+
// | "StringFileInfo" (utf16) |
// +---------------------------------------------------+
// | wLength (2 bytes) |
// | wValueLength (2 bytes) |
// | wType (2 bytes) |
// | szKey ("StringFileInfo") (utf16) |
// | Padding (to DWORD) |
// | StringTable |
// | +--------------------------------------------+ |
// | | wLength (2 bytes) | |
// | | wValueLength (2 bytes) | |
// | | wType (2 bytes) | |
// | | szKey ("040904b0") | |
// | | Padding (to DWORD) | |
// | | String | |
// | | +--------------------------------------+ | |
// | | | wLength (2 bytes) | | |
// | | | wValueLength (2 bytes) | | |
// | | | wType (2 bytes) | | |
// | | | szKey ("FileVersion") | | |
// | | | Padding (to DWORD) | | |
// | | | szValue ("15.00.0913.015") | | |
// | | | Padding (to DWORD) | | |
// | +--------------------------------------------+ |
// +---------------------------------------------------+
// | VarFileInfo (utf16) |
// +---------------------------------------------------+
// | (skip!) |
// +---------------------------------------------------+
//
// sources:
// - https://learn.microsoft.com/en-us/windows/win32/menurc/resource-file-formats
// - https://learn.microsoft.com/en-us/windows/win32/menurc/vs-versioninfo
// - https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/varfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/stringfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable
func parseVersionResourceSection(reader *bytes.Reader, fields map[string]string) error {
	// offset tracks the reader position relative to the start of the resource so DWORD alignment can be computed
	offset := 0

	var info peVsVersionInfo
	if szKey, err := readIntoStructAndSzKey(reader, &info, &offset); err != nil {
		return fmt.Errorf("error reading PE version info: %w", err)
	} else if szKey != "VS_VERSION_INFO" {
		// this is a resource section, but not the version resources
		return nil
	}

	if err := alignAndSeek(reader, &offset); err != nil {
		return fmt.Errorf("error aligning past PE version info: %w", err)
	}

	var fixedFileInfo peVsFixedFileInfo
	if err := readIntoStruct(reader, &fixedFileInfo, &offset); err != nil {
		return fmt.Errorf("error reading PE FixedFileInfo: %w", err)
	}

	for reader.Len() > 0 {
		if err := alignAndSeek(reader, &offset); err != nil {
			return fmt.Errorf("error seeking to PE StringFileInfo: %w", err)
		}

		var sfiHeader peStringFileInfo
		if szKey, err := readIntoStructAndSzKey(reader, &sfiHeader, &offset); err != nil {
			return fmt.Errorf("error reading PE string file info header: %w", err)
		} else if szKey != "StringFileInfo" {
			// we only care about extracting strings from any string tables, skip this
			offset += int(sfiHeader.ValueLength)
			continue
		}

		// stOffset counts the bytes consumed within the string table (its own header included) so it can be
		// compared against the length the table declares
		var stOffset int

		// note: the szKey for the peStringTable is the language
		var stHeader peStringTable
		if _, err := readIntoStructAndSzKey(reader, &stHeader, &offset, &stOffset); err != nil {
			return fmt.Errorf("error reading PE string table header: %w", err)
		}

		// Also stop once the reader is exhausted: the declared table length comes from the file and can exceed
		// the data actually present, and readIntoStruct treats a clean EOF as success without advancing any
		// offset, so looping on stOffset alone would never terminate on such input.
		for stOffset < int(stHeader.Length) && reader.Len() > 0 {
			var stringHeader peString
			if err := readIntoStruct(reader, &stringHeader, &offset, &stOffset); err != nil {
				break
			}

			key := readUTF16(reader, &offset, &stOffset)

			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
				return fmt.Errorf("error aligning to next PE string table value: %w", err)
			}

			// a zero value length means the key has no value at all (nothing to read, not even a terminator)
			var value string
			if stringHeader.ValueLength > 0 {
				value = readUTF16(reader, &offset, &stOffset)
			}

			fields[key] = value

			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
				return fmt.Errorf("error aligning to next PE string table key: %w", err)
			}
		}
	}

	if fields["FileVersion"] == "" {
		// we can derive the file version from the fixed file info if it is not already specified as a string entry... neat!
		fields["FileVersion"] = fmt.Sprintf("%d.%d.%d.%d",
			fixedFileInfo.FileVersionMS>>16, fixedFileInfo.FileVersionMS&0xFFFF,
			fixedFileInfo.FileVersionLS>>16, fixedFileInfo.FileVersionLS&0xFFFF)
	}

	return nil
}
// readIntoStructAndSzKey decodes a fixed-size header into data and then reads the null-terminated UTF-16 key
// (szKey) that immediately follows it, advancing every provided offset by the bytes consumed along the way.
// It is only meaningful for the header-then-key layout used by version resource nodes; do not use it elsewhere.
func readIntoStructAndSzKey[T any](reader *bytes.Reader, data *T, offsets ...*int) (string, error) {
	err := readIntoStruct(reader, data, offsets...)
	if err != nil {
		return "", err
	}

	szKey := readUTF16(reader, offsets...)
	return szKey, nil
}
// readIntoStruct decodes little-endian bytes from reader into data and, on success, advances each provided
// offset by the encoded size of data.
//
// Note that a clean io.EOF (nothing left to read at all) is reported as success: data keeps whatever it held
// and the offsets are not advanced. Any other failure, including a partial read, is returned as an error.
func readIntoStruct[T any](reader io.Reader, data *T, offsets ...*int) error {
	switch err := binary.Read(reader, binary.LittleEndian, data); {
	case err == nil:
		// decoded; fall through to the offset bookkeeping below
	case errors.Is(err, io.EOF):
		return nil
	default:
		return err
	}

	consumed := binary.Size(*data)
	for _, tracked := range offsets {
		*tracked += consumed
	}
	return nil
}
// alignAndSeek rounds *offset up to the next DWORD boundary, adds the padding that was skipped to every tracked
// offset, and moves the reader to the aligned position (measured from the start of the reader).
func alignAndSeek(reader io.Seeker, offset *int, trackOffsets ...*int) error {
	unaligned := *offset
	*offset = alignToDWORD(unaligned)

	padding := *offset - unaligned
	for _, tracked := range trackOffsets {
		*tracked += padding
	}

	if _, err := reader.Seek(int64(*offset), io.SeekStart); err != nil {
		return err
	}
	return nil
}
// alignToDWORD rounds offset up to the nearest multiple of 4 (a DWORD boundary); offsets that are already
// aligned are returned unchanged.
func alignToDWORD(offset int) int {
	const mask = 3 // low two bits; clearing them yields a multiple of 4
	return (offset + mask) &^ mask
}
// readUTF16 reads a null-terminated, little-endian UTF-16 string starting at the reader's current position and
// returns it as a Go string. Reading stops at the terminator or as soon as the data runs out, in which case
// whatever was read so far is returned.
//
// Code units are decoded as UTF-16, so characters outside the basic multilingual plane (encoded as surrogate
// pairs) are preserved; unpaired surrogates decode to U+FFFD.
//
// Every provided offset is advanced by the number of bytes consumed, terminator included.
func readUTF16(reader *bytes.Reader, offsets ...*int) string {
	startPos, err := reader.Seek(0, io.SeekCurrent)
	if err != nil {
		return ""
	}

	var units []uint16
	for {
		var unit uint16
		if err := binary.Read(reader, binary.LittleEndian, &unit); err != nil || unit == 0 {
			break
		}
		units = append(units, unit)
	}

	// calculate how many bytes we've actually read (including null terminator); asking a bytes.Reader for its
	// current position cannot fail, so the error is ignored
	endPos, _ := reader.Seek(0, io.SeekCurrent)
	bytesRead := int(endPos - startPos)
	for _, tracked := range offsets {
		*tracked += bytesRead
	}

	return string(utf16.Decode(units))
}
// readUTF16WithLength reads a length-prefixed, little-endian UTF-16 string from reader. The leading 2 bytes
// give the number of UTF-16 code units that follow; a zero count yields an empty string. An error is returned
// when the prefix or the announced code units cannot be read in full.
func readUTF16WithLength(reader *bytes.Reader) (string, error) {
	var count uint16
	if err := binary.Read(reader, binary.LittleEndian, &count); err != nil {
		return "", err
	}
	if count == 0 {
		return "", nil
	}

	units := make([]uint16, count)
	if err := binary.Read(reader, binary.LittleEndian, units); err != nil {
		return "", err
	}

	decoded := utf16.Decode(units)
	return string(decoded), nil
}

View File

@ -0,0 +1,736 @@
package pe
import (
"bytes"
"debug/pe"
"encoding/binary"
"errors"
"fmt"
"io"
"unicode/utf16"
"github.com/scylladb/go-set/strset"
"github.com/scylladb/go-set/u32set"
"github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/file"
"github.com/anchore/syft/syft/internal/unionreader"
)
// peMaxAllowedDirectoryEntries caps how many entries (named + ID) a single resource directory table may
// declare; parseResourceDirectory rejects tables that declare more.
const peMaxAllowedDirectoryEntries = 0x1000
// imageDirectoryEntryIndexes lists the data directory slots of the optional header that parseSectionHeaders
// extracts; all other data directories are ignored.
var imageDirectoryEntryIndexes = []int{
pe.IMAGE_DIRECTORY_ENTRY_RESOURCE, // where version resources are stored
pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR, // where info about the CLR is stored
}
// File does not directly represent a binary shape to be parsed, instead it represents the
// information of interest extracted from a PE file. Values of this type are produced by Read.
type File struct {
// Location is where the PE file was found (copied from the reader handed to Read).
Location file.Location
// CLR is the information about the CLR (common language runtime) version found in the PE file which helps
// understand if this executable is even a .NET application. Use CLR.HasEvidenceOfCLR to interpret it.
CLR *CLREvidence
// VersionResources is a map of version resource keys to their values found in the VERSIONINFO resource directory
// (for example "FileVersion"). The map is empty when the file carries no version resource.
VersionResources map[string]string
}
// CLREvidence captures the few CLR (common language runtime) facts gathered from a PE file: the runtime
// version recorded in the COM descriptor and whether CLR-specific resource names were seen. It is a
// deliberately small subset of what the CLR header holds.
type CLREvidence struct {
	// HasClrResourceNames is true if there are CLR resource names found in the PE file (e.g. "CLRDEBUGINFO").
	HasClrResourceNames bool

	// MajorVersion is the minimum supported major version of the CLR.
	MajorVersion uint16

	// MinorVersion is the minimum supported minor version of the CLR.
	MinorVersion uint16
}

// HasEvidenceOfCLR reports whether the PE file shows signs of targeting the CLR: either CLR resource names
// were found, or both the major and the minor runtime version are non-zero. It is safe to call on a nil
// receiver, which reports false.
func (c *CLREvidence) HasEvidenceOfCLR() bool {
	if c == nil {
		return false
	}
	if c.HasClrResourceNames {
		return true
	}
	return c.MajorVersion != 0 && c.MinorVersion != 0
}
// peDosHeader is the fixed-size (64 byte) header read from the very start of a PE file by parsePEHeader. Only
// the magic and the pointer to the PE header are used; the bytes in between are read and discarded.
type peDosHeader struct {
Magic [2]byte // "MZ"
Unused [58]byte
AddressOfNewEXEHeader uint32 // offset to PE header
}
// peImageCore20 represents the .NET Core 2.0 header structure. Only the leading fields needed to learn the
// runtime version are declared; parseCLR reads just this prefix from the COM descriptor directory.
// Source: https://github.com/dotnet/msbuild/blob/9fa9d800dabce3bfcf8365f651f3a713e01f8a85/src/Tasks/NativeMethods.cs#L761-L775
type peImageCore20 struct {
Cb uint32 // presumably the size of the full header in bytes; not used by this package
MajorRuntimeVersion uint16
MinorRuntimeVersion uint16
}
// peImageResourceDirectory represents the resource directory structure: the fixed header of a resource
// directory table. The table's entries follow it directly, NumberOfNamedEntries + NumberOfIDEntries in total
// (parseResourceDirectory only uses the two counts).
type peImageResourceDirectory struct {
Characteristics uint32
TimeDateStamp uint32
MajorVersion uint16
MinorVersion uint16
NumberOfNamedEntries uint16
NumberOfIDEntries uint16
}
// peImageResourceDirectoryEntry represents an entry in the resource directory entries.
// See processResourceEntry for how the high bit of each field is interpreted.
type peImageResourceDirectoryEntry struct {
// Name: when the high bit is set, the low 31 bits locate a length-prefixed UTF-16 name within the resource section.
Name uint32
// OffsetToData: the low 31 bits are an offset from the start of the resource section; the high bit is set
// when the target is a subdirectory and clear when it is a data entry.
OffsetToData uint32
}
// peImageResourceDataEntry is the unit of raw data in the Resource Data area: it describes where one
// resource's bytes live and how many there are.
type peImageResourceDataEntry struct {
OffsetToData uint32 // RVA of the resource bytes (parseResourceDataEntry subtracts the section's base RVA from it)
Size uint32 // number of resource bytes
CodePage uint32
Reserved uint32
}
// peVsFixedFileInfo represents the fixed file information structure (VS_FIXEDFILEINFO) of a version resource:
// thirteen 32-bit fields, 52 bytes in total. Only the file version fields are consumed by this package.
type peVsFixedFileInfo struct {
Signature uint32
StructVersion uint32
FileVersionMS uint32 // first two components of the file version: high 16 bits, then low 16 bits
FileVersionLS uint32 // last two components of the file version: high 16 bits, then low 16 bits
ProductVersionMS uint32
ProductVersionLS uint32
FileFlagsMask uint32
FileFlags uint32
FileOS uint32
FileType uint32
FileSubtype uint32
FileDateMS uint32
FileDateLS uint32
}
// The version resource is a tree of nodes that all begin with the same three-field header, immediately
// followed by a null-terminated UTF-16 key (szKey). One named type is declared per node kind so the parsing
// code states which node it expects; the layout is identical for all of them.
type peVsVersionInfo peLenValLenType
type peStringFileInfo peLenValLenType
type peStringTable peLenValLenType
type peString peLenValLenType
// peLenValLenType is the shared header that starts every version resource node.
type peLenValLenType struct {
Length uint16 // wLength: declared length of the node (compared against bytes consumed when walking a string table)
ValueLength uint16 // wValueLength: declared length of the node's value; zero when there is no value
Type uint16 // wType: not interpreted by this package
}
// extractedSection describes a region of the PE image that is of interest for parsing, together with a reader
// over the bytes of that region once they have been loaded.
type extractedSection struct {
	// RVA is the relative virtual address where this region starts.
	RVA uint32

	// BaseRVA is the RVA that offsets into Reader are measured from; zero means "same as RVA".
	BaseRVA uint32

	// Size is the number of bytes in the region.
	Size uint32

	// Reader provides the region's bytes; nil until the data has been read from the file.
	Reader *bytes.Reader
}

// exists reports whether the section is actually present, i.e. it has both an address and a size.
func (s extractedSection) exists() bool {
	missing := s.RVA == 0 || s.Size == 0
	return !missing
}
// directoryName returns a human-readable label for a data directory index, for use in error messages.
// Indexes this package does not work with are rendered as "Unknown (<index>)".
func directoryName(i int) string {
	known := map[int]string{
		pe.IMAGE_DIRECTORY_ENTRY_RESOURCE:       "Resource",
		pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR: "COM Descriptor",
	}
	if label, ok := known[i]; ok {
		return label
	}
	return fmt.Sprintf("Unknown (%d)", i)
}
// Read parses the PE file behind f and returns the information of interest: the version resources found in
// the resource directory and the evidence of a CLR. An error is returned when the file cannot be parsed as a
// PE or when the resource or CLR directories cannot be read.
func Read(f file.LocationReadCloser) (*File, error) {
	reader, err := unionreader.GetUnionReader(f)
	if err != nil {
		return nil, err
	}

	sections, _, err := parsePEFile(reader)
	if err != nil {
		return nil, fmt.Errorf("unable to parse PE sections: %w", err)
	}

	var (
		visitedDirs      = u32set.New()            // RVAs already parsed (prevents infinite recursion edge cases)
		versionResources = make(map[string]string) // version resource keys to their values
		resourceNames    = strset.New()            // resource names found in the PE file
	)

	resources := sections[pe.IMAGE_DIRECTORY_ENTRY_RESOURCE]
	if err := parseResourceDirectory(resources, visitedDirs, versionResources, resourceNames); err != nil {
		return nil, err
	}

	// the resource names gathered above feed into the CLR determination
	clr, err := parseCLR(sections[pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR], resourceNames)
	if err != nil {
		return nil, fmt.Errorf("unable to parse PE CLR directory: %w", err)
	}

	result := File{
		Location:         f.Location,
		CLR:              clr,
		VersionResources: versionResources,
	}
	return &result, nil
}
// parsePEFile reads the PE headers from file and loads the bytes of each data directory of interest, attaching
// a reader over those bytes to the corresponding section. It returns the sections keyed by data directory
// index along with every section header found in the file.
func parsePEFile(file unionreader.UnionReader) (map[int]*extractedSection, []pe.SectionHeader32, error) {
	fileHeader, magic, err := parsePEHeader(file)
	if err != nil {
		return nil, nil, fmt.Errorf("error parsing PE header: %w", err)
	}

	sectionsOfInterest, headers, err := parseSectionHeaders(file, magic, fileHeader.NumberOfSections)
	if err != nil {
		return nil, nil, fmt.Errorf("error parsing section headers: %w", err)
	}

	// the map holds pointers, so setting Reader here updates the entries that are returned
	for dirIndex, section := range sectionsOfInterest {
		if section.exists() {
			contents, err := readDataFromRVA(file, section.RVA, section.Size, headers)
			if err != nil {
				return nil, nil, fmt.Errorf("error reading %q section data: %w", directoryName(dirIndex), err)
			}
			section.Reader = contents
		}
	}

	return sectionsOfInterest, headers, nil
}
// parsePEHeader validates the DOS and PE signatures at the start of file and returns the COFF file header
// together with the optional header "magic", which tells downstream logic whether to parse the 32 or 64 bit
// layout. On return the reader is positioned at the start of the optional header.
func parsePEHeader(file unionreader.UnionReader) (*pe.FileHeader, uint16, error) {
	var dos peDosHeader
	if err := binary.Read(file, binary.LittleEndian, &dos); err != nil {
		return nil, 0, fmt.Errorf("error reading DOS header: %w", err)
	}
	if dos.Magic != [2]byte{'M', 'Z'} {
		return nil, 0, fmt.Errorf("invalid DOS header magic")
	}

	// the DOS header tells us where the PE header actually starts
	if _, err := file.Seek(int64(dos.AddressOfNewEXEHeader), io.SeekStart); err != nil {
		return nil, 0, fmt.Errorf("error seeking to PE header: %w", err)
	}

	var signature [4]byte
	if err := binary.Read(file, binary.LittleEndian, &signature); err != nil {
		return nil, 0, fmt.Errorf("error reading PE signature: %w", err)
	}
	if signature != [4]byte{'P', 'E', 0, 0} {
		return nil, 0, fmt.Errorf("invalid PE signature")
	}

	fileHeader := new(pe.FileHeader)
	if err := binary.Read(file, binary.LittleEndian, fileHeader); err != nil {
		return nil, 0, fmt.Errorf("error reading file header: %w", err)
	}

	var magic uint16
	if err := binary.Read(file, binary.LittleEndian, &magic); err != nil {
		return nil, 0, fmt.Errorf("error reading optional header magic: %w", err)
	}

	// seek back to before reading magic (since that value is in the header)
	if _, err := file.Seek(-2, io.SeekCurrent); err != nil {
		return nil, 0, fmt.Errorf("error seeking back to before reading magic: %w", err)
	}

	return fileHeader, magic, nil
}
// parseSectionHeaders reads the optional header (32 or 64 bit, selected by magic) followed by numberOfSections
// section headers. From the optional header it collects the address and size of each data directory of
// interest that has a non-zero size, keyed by data directory index. All section headers are returned as well
// so that callers can translate RVAs into file offsets.
func parseSectionHeaders(file unionreader.UnionReader, magic uint16, numberOfSections uint16) (map[int]*extractedSection, []pe.SectionHeader32, error) {
	soi := make(map[int]*extractedSection)

	// both optional header flavors carry the same fixed-size data directory array
	collect := func(directories [16]pe.DataDirectory) {
		for _, idx := range imageDirectoryEntryIndexes {
			if dir := directories[idx]; dir.Size != 0 {
				soi[idx] = &extractedSection{
					RVA:  dir.VirtualAddress,
					Size: dir.Size,
				}
			}
		}
	}

	switch magic {
	case 0x10B: // PE32
		var optHeader pe.OptionalHeader32
		if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
			return nil, nil, fmt.Errorf("error reading optional header (PE32): %w", err)
		}
		collect(optHeader.DataDirectory)

	case 0x20B: // PE32+ (64 bit)
		var optHeader pe.OptionalHeader64
		if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
			return nil, nil, fmt.Errorf("error reading optional header (PE32+): %w", err)
		}
		collect(optHeader.DataDirectory)

	default:
		return nil, nil, fmt.Errorf("unknown optional header magic: 0x%x", magic)
	}

	// read section headers
	headers := make([]pe.SectionHeader32, numberOfSections)
	for i := range headers {
		if err := binary.Read(file, binary.LittleEndian, &headers[i]); err != nil {
			return nil, nil, fmt.Errorf("error reading section header: %w", err)
		}
	}

	return soi, headers, nil
}
// parseCLR builds the CLR (common language runtime) evidence for a PE file. Whether CLR resource names were
// seen is always recorded; the runtime version is additionally filled in from the COM descriptor when that
// section is available. An error is returned only when the section is present but its header cannot be read.
func parseCLR(sec *extractedSection, resourceNames *strset.Set) (*CLREvidence, error) {
	evidence := &CLREvidence{
		HasClrResourceNames: resourceNames.HasAny("CLRDEBUGINFO"),
	}

	// without a COM descriptor there is no version to report
	if sec == nil || sec.Reader == nil {
		return evidence, nil
	}

	var header peImageCore20
	if err := binary.Read(sec.Reader, binary.LittleEndian, &header); err != nil {
		return nil, fmt.Errorf("error reading CLR header: %w", err)
	}

	evidence.MajorVersion = header.MajorRuntimeVersion
	evidence.MinorVersion = header.MinorRuntimeVersion
	return evidence, nil
}
// rvaToFileOffset converts a relative virtual address into an offset within the file, using the first section
// whose virtual address range [VirtualAddress, VirtualAddress+VirtualSize) contains rva. An error is returned
// when no section contains the address.
func rvaToFileOffset(rva uint32, sections []pe.SectionHeader32) (uint32, error) {
	for i := range sections {
		section := &sections[i]
		if rva < section.VirtualAddress {
			continue
		}
		// compare the distance into the section rather than computing the section's end address, which can
		// overflow uint32 for sections placed near the top of the address space
		delta := rva - section.VirtualAddress
		if delta < section.VirtualSize {
			return section.PointerToRawData + delta, nil
		}
	}
	return 0, fmt.Errorf("RVA 0x%x not found in any section", rva)
}
// readDataFromRVA reads size bytes starting at the given RVA of the PE file and returns a reader over them.
// The RVA is translated into a file offset using the provided section headers. An error is returned when size
// is zero, when the RVA does not fall in any section, or when fewer than size bytes are available.
func readDataFromRVA(file io.ReadSeeker, rva, size uint32, sections []pe.SectionHeader32) (*bytes.Reader, error) {
	if size == 0 {
		return nil, fmt.Errorf("zero size specified")
	}

	offset, err := rvaToFileOffset(rva, sections)
	if err != nil {
		return nil, err
	}

	if _, err := file.Seek(int64(offset), io.SeekStart); err != nil {
		return nil, fmt.Errorf("error seeking to data: %w", err)
	}

	// size comes straight from the file headers and can claim up to 4 GiB; let the buffer grow with the bytes
	// that actually arrive instead of allocating the declared size up front
	data, err := io.ReadAll(io.LimitReader(file, int64(size)))
	if err != nil {
		return nil, fmt.Errorf("error reading data: %w", err)
	}
	if uint64(len(data)) < uint64(size) {
		// mirror what a full read would report: EOF when nothing was available, unexpected EOF when cut short
		short := io.ErrUnexpectedEOF
		if len(data) == 0 {
			short = io.EOF
		}
		return nil, fmt.Errorf("error reading data: %w", short)
	}

	return bytes.NewReader(data), nil
}
// parseResourceDirectory recursively parses a PE resource directory. This takes a relative virtual address (offset of
// a piece of data or code relative to the base address), the size of the resource directory, the set of RVAs already
// parsed, and the map to populate discovered version resource values.
//
// .rsrc Section
// +------------------------------+
// | Resource Directory Table |
// +------------------------------+
// | Resource Directory Entries |
// | +------------------------+ |
// | | Subdirectory or Data | |
// | +------------------------+ |
// +------------------------------+
// | Resource Data Entries |
// | +------------------------+ |
// | | Resource Data | |
// | +------------------------+ |
// +------------------------------+
// | Actual Resource Data |
// +------------------------------+
//
// sources:
// - https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-rsrc-section
// - https://learn.microsoft.com/en-us/previous-versions/ms809762(v=msdn.10)#pe-file-resources
func parseResourceDirectory(sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
	if sec == nil || sec.Size == 0 {
		// nothing to parse
		return nil
	}
	if sec.Reader == nil {
		return errors.New("resource section not found")
	}

	// a zero BaseRVA marks the top-level call, where the directory itself is the start of the resource section
	baseRVA := sec.BaseRVA
	if baseRVA == 0 {
		baseRVA = sec.RVA
	}

	// position of this directory table within the section reader
	offset := int64(sec.RVA - baseRVA)
	if _, err := sec.Reader.Seek(offset, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to directory offset: %w", err)
	}

	var directoryHeader peImageResourceDirectory
	if err := readIntoStruct(sec.Reader, &directoryHeader); err != nil {
		return fmt.Errorf("error reading directory header: %w", err)
	}

	// widen before adding: both counts are uint16, so summing them first could wrap around and let an
	// oversized table slip past the limit below
	numEntries := int(directoryHeader.NumberOfNamedEntries) + int(directoryHeader.NumberOfIDEntries)
	switch {
	case numEntries > peMaxAllowedDirectoryEntries:
		return fmt.Errorf("too many entries in resource directory: %d", numEntries)
	case numEntries == 0:
		return errors.New("no entries in resource directory")
	}

	headerSize := int64(binary.Size(directoryHeader))
	entrySize := int64(binary.Size(peImageResourceDirectoryEntry{}))
	for i := 0; i < numEntries; i++ {
		// seek explicitly for every entry: processing the previous entry may have moved the shared reader
		entryOffset := offset + headerSize + int64(i)*entrySize
		if _, err := sec.Reader.Seek(entryOffset, io.SeekStart); err != nil {
			log.Tracef("error seeking to PE entry offset: %v", err)
			continue
		}

		var entry peImageResourceDirectoryEntry
		if err := readIntoStruct(sec.Reader, &entry); err != nil {
			log.Tracef("error reading PE resource directory entry: %v", err)
			continue
		}

		// a bad entry only costs us that entry; keep going with the rest of the table
		if err := processResourceEntry(entry, baseRVA, sec, dirs, fields, names); err != nil {
			log.Tracef("error processing resource entry: %v", err)
			continue
		}
	}
	return nil
}
// processResourceEntry handles a single resource directory entry: it records the entry's string name (when it
// has one) in names, then either recurses into the subdirectory the entry points at or parses the data entry it
// points at, adding any version resource strings found to fields.
//
// baseRVA is the RVA of the start of the resource section. dirs holds the RVAs of the directories already
// visited and is used to refuse cyclic references.
func processResourceEntry(entry peImageResourceDirectoryEntry, baseRVA uint32, sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
	// if the high bit is set, this is a directory entry, otherwise it is a data entry
	isDirectory := entry.OffsetToData&0x80000000 != 0
	// note: the offset is relative to the beginning of the resource section, not an RVA
	entryOffsetToData := entry.OffsetToData & 0x7FFFFFFF

	// the high bit of Name flags a string name; the remaining bits are then used as its position in the section reader
	nameIsString := entry.Name&0x80000000 != 0
	nameOffset := entry.Name & 0x7FFFFFFF

	// read the string name of the resource directory
	if nameIsString {
		// reading the name moves the shared reader, so remember where we were and go back afterwards
		currentPos, err := sec.Reader.Seek(0, io.SeekCurrent)
		if err != nil {
			return fmt.Errorf("error getting current reader position: %w", err)
		}
		if _, err := sec.Reader.Seek(int64(nameOffset), io.SeekStart); err != nil {
			return fmt.Errorf("error seeking to resource name offset: %w", err)
		}
		// a name that cannot be read is not fatal; the entry itself is still processed
		name, err := readUTF16WithLength(sec.Reader)
		if err == nil {
			names.Add(name)
		}
		if _, err := sec.Reader.Seek(currentPos, io.SeekStart); err != nil {
			return fmt.Errorf("error restoring reader position: %w", err)
		}
	}

	if !isDirectory {
		return parseResourceDataEntry(sec.Reader, baseRVA, baseRVA+entryOffsetToData, sec.Size, fields)
	}

	subRVA := baseRVA + entryOffsetToData
	if dirs.Has(subRVA) {
		// some malware uses recursive PE references to evade analysis
		return fmt.Errorf("recursive PE reference detected; skipping directory at baseRVA=0x%x subRVA=0x%x", baseRVA, subRVA)
	}
	dirs.Add(subRVA)

	// the subdirectory shares the same reader; only the RVA (and thereby the offset into the reader) changes
	return parseResourceDirectory(
		&extractedSection{
			RVA:     subRVA,
			BaseRVA: baseRVA,
			Size:    sec.Size - (sec.RVA - baseRVA),
			Reader:  sec.Reader,
		},
		dirs, fields, names)
}
// parseResourceDataEntry reads the resource data entry found at rva, loads the bytes the entry points at, and
// hands them to parseVersionResourceSection so any version strings end up in fields.
//
// baseRVA is subtracted from every RVA to get a position within reader. remainingSize is the upper bound
// accepted for the size the entry declares.
func parseResourceDataEntry(reader *bytes.Reader, baseRVA, rva, remainingSize uint32, fields map[string]string) error {
	entryPos := int64(rva - baseRVA)
	if _, err := reader.Seek(entryPos, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to data entry offset: %w", err)
	}

	var entry peImageResourceDataEntry
	if err := readIntoStruct(reader, &entry); err != nil {
		return fmt.Errorf("error reading resource data entry: %w", err)
	}

	// the declared size comes straight from the file; refuse anything larger than the caller allows
	if entry.Size > remainingSize {
		return fmt.Errorf("resource data entry size exceeds remaining size")
	}

	payload := make([]byte, entry.Size)
	payloadPos := int64(entry.OffsetToData - baseRVA)
	if _, err := reader.Seek(payloadPos, io.SeekStart); err != nil {
		return fmt.Errorf("error seeking to resource data: %w", err)
	}
	if _, err := reader.Read(payload); err != nil {
		return fmt.Errorf("error reading resource data: %w", err)
	}

	return parseVersionResourceSection(bytes.NewReader(payload), fields)
}
// parseVersionResourceSection parses a PE version resource section from within a resource directory.
//
// "The main structure in a version resource is the VS_FIXEDFILEINFO structure. Additional structures include the
// VarFileInfo structure to store language information data, and StringFileInfo for user-defined string information.
// All strings in a version resource are in Unicode format. Each block of information is aligned on a DWORD boundary."
//
// "VS_VERSIONINFO" (utf16)
// +---------------------------------------------------+
// | wLength (2 bytes) |
// | wValueLength (2 bytes) |
// | wType (2 bytes) |
// | szKey ("VS_VERSION_INFO") (utf16) |
// | Padding (to DWORD) |
// +---------------------------------------------------+
// | VS_FIXEDFILEINFO (52 bytes) |
// +---------------------------------------------------+
// | "StringFileInfo" (utf16) |
// +---------------------------------------------------+
// | wLength (2 bytes) |
// | wValueLength (2 bytes) |
// | wType (2 bytes) |
// | szKey ("StringFileInfo") (utf16) |
// | Padding (to DWORD) |
// | StringTable |
// | +--------------------------------------------+ |
// | | wLength (2 bytes) | |
// | | wValueLength (2 bytes) | |
// | | wType (2 bytes) | |
// | | szKey ("040904b0") | |
// | | Padding (to DWORD) | |
// | | String | |
// | | +--------------------------------------+ | |
// | | | wLength (2 bytes) | | |
// | | | wValueLength (2 bytes) | | |
// | | | wType (2 bytes) | | |
// | | | szKey ("FileVersion") | | |
// | | | Padding (to DWORD) | | |
// | | | szValue ("15.00.0913.015") | | |
// | | | Padding (to DWORD) | | |
// | +--------------------------------------------+ |
// +---------------------------------------------------+
// | VarFileInfo (utf16) |
// +---------------------------------------------------+
// | (skip!) |
// +---------------------------------------------------+
//
// sources:
// - https://learn.microsoft.com/en-us/windows/win32/menurc/resource-file-formats
// - https://learn.microsoft.com/en-us/windows/win32/menurc/vs-versioninfo
// - https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/varfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/stringfileinfo
// - https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable
func parseVersionResourceSection(reader *bytes.Reader, fields map[string]string) error {
	// offset tracks the reader position relative to the start of the resource so DWORD alignment can be computed
	offset := 0

	var info peVsVersionInfo
	if szKey, err := readIntoStructAndSzKey(reader, &info, &offset); err != nil {
		return fmt.Errorf("error reading PE version info: %w", err)
	} else if szKey != "VS_VERSION_INFO" {
		// this is a resource section, but not the version resources
		return nil
	}

	if err := alignAndSeek(reader, &offset); err != nil {
		return fmt.Errorf("error aligning past PE version info: %w", err)
	}

	var fixedFileInfo peVsFixedFileInfo
	if err := readIntoStruct(reader, &fixedFileInfo, &offset); err != nil {
		return fmt.Errorf("error reading PE FixedFileInfo: %w", err)
	}

	for reader.Len() > 0 {
		if err := alignAndSeek(reader, &offset); err != nil {
			return fmt.Errorf("error seeking to PE StringFileInfo: %w", err)
		}

		var sfiHeader peStringFileInfo
		if szKey, err := readIntoStructAndSzKey(reader, &sfiHeader, &offset); err != nil {
			return fmt.Errorf("error reading PE string file info header: %w", err)
		} else if szKey != "StringFileInfo" {
			// we only care about extracting strings from any string tables, skip this
			offset += int(sfiHeader.ValueLength)
			continue
		}

		// stOffset counts the bytes consumed within the string table (its own header included) so it can be
		// compared against the length the table declares
		var stOffset int

		// note: the szKey for the peStringTable is the language
		var stHeader peStringTable
		if _, err := readIntoStructAndSzKey(reader, &stHeader, &offset, &stOffset); err != nil {
			return fmt.Errorf("error reading PE string table header: %w", err)
		}

		// Also stop once the reader is exhausted: the declared table length comes from the file and can exceed
		// the data actually present, and readIntoStruct treats a clean EOF as success without advancing any
		// offset, so looping on stOffset alone would never terminate on such input.
		for stOffset < int(stHeader.Length) && reader.Len() > 0 {
			var stringHeader peString
			if err := readIntoStruct(reader, &stringHeader, &offset, &stOffset); err != nil {
				break
			}

			key := readUTF16(reader, &offset, &stOffset)

			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
				return fmt.Errorf("error aligning to next PE string table value: %w", err)
			}

			// a zero value length means the key has no value at all (nothing to read, not even a terminator)
			var value string
			if stringHeader.ValueLength > 0 {
				value = readUTF16(reader, &offset, &stOffset)
			}

			fields[key] = value

			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
				return fmt.Errorf("error aligning to next PE string table key: %w", err)
			}
		}
	}

	if fields["FileVersion"] == "" {
		// we can derive the file version from the fixed file info if it is not already specified as a string entry... neat!
		fields["FileVersion"] = fmt.Sprintf("%d.%d.%d.%d",
			fixedFileInfo.FileVersionMS>>16, fixedFileInfo.FileVersionMS&0xFFFF,
			fixedFileInfo.FileVersionLS>>16, fixedFileInfo.FileVersionLS&0xFFFF)
	}

	return nil
}
// readIntoStructAndSzKey decodes a fixed-size header into data and then reads the null-terminated UTF-16 key
// (szKey) that immediately follows it, advancing every provided offset by the bytes consumed along the way.
// It is only meaningful for the header-then-key layout used by version resource nodes; do not use it elsewhere.
func readIntoStructAndSzKey[T any](reader *bytes.Reader, data *T, offsets ...*int) (string, error) {
	err := readIntoStruct(reader, data, offsets...)
	if err != nil {
		return "", err
	}

	szKey := readUTF16(reader, offsets...)
	return szKey, nil
}
// readIntoStruct decodes little-endian bytes from reader into data and, on success, advances each provided
// offset by the encoded size of data.
//
// Note that a clean io.EOF (nothing left to read at all) is reported as success: data keeps whatever it held
// and the offsets are not advanced. Any other failure, including a partial read, is returned as an error.
func readIntoStruct[T any](reader io.Reader, data *T, offsets ...*int) error {
	switch err := binary.Read(reader, binary.LittleEndian, data); {
	case err == nil:
		// decoded; fall through to the offset bookkeeping below
	case errors.Is(err, io.EOF):
		return nil
	default:
		return err
	}

	consumed := binary.Size(*data)
	for _, tracked := range offsets {
		*tracked += consumed
	}
	return nil
}
// alignAndSeek rounds *offset up to the next DWORD boundary, adds the padding that was skipped to every tracked
// offset, and moves the reader to the aligned position (measured from the start of the reader).
func alignAndSeek(reader io.Seeker, offset *int, trackOffsets ...*int) error {
	unaligned := *offset
	*offset = alignToDWORD(unaligned)

	padding := *offset - unaligned
	for _, tracked := range trackOffsets {
		*tracked += padding
	}

	if _, err := reader.Seek(int64(*offset), io.SeekStart); err != nil {
		return err
	}
	return nil
}
// alignToDWORD rounds offset up to the nearest multiple of 4 (a DWORD boundary); offsets that are already
// aligned are returned unchanged.
func alignToDWORD(offset int) int {
	const mask = 3 // low two bits; clearing them yields a multiple of 4
	return (offset + mask) &^ mask
}
// readUTF16 reads a null-terminated, little-endian UTF-16 string starting at the reader's current position and
// returns it as a Go string. Reading stops at the terminator or as soon as the data runs out, in which case
// whatever was read so far is returned.
//
// Code units are decoded as UTF-16, so characters outside the basic multilingual plane (encoded as surrogate
// pairs) are preserved; unpaired surrogates decode to U+FFFD.
//
// Every provided offset is advanced by the number of bytes consumed, terminator included.
func readUTF16(reader *bytes.Reader, offsets ...*int) string {
	startPos, err := reader.Seek(0, io.SeekCurrent)
	if err != nil {
		return ""
	}

	var units []uint16
	for {
		var unit uint16
		if err := binary.Read(reader, binary.LittleEndian, &unit); err != nil || unit == 0 {
			break
		}
		units = append(units, unit)
	}

	// calculate how many bytes we've actually read (including null terminator); asking a bytes.Reader for its
	// current position cannot fail, so the error is ignored
	endPos, _ := reader.Seek(0, io.SeekCurrent)
	bytesRead := int(endPos - startPos)
	for _, tracked := range offsets {
		*tracked += bytesRead
	}

	return string(utf16.Decode(units))
}
// readUTF16WithLength reads a length-prefixed, little-endian UTF-16 string from reader. The leading 2 bytes
// give the number of UTF-16 code units that follow; a zero count yields an empty string. An error is returned
// when the prefix or the announced code units cannot be read in full.
func readUTF16WithLength(reader *bytes.Reader) (string, error) {
	var count uint16
	if err := binary.Read(reader, binary.LittleEndian, &count); err != nil {
		return "", err
	}
	if count == 0 {
		return "", nil
	}

	units := make([]uint16, count)
	if err := binary.Read(reader, binary.LittleEndian, units); err != nil {
		return "", err
	}

	decoded := utf16.Decode(units)
	return string(decoded), nil
}

View File

@ -1,4 +1,4 @@
package dotnet package pe
import ( import (
"testing" "testing"
@ -13,7 +13,7 @@ import (
"github.com/anchore/syft/syft/source/stereoscopesource" "github.com/anchore/syft/syft/source/stereoscopesource"
) )
func Test_getLogicalDotnetPE(t *testing.T) { func Test_Read_DotNetDetection(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
fixture string fixture string
@ -126,7 +126,7 @@ func Test_getLogicalDotnetPE(t *testing.T) {
reader := fixtureFile(t, tt.fixture, tt.path) reader := fixtureFile(t, tt.fixture, tt.path)
got, err := getLogicalDotnetPE(reader) got, err := Read(reader)
tt.wantErr(t, err) tt.wantErr(t, err)
if err != nil { if err != nil {
return return
@ -136,7 +136,7 @@ func Test_getLogicalDotnetPE(t *testing.T) {
t.Errorf("unexpected version resources (-want +got): %s", d) t.Errorf("unexpected version resources (-want +got): %s", d)
} }
assert.Equal(t, tt.wantCLR, got.CLR.hasEvidenceOfCLR()) assert.Equal(t, tt.wantCLR, got.CLR.HasEvidenceOfCLR())
}) })
} }
} }

View File

@ -0,0 +1 @@
../../../dotnet/test-fixtures/image-net8-app

View File

@ -0,0 +1 @@
../../../dotnet/test-fixtures/image-net8-app-single-file

View File

@ -10,9 +10,9 @@ import (
"strconv" "strconv"
"time" "time"
rpmdb "github.com/anchore/go-rpmdb/pkg"
"github.com/sassoftware/go-rpmutils" "github.com/sassoftware/go-rpmutils"
rpmdb "github.com/anchore/go-rpmdb/pkg"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"
"github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/file"

View File

@ -9,7 +9,6 @@ import (
"strings" "strings"
rpmdb "github.com/anchore/go-rpmdb/pkg" rpmdb "github.com/anchore/go-rpmdb/pkg"
"github.com/anchore/syft/internal/log" "github.com/anchore/syft/internal/log"
"github.com/anchore/syft/internal/unknown" "github.com/anchore/syft/internal/unknown"
"github.com/anchore/syft/syft/artifact" "github.com/anchore/syft/syft/artifact"