From 79b6d5daa4d5553e935865b81ab8d6251b8980d9 Mon Sep 17 00:00:00 2001 From: Alex Goodman Date: Tue, 10 Jun 2025 15:03:50 -0400 Subject: [PATCH] Allow decoding of anchorectl json files (#3973) * allow decoding of import sbom file shape Signed-off-by: Alex Goodman * address formatting Signed-off-by: Alex Goodman * add file mode and type processing Signed-off-by: Alex Goodman * use type to interpret the raw value Signed-off-by: Alex Goodman * safe mode convert should use uint32 Signed-off-by: Alex Goodman * simpler decoder type Signed-off-by: Alex Goodman --------- Signed-off-by: Alex Goodman --- syft/format/syftjson/model/document.go | 24 ++ syft/format/syftjson/model/document_test.go | 99 +++++++ syft/format/syftjson/model/file.go | 68 +++++ syft/format/syftjson/model/file_test.go | 277 ++++++++++++++++++++ syft/format/syftjson/to_syft_model.go | 7 +- syft/format/syftjson/to_syft_model_test.go | 6 + 6 files changed, 477 insertions(+), 4 deletions(-) create mode 100644 syft/format/syftjson/model/document_test.go create mode 100644 syft/format/syftjson/model/file_test.go diff --git a/syft/format/syftjson/model/document.go b/syft/format/syftjson/model/document.go index a9886ba4b..a650af4fb 100644 --- a/syft/format/syftjson/model/document.go +++ b/syft/format/syftjson/model/document.go @@ -1,5 +1,10 @@ package model +import ( + "encoding/json" + "fmt" +) + // Document represents the syft cataloging findings as a JSON document type Document struct { Artifacts []Package `json:"artifacts"` // Artifacts is the list of packages discovered and placed into the catalog @@ -11,6 +16,25 @@ type Document struct { Schema Schema `json:"schema"` // Schema is a block reserved for defining the version for the shape of this JSON document and where to find the schema document to validate the shape } +func (d *Document) UnmarshalJSON(data []byte) error { + type Alias *Document + aux := Alias(d) + + if err := json.Unmarshal(data, aux); err != nil { + return fmt.Errorf("could not unmarshal 
syft JSON document: %w", err) + } + + // in previous versions of anchorectl, the file modes were stored as decimal values instead of octal. + if d.Schema.Version == "1.0.0" && d.Descriptor.Name == "anchorectl" { + // convert all file modes from decimal to octal + for i := range d.Files { + d.Files[i].Metadata.Mode = convertFileModeToBase8(d.Files[i].Metadata.Mode) + } + } + + return nil +} + // Descriptor describes what created the document as well as surrounding metadata type Descriptor struct { Name string `json:"name"` diff --git a/syft/format/syftjson/model/document_test.go b/syft/format/syftjson/model/document_test.go new file mode 100644 index 000000000..141644020 --- /dev/null +++ b/syft/format/syftjson/model/document_test.go @@ -0,0 +1,99 @@ +package model + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDocumentUnmarshalJSON_SchemaDetection(t *testing.T) { + tests := []struct { + name string + jsonData string + modes []int + }{ + { + name: "schema version 1.0.0 + anchorectl", + jsonData: `{ + "files": [ + {"metadata": {"mode": 493}}, + {"metadata": {"mode": 420}} + ], + "schema": {"version": "1.0.0"}, + "descriptor": { + "name": "anchorectl" + } + }`, + modes: []int{755, 644}, + }, + { + name: "schema version 1.0.0 + syft", + jsonData: `{ + "files": [ + {"metadata": {"mode": 755}}, + {"metadata": {"mode": 644}} + ], + "schema": {"version": "1.0.0"}, + "descriptor": { + "name": "syft" + } + }`, + modes: []int{755, 644}, + }, + { + name: "schema version 2.0.0 + anchorectl", + jsonData: `{ + "files": [ + {"metadata": {"mode": 755}}, + {"metadata": {"mode": 644}} + ], + "schema": {"version": "2.0.0"}, + "descriptor": { + "name": "anchorectl" + } + }`, + modes: []int{755, 644}, + }, + { + name: "missing schema version should not convert modes", + jsonData: `{ + "files": [ + {"metadata": {"mode": 755}} + ], + "schema": {} + }`, + modes: []int{755}, + }, + { + name: "empty 
files array with version 1.0.0", + jsonData: `{ + "files": [], + "schema": {"version": "1.0.0"}, + "descriptor": { + "name": "anchorectl" + } + }`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var doc Document + + err := json.Unmarshal([]byte(tt.jsonData), &doc) + if err != nil { + t.Fatalf("Failed to unmarshal JSON: %v", err) + } + + var modes []int + for _, file := range doc.Files { + modes = append(modes, file.Metadata.Mode) + } + + require.Len(t, doc.Files, len(tt.modes), "Unexpected number of files") + assert.Equal(t, tt.modes, modes, "File modes do not match expected values") + }) + } +} diff --git a/syft/format/syftjson/model/file.go b/syft/format/syftjson/model/file.go index 0ce0d92b3..be406d2ba 100644 --- a/syft/format/syftjson/model/file.go +++ b/syft/format/syftjson/model/file.go @@ -1,6 +1,11 @@ package model import ( + "encoding/json" + "fmt" + "strconv" + + stereoFile "github.com/anchore/stereoscope/pkg/file" "github.com/anchore/syft/syft/file" "github.com/anchore/syft/syft/license" ) @@ -26,6 +31,44 @@ type FileMetadataEntry struct { Size int64 `json:"size"` } +func (f *FileMetadataEntry) UnmarshalJSON(data []byte) error { + type Alias FileMetadataEntry + aux := (*Alias)(f) + + if err := json.Unmarshal(data, aux); err == nil { + // we should have at least one field set to a non-zero value... 
otherwise this is a legacy entry + if f.Mode != 0 || f.Type != "" || f.LinkDestination != "" || + f.UserID != 0 || f.GroupID != 0 || f.MIMEType != "" || f.Size != 0 { + return nil + } + } + + var legacy sbomImportLegacyFileMetadataEntry + if err := json.Unmarshal(data, &legacy); err != nil { + return err + } + + f.Mode = legacy.Mode + f.Type = string(legacy.Type) + f.LinkDestination = legacy.LinkDestination + f.UserID = legacy.UserID + f.GroupID = legacy.GroupID + f.MIMEType = legacy.MIMEType + f.Size = legacy.Size + + return nil +} + +type sbomImportLegacyFileMetadataEntry struct { + Mode int `json:"Mode"` + Type intOrStringFileType `json:"Type"` + LinkDestination string `json:"LinkDestination"` + UserID int `json:"UserID"` + GroupID int `json:"GroupID"` + MIMEType string `json:"MIMEType"` + Size int64 `json:"Size"` +} + type FileLicense struct { Value string `json:"value"` SPDXExpression string `json:"spdxExpression"` @@ -38,3 +81,28 @@ type FileLicenseEvidence struct { Offset int `json:"offset"` Extent int `json:"extent"` } + +type intOrStringFileType string + +func (lt *intOrStringFileType) UnmarshalJSON(data []byte) error { + var str string + if err := json.Unmarshal(data, &str); err == nil { + *lt = intOrStringFileType(str) + return nil + } + + var num stereoFile.Type + if err := json.Unmarshal(data, &num); err != nil { + return fmt.Errorf("file.Type must be either string or int, got: %s", string(data)) + } + + *lt = intOrStringFileType(num.String()) + return nil +} + +func convertFileModeToBase8(rawMode int) int { + octalStr := fmt.Sprintf("%o", rawMode) + // we don't need to check that this is a valid octal string since the input is always an integer + result, _ := strconv.Atoi(octalStr) + return result +} diff --git a/syft/format/syftjson/model/file_test.go b/syft/format/syftjson/model/file_test.go new file mode 100644 index 000000000..e2aee3ed3 --- /dev/null +++ b/syft/format/syftjson/model/file_test.go @@ -0,0 +1,277 @@ +package model + +import ( + 
"testing" + + "github.com/google/go-cmp/cmp" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func Test_FileMetadataEntry_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + jsonData []byte + expected FileMetadataEntry + }{ + { + name: "unmarshal current format", + jsonData: []byte(`{ + "mode": 644, + "type": "RegularFile", + "linkDestination": "/usr/bin/python3", + "userID": 1000, + "groupID": 1000, + "mimeType": "text/plain", + "size": 10174 + }`), + expected: FileMetadataEntry{ + Mode: 644, + Type: "RegularFile", + LinkDestination: "/usr/bin/python3", + UserID: 1000, + GroupID: 1000, + MIMEType: "text/plain", + Size: 10174, + }, + }, + { + name: "unmarshal legacy sbom import format", + jsonData: []byte(`{ + "Mode": 644, + "Type": "RegularFile", + "LinkDestination": "/usr/bin/python3", + "UserID": 1000, + "GroupID": 1000, + "MIMEType": "text/plain", + "Size": 10174 + }`), + expected: FileMetadataEntry{ + Mode: 644, + Type: "RegularFile", + LinkDestination: "/usr/bin/python3", + UserID: 1000, + GroupID: 1000, + MIMEType: "text/plain", + Size: 10174, + }, + }, + { + name: "unmarshal legacy sbom import format - integer type", + jsonData: []byte(`{ + "Mode": 644, + "Type": 0, + "LinkDestination": "/usr/bin/python3", + "UserID": 1000, + "GroupID": 1000, + "MIMEType": "text/plain", + "Size": 10174 + }`), + expected: FileMetadataEntry{ + Mode: 644, + Type: "RegularFile", + LinkDestination: "/usr/bin/python3", + UserID: 1000, + GroupID: 1000, + MIMEType: "text/plain", + Size: 10174, + }, + }, + { + name: "unmarshal minimal current format", + jsonData: []byte(`{ + "mode": 0, + "type": "RegularFile", + "userID": 0, + "groupID": 0, + "size": 0 + }`), + expected: FileMetadataEntry{ + Type: "RegularFile", + }, + }, + { + name: "unmarshal minimal legacy format", + jsonData: []byte(`{ + "Mode": 0, + "Type": "RegularFile", + "UserID": 0, + "GroupID": 0, + "Size": 0 + }`), + expected: FileMetadataEntry{ + Type: "RegularFile", + 
}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + var actual FileMetadataEntry + err := actual.UnmarshalJSON(test.jsonData) + require.NoError(t, err) + + if diff := cmp.Diff(test.expected, actual); diff != "" { + t.Errorf("FileMetadataEntry mismatch (-expected +actual):\n%s", diff) + } + }) + } +} + +func Test_intOrStringFileType_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + jsonData []byte + expected string + wantErr require.ErrorAssertionFunc + }{ + // string inputs - should pass through unchanged + { + name: "string RegularFile", + jsonData: []byte(`"RegularFile"`), + expected: "RegularFile", + }, + { + name: "string HardLink", + jsonData: []byte(`"HardLink"`), + expected: "HardLink", + }, + { + name: "string Directory", + jsonData: []byte(`"Directory"`), + expected: "Directory", + }, + { + name: "string custom value", + jsonData: []byte(`"CustomFileType"`), + expected: "CustomFileType", + }, + // integer inputs - should convert to string representation + { + name: "int 0 (TypeRegular)", + jsonData: []byte(`0`), + expected: "RegularFile", + }, + { + name: "int 1 (TypeHardLink)", + jsonData: []byte(`1`), + expected: "HardLink", + }, + { + name: "int 2 (TypeSymLink)", + jsonData: []byte(`2`), + expected: "SymbolicLink", + }, + { + name: "int 3 (TypeCharacterDevice)", + jsonData: []byte(`3`), + expected: "CharacterDevice", + }, + { + name: "int 4 (TypeBlockDevice)", + jsonData: []byte(`4`), + expected: "BlockDevice", + }, + { + name: "int 5 (TypeDirectory)", + jsonData: []byte(`5`), + expected: "Directory", + }, + { + name: "int 6 (TypeFIFO)", + jsonData: []byte(`6`), + expected: "FIFONode", + }, + { + name: "int 7 (TypeSocket)", + jsonData: []byte(`7`), + expected: "Socket", + }, + { + name: "int 8 (TypeIrregular)", + jsonData: []byte(`8`), + expected: "IrregularFile", + }, + { + name: "unknown int", + jsonData: []byte(`99`), + expected: "Unknown", + }, + { + name: "negative int", + jsonData: []byte(`-1`), 
+ expected: "Unknown", + }, + { + name: "null value", + jsonData: []byte(`null`), + }, + { + name: "invalid JSON", + jsonData: []byte(`{`), + wantErr: require.Error, + }, + { + name: "boolean value", + jsonData: []byte(`true`), + wantErr: require.Error, + }, + { + name: "array value", + jsonData: []byte(`[]`), + wantErr: require.Error, + }, + { + name: "object value", + jsonData: []byte(`{}`), + wantErr: require.Error, + }, + { + name: "float value", + jsonData: []byte(`1.5`), + wantErr: require.Error, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.wantErr == nil { + test.wantErr = require.NoError + } + var ft intOrStringFileType + err := ft.UnmarshalJSON(test.jsonData) + test.wantErr(t, err) + if err != nil { + return + } + assert.Equal(t, test.expected, string(ft)) + }) + } +} + +func Test_convertFileModeToBase8(t *testing.T) { + tests := []struct { + name string + input int + expected int + }{ + { + name: "no permissions", + input: 0, + expected: 0, + }, + { + name: "symlink + rwxrwxrwx", + input: 134218239, + expected: 1000000777, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := convertFileModeToBase8(tt.input) + + require.Equal(t, tt.expected, actual) + }) + } +} diff --git a/syft/format/syftjson/to_syft_model.go b/syft/format/syftjson/to_syft_model.go index 503f9ba45..143a639a6 100644 --- a/syft/format/syftjson/to_syft_model.go +++ b/syft/format/syftjson/to_syft_model.go @@ -144,13 +144,12 @@ func toSyftFiles(files []model.File) sbom.Artifacts { } func safeFileModeConvert(val int) (fs.FileMode, error) { - if val < math.MinInt32 || val > math.MaxInt32 { - // Value is out of the range that int32 can represent + mode, err := strconv.ParseInt(strconv.Itoa(val), 8, 64) + if mode < 0 || mode > math.MaxUint32 { + // value is out of the range that uint32 can represent return 0, fmt.Errorf("value %d is out of the range that int32 can represent", val) } - // Safe to convert to
os.FileMode - mode, err := strconv.ParseInt(strconv.Itoa(val), 8, 64) if err != nil { return 0, err } diff --git a/syft/format/syftjson/to_syft_model_test.go b/syft/format/syftjson/to_syft_model_test.go index babba6345..1c84fba4a 100644 --- a/syft/format/syftjson/to_syft_model_test.go +++ b/syft/format/syftjson/to_syft_model_test.go @@ -491,6 +491,12 @@ func Test_safeFileModeConvert(t *testing.T) { want: os.FileMode(511), // 777 in octal equals 511 in decimal wantErr: false, }, + { + name: "valid perm with symlink type", + val: 1000000777, // symlink + rwxrwxrwx + want: os.FileMode(0o1000000777), // 134218239 + wantErr: false, + }, { name: "outside int32 high", val: int(math.MaxInt32) + 1,