diff --git a/internal/file/digest.go b/internal/file/digest.go index c9aa1552c..75e87dffd 100644 --- a/internal/file/digest.go +++ b/internal/file/digest.go @@ -24,6 +24,16 @@ func supportedHashAlgorithms() []crypto.Hash { } } +// Hex-encoded digests of zero-length input, one per hash algorithm handled by the size == 0 switch in NewDigestsFromFile +const ( + emptyMD5 = "d41d8cd98f00b204e9800998ecf8427e" + emptySHA1 = "da39a3ee5e6b4b0d3255bfef95601890afd80709" + emptySHA224 = "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f" + emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + emptySHA384 = "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b" + emptySHA512 = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e" +) + func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) { hashes = NormalizeHashes(hashes) // create a set of hasher objects tied together with a single writer to feed content into @@ -39,14 +49,40 @@ func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []cryp return nil, err } + // Empty content still yields one digest per entry in hashes, using the pre-computed constants above if size == 0 { - return make([]file.Digest, 0), nil + result := make([]file.Digest, len(hashes)) + for idx, hashObj := range hashes { + var value string + switch hashObj { + case crypto.MD5: + value = emptyMD5 + case crypto.SHA1: + value = emptySHA1 + case crypto.SHA224: + value = emptySHA224 + case crypto.SHA256: + value = emptySHA256 + case crypto.SHA384: + value = emptySHA384 + case crypto.SHA512: + value = emptySHA512 + default: + // No constant for this hash: use its hasher's Sum (NOTE(review): presumably nothing has been written to it when size == 0 — confirm) + value = fmt.Sprintf("%+x", hashers[idx].Sum(nil)) + } + result[idx] = file.Digest{ + Algorithm: CleanDigestAlgorithmName(hashObj.String()), + Value: value, + } + } + return result, nil } result := make([]file.Digest, len(hashes)) - // only capture digests when 
there is content. It is important to do this based on SIZE and not - // FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only - // file type but a body is still allowed. + // Capture digests for all files with content. It is important to base this on actual + // content SIZE rather than FILE TYPE, as it is possible for a tar to be crafted with + // a header-only file type but a body is still allowed. for idx, hasher := range hashers { result[idx] = file.Digest{ Algorithm: CleanDigestAlgorithmName(hashes[idx].String()), diff --git a/internal/file/digest_test.go b/internal/file/digest_test.go index 4e793743f..87e45af40 100644 --- a/internal/file/digest_test.go +++ b/internal/file/digest_test.go @@ -72,6 +72,37 @@ func TestNewDigestsFromFile(t *testing.T) { }, }, }, + { + name: "empty file has valid checksums", + fixture: "test-fixtures/empty.txt", + hashes: supportedHashAlgorithms(), + want: []file.Digest{ + { + Algorithm: "md5", + Value: "d41d8cd98f00b204e9800998ecf8427e", + }, + { + Algorithm: "sha1", + Value: "da39a3ee5e6b4b0d3255bfef95601890afd80709", + }, + { + Algorithm: "sha224", + Value: "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f", + }, + { + Algorithm: "sha256", + Value: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + Algorithm: "sha384", + Value: "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b", + }, + { + Algorithm: "sha512", + Value: "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e", + }, + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/internal/file/test-fixtures/empty.txt b/internal/file/test-fixtures/empty.txt new file mode 100644 index 000000000..e69de29bb diff --git a/syft/file/cataloger/filedigest/cataloger_test.go b/syft/file/cataloger/filedigest/cataloger_test.go index 5fbdf9bd1..5b83fb37e 100644 
--- a/syft/file/cataloger/filedigest/cataloger_test.go +++ b/syft/file/cataloger/filedigest/cataloger_test.go @@ -34,12 +34,7 @@ func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Has t.Fatalf("could not read %q : %+v", f, err) } - if len(b) == 0 { - // we don't keep digests for empty files - digests[file.NewLocation(f).Coordinates] = []file.Digest{} - continue - } - + // Calculate digests for all files, including empty files for _, hash := range hashes { h := hash.New() h.Write(b) @@ -65,13 +60,13 @@ func TestDigestsCataloger(t *testing.T) { name: "md5", digests: []crypto.Hash{crypto.MD5}, files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, - expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5), + expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5), }, { name: "md5-sha1-sha256", digests: []crypto.Hash{crypto.MD5, crypto.SHA1, crypto.SHA256}, files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, - expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256), + expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256), }, }