fix: use correct hashes for empty files (#4620)

Signed-off-by: Paweł Pałucha <pawel.palucha@chainguard.dev>
This commit is contained in:
Paweł Pałucha 2026-02-24 15:52:29 +01:00 committed by GitHub
parent e9e7e20cc8
commit db76d85d51
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 74 additions and 12 deletions

View File

@ -24,6 +24,16 @@ func supportedHashAlgorithms() []crypto.Hash {
} }
} }
// Hex-encoded digests of zero-length input, kept as constants so that an
// empty file can be reported with its real checksum for each algorithm
// without finalizing a hasher.
const (
	// Legacy algorithms.
	emptyMD5  = "d41d8cd98f00b204e9800998ecf8427e"
	emptySHA1 = "da39a3ee5e6b4b0d3255bfef95601890afd80709"

	// SHA-2 family.
	emptySHA224 = "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f"
	emptySHA256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
	emptySHA384 = "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b"
	emptySHA512 = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"
)
func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) { func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []crypto.Hash) ([]file.Digest, error) {
hashes = NormalizeHashes(hashes) hashes = NormalizeHashes(hashes)
// create a set of hasher objects tied together with a single writer to feed content into // create a set of hasher objects tied together with a single writer to feed content into
@ -39,14 +49,40 @@ func NewDigestsFromFile(ctx context.Context, closer io.ReadCloser, hashes []cryp
return nil, err return nil, err
} }
// For empty files, use pre-computed constants for better performance
if size == 0 { if size == 0 {
return make([]file.Digest, 0), nil result := make([]file.Digest, len(hashes))
for idx, hashObj := range hashes {
var value string
switch hashObj {
case crypto.MD5:
value = emptyMD5
case crypto.SHA1:
value = emptySHA1
case crypto.SHA224:
value = emptySHA224
case crypto.SHA256:
value = emptySHA256
case crypto.SHA384:
value = emptySHA384
case crypto.SHA512:
value = emptySHA512
default:
// Fallback to calculated hash for unsupported algorithms
value = fmt.Sprintf("%+x", hashers[idx].Sum(nil))
}
result[idx] = file.Digest{
Algorithm: CleanDigestAlgorithmName(hashObj.String()),
Value: value,
}
}
return result, nil
} }
result := make([]file.Digest, len(hashes)) result := make([]file.Digest, len(hashes))
// only capture digests when there is content. It is important to do this based on SIZE and not // Capture digests for all files with content. It is important to base this on actual
// FILE TYPE. The reasoning is that it is possible for a tar to be crafted with a header-only // content SIZE rather than FILE TYPE, as it is possible for a tar to be crafted with
// file type but a body is still allowed. // a header-only file type but a body is still allowed.
for idx, hasher := range hashers { for idx, hasher := range hashers {
result[idx] = file.Digest{ result[idx] = file.Digest{
Algorithm: CleanDigestAlgorithmName(hashes[idx].String()), Algorithm: CleanDigestAlgorithmName(hashes[idx].String()),

View File

@ -72,6 +72,37 @@ func TestNewDigestsFromFile(t *testing.T) {
}, },
}, },
}, },
{
name: "empty file has valid checksums",
fixture: "test-fixtures/empty.txt",
hashes: supportedHashAlgorithms(),
want: []file.Digest{
{
Algorithm: "md5",
Value: "d41d8cd98f00b204e9800998ecf8427e",
},
{
Algorithm: "sha1",
Value: "da39a3ee5e6b4b0d3255bfef95601890afd80709",
},
{
Algorithm: "sha224",
Value: "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f",
},
{
Algorithm: "sha256",
Value: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
},
{
Algorithm: "sha384",
Value: "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b",
},
{
Algorithm: "sha512",
Value: "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
},
},
},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {

View File

View File

@ -34,12 +34,7 @@ func testDigests(t testing.TB, root string, files []string, hashes ...crypto.Has
t.Fatalf("could not read %q : %+v", f, err) t.Fatalf("could not read %q : %+v", f, err)
} }
if len(b) == 0 { // Calculate digests for all files, including empty files
// we don't keep digests for empty files
digests[file.NewLocation(f).Coordinates] = []file.Digest{}
continue
}
for _, hash := range hashes { for _, hash := range hashes {
h := hash.New() h := hash.New()
h.Write(b) h.Write(b)
@ -65,13 +60,13 @@ func TestDigestsCataloger(t *testing.T) {
name: "md5", name: "md5",
digests: []crypto.Hash{crypto.MD5}, digests: []crypto.Hash{crypto.MD5},
files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"},
expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5), expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5),
}, },
{ {
name: "md5-sha1-sha256", name: "md5-sha1-sha256",
digests: []crypto.Hash{crypto.MD5, crypto.SHA1, crypto.SHA256}, digests: []crypto.Hash{crypto.MD5, crypto.SHA1, crypto.SHA256},
files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"}, files: []string{"test-fixtures/last/empty/empty", "test-fixtures/last/path.txt"},
expected: testDigests(t, "test-fixtures/last", []string{"path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256), expected: testDigests(t, "test-fixtures/last", []string{"empty/empty", "path.txt"}, crypto.MD5, crypto.SHA1, crypto.SHA256),
}, },
} }