fix: referenced sha256:* container images may be deleted
The inventory of the sha256:* images and the manifest index that reference them is incomplete because it does not take into account any image older than the expiration limit. As a result some sha256:* will be considered orphaned although they are referenced from a manifest index that was created more recently than the expiration limit. There must not be any filtering based on the creation time when building the inventory. The expiration limit must only be taken into account when deleting orphaned images: those that are more recent than the expiration limit must not be deleted. This limit is specially important because it protects against a race between a cleanup task and an ongoing mirroring task. A mirroring task (such as skopeo sync) will first upload sha256:* images and then create the corresponding manifest index. If a cleanup races against it, the sha256:* images that are not yet referenced will be deleted without skopeo noticing and the published index manifest that happens at a later time will contain references to non-existent images.
This commit is contained in:
parent
b2aa644859
commit
0a5fd7fdb8
3 changed files with 163 additions and 17 deletions
116
services/packages/cleanup/cleanup_sha256_test.go
Normal file
116
services/packages/cleanup/cleanup_sha256_test.go
Normal file
|
@ -0,0 +1,116 @@
|
|||
// Copyright 2024 The Forgejo Authors.
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
package container
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/models/db"
|
||||
"code.gitea.io/gitea/models/packages"
|
||||
"code.gitea.io/gitea/models/unittest"
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
container_module "code.gitea.io/gitea/modules/packages/container"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
container_service "code.gitea.io/gitea/services/packages/container"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCleanupSHA256(t *testing.T) {
|
||||
require.NoError(t, unittest.PrepareTestDatabase())
|
||||
defer test.MockVariableValue(&container_service.SHA256BatchSize, 1)()
|
||||
|
||||
ctx := db.DefaultContext
|
||||
|
||||
createContainer := func(t *testing.T, name, version, digest string, created timeutil.TimeStamp) {
|
||||
t.Helper()
|
||||
|
||||
ownerID := int64(2001)
|
||||
|
||||
p := packages.Package{
|
||||
OwnerID: ownerID,
|
||||
LowerName: name,
|
||||
Type: packages.TypeContainer,
|
||||
}
|
||||
_, err := db.GetEngine(ctx).Insert(&p)
|
||||
// package_version").Where("version = ?", multiTag).Update(&packages_model.PackageVersion{MetadataJSON: `corrupted "manifests":[{ bad`})
|
||||
require.NoError(t, err)
|
||||
|
||||
var metadata string
|
||||
if digest != "" {
|
||||
m := container_module.Metadata{
|
||||
Manifests: []*container_module.Manifest{
|
||||
{
|
||||
Digest: digest,
|
||||
},
|
||||
},
|
||||
}
|
||||
mt, err := json.Marshal(m)
|
||||
require.NoError(t, err)
|
||||
metadata = string(mt)
|
||||
}
|
||||
v := packages.PackageVersion{
|
||||
PackageID: p.ID,
|
||||
LowerVersion: version,
|
||||
MetadataJSON: metadata,
|
||||
CreatedUnix: created,
|
||||
}
|
||||
_, err = db.GetEngine(ctx).NoAutoTime().Insert(&v)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
cleanupAndCheckLogs := func(t *testing.T, olderThan time.Duration, expected ...string) {
|
||||
t.Helper()
|
||||
logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
|
||||
logChecker.Filter(expected...)
|
||||
logChecker.StopMark(container_service.SHA256LogFinish)
|
||||
defer cleanup()
|
||||
|
||||
require.NoError(t, CleanupExpiredData(ctx, olderThan))
|
||||
|
||||
logFiltered, logStopped := logChecker.Check(5 * time.Second)
|
||||
assert.True(t, logStopped)
|
||||
filtered := make([]bool, 0, len(expected))
|
||||
for range expected {
|
||||
filtered = append(filtered, true)
|
||||
}
|
||||
assert.EqualValues(t, filtered, logFiltered, expected)
|
||||
}
|
||||
|
||||
ancient := 1 * time.Hour
|
||||
|
||||
t.Run("no packages, cleanup nothing", func(t *testing.T) {
|
||||
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
|
||||
})
|
||||
|
||||
orphan := "orphan"
|
||||
createdLongAgo := timeutil.TimeStamp(time.Now().Add(-(ancient * 2)).Unix())
|
||||
createdRecently := timeutil.TimeStamp(time.Now().Add(-(ancient / 2)).Unix())
|
||||
|
||||
t.Run("an orphaned package created a long time ago is removed", func(t *testing.T) {
|
||||
createContainer(t, orphan, "sha256:"+orphan, "", createdLongAgo)
|
||||
cleanupAndCheckLogs(t, ancient, "Removing 1 entries from `package_version`")
|
||||
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
|
||||
})
|
||||
|
||||
t.Run("a newly created orphaned package is not cleaned up", func(t *testing.T) {
|
||||
createContainer(t, orphan, "sha256:"+orphan, "", createdRecently)
|
||||
cleanupAndCheckLogs(t, ancient, "1 out of 1 container image(s) are not deleted because they were created less than")
|
||||
cleanupAndCheckLogs(t, 0, "Removing 1 entries from `package_version`")
|
||||
cleanupAndCheckLogs(t, 0, "Nothing to cleanup")
|
||||
})
|
||||
|
||||
t.Run("a referenced package is not removed", func(t *testing.T) {
|
||||
referenced := "referenced"
|
||||
digest := "sha256:" + referenced
|
||||
createContainer(t, referenced, digest, "", createdRecently)
|
||||
index := "index"
|
||||
createContainer(t, index, index, digest, createdRecently)
|
||||
cleanupAndCheckLogs(t, ancient, "Nothing to cleanup")
|
||||
})
|
||||
}
|
14
services/packages/cleanup/main_test.go
Normal file
14
services/packages/cleanup/main_test.go
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Copyright 2024 The Forgejo Authors.
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
package container
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"code.gitea.io/gitea/models/unittest"
|
||||
)
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
unittest.MainTest(m)
|
||||
}
|
|
@ -13,6 +13,7 @@ import (
|
|||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
container_module "code.gitea.io/gitea/modules/packages/container"
|
||||
"code.gitea.io/gitea/modules/timeutil"
|
||||
)
|
||||
|
||||
var (
|
||||
|
@ -37,18 +38,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
|
|||
defer committer.Close()
|
||||
|
||||
foundAtLeastOneSHA256 := false
|
||||
shaToVersionID := make(map[string]int64, 100)
|
||||
type packageVersion struct {
|
||||
id int64
|
||||
created timeutil.TimeStamp
|
||||
}
|
||||
shaToPackageVersion := make(map[string]packageVersion, 100)
|
||||
knownSHA := make(map[string]any, 100)
|
||||
|
||||
// compute before making the inventory to not race against ongoing
|
||||
// image creations
|
||||
old := timeutil.TimeStamp(time.Now().Add(-olderThan).Unix())
|
||||
|
||||
log.Debug("Look for all package_version.version that start with sha256:")
|
||||
|
||||
old := time.Now().Add(-olderThan).Unix()
|
||||
|
||||
// Iterate over all container versions in ascending order and store
|
||||
// in shaToVersionID all versions with a sha256: prefix. If an index
|
||||
// in shaToPackageVersion all versions with a sha256: prefix. If an index
|
||||
// manifest is found, the sha256: digest it references are removed
|
||||
// from shaToVersionID. If the sha256: digest found in an index
|
||||
// manifest is not already in shaToVersionID, it is stored in
|
||||
// from shaToPackageVersion. If the sha256: digest found in an index
|
||||
// manifest is not already in shaToPackageVersion, it is stored in
|
||||
// knownSHA to be dealt with later.
|
||||
//
|
||||
// Although it is theoretically possible that a sha256: is uploaded
|
||||
|
@ -56,16 +63,16 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
|
|||
// normal order of operations. First the sha256: version is uploaded
|
||||
// and then the index manifest. When the iteration completes,
|
||||
// knownSHA will therefore be empty most of the time and
|
||||
// shaToVersionID will only contain unreferenced sha256: versions.
|
||||
// shaToPackageVersion will only contain unreferenced sha256: versions.
|
||||
if err := db.GetEngine(ctx).
|
||||
Select("`package_version`.`id`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
|
||||
Select("`package_version`.`id`, `package_version`.`created_unix`, `package_version`.`lower_version`, `package_version`.`metadata_json`").
|
||||
Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`").
|
||||
Where("`package`.`type` = ? AND `package_version`.`created_unix` < ?", packages.TypeContainer, old).
|
||||
Where("`package`.`type` = ?", packages.TypeContainer).
|
||||
OrderBy("`package_version`.`id` ASC").
|
||||
Iterate(new(packages.PackageVersion), func(_ int, bean any) error {
|
||||
v := bean.(*packages.PackageVersion)
|
||||
if strings.HasPrefix(v.LowerVersion, "sha256:") {
|
||||
shaToVersionID[v.LowerVersion] = v.ID
|
||||
shaToPackageVersion[v.LowerVersion] = packageVersion{id: v.ID, created: v.CreatedUnix}
|
||||
foundAtLeastOneSHA256 = true
|
||||
} else if strings.Contains(v.MetadataJSON, `"manifests":[{`) {
|
||||
var metadata container_module.Metadata
|
||||
|
@ -74,8 +81,8 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
|
|||
return nil
|
||||
}
|
||||
for _, manifest := range metadata.Manifests {
|
||||
if _, ok := shaToVersionID[manifest.Digest]; ok {
|
||||
delete(shaToVersionID, manifest.Digest)
|
||||
if _, ok := shaToPackageVersion[manifest.Digest]; ok {
|
||||
delete(shaToPackageVersion, manifest.Digest)
|
||||
} else {
|
||||
knownSHA[manifest.Digest] = true
|
||||
}
|
||||
|
@ -87,10 +94,10 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
|
|||
}
|
||||
|
||||
for sha := range knownSHA {
|
||||
delete(shaToVersionID, sha)
|
||||
delete(shaToPackageVersion, sha)
|
||||
}
|
||||
|
||||
if len(shaToVersionID) == 0 {
|
||||
if len(shaToPackageVersion) == 0 {
|
||||
if foundAtLeastOneSHA256 {
|
||||
log.Debug("All container images with a version matching sha256:* are referenced by an index manifest")
|
||||
} else {
|
||||
|
@ -100,15 +107,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
found := len(shaToVersionID)
|
||||
found := len(shaToPackageVersion)
|
||||
|
||||
log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found)
|
||||
|
||||
log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize)
|
||||
|
||||
packageVersionIDs := make([]int64, 0, SHA256BatchSize)
|
||||
for _, id := range shaToVersionID {
|
||||
packageVersionIDs = append(packageVersionIDs, id)
|
||||
tooYoung := 0
|
||||
for _, p := range shaToPackageVersion {
|
||||
if p.created < old {
|
||||
packageVersionIDs = append(packageVersionIDs, p.id)
|
||||
} else {
|
||||
tooYoung++
|
||||
}
|
||||
}
|
||||
|
||||
if tooYoung > 0 {
|
||||
log.Warn("%d out of %d container image(s) are not deleted because they were created less than %v ago", tooYoung, found, olderThan)
|
||||
}
|
||||
|
||||
for len(packageVersionIDs) > 0 {
|
||||
|
|
Loading…
Add table
Reference in a new issue