diff --git a/services/packages/cleanup/cleanup_sha256_test.go b/services/packages/cleanup/cleanup_sha256_test.go new file mode 100644 index 0000000000..6d7cc47d3e --- /dev/null +++ b/services/packages/cleanup/cleanup_sha256_test.go @@ -0,0 +1,116 @@ +// Copyright 2024 The Forgejo Authors. +// SPDX-License-Identifier: GPL-3.0-or-later + +package container + +import ( + "testing" + "time" + + "code.gitea.io/gitea/models/db" + "code.gitea.io/gitea/models/packages" + "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" + container_module "code.gitea.io/gitea/modules/packages/container" + "code.gitea.io/gitea/modules/test" + "code.gitea.io/gitea/modules/timeutil" + container_service "code.gitea.io/gitea/services/packages/container" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestCleanupSHA256(t *testing.T) { + require.NoError(t, unittest.PrepareTestDatabase()) + defer test.MockVariableValue(&container_service.SHA256BatchSize, 1)() + + ctx := db.DefaultContext + + createContainer := func(t *testing.T, name, version, digest string, created timeutil.TimeStamp) { + t.Helper() + + ownerID := int64(2001) + + p := packages.Package{ + OwnerID: ownerID, + LowerName: name, + Type: packages.TypeContainer, + } + _, err := db.GetEngine(ctx).Insert(&p) + // package_version").Where("version = ?", multiTag).Update(&packages_model.PackageVersion{MetadataJSON: `corrupted "manifests":[{ bad`}) + require.NoError(t, err) + + var metadata string + if digest != "" { + m := container_module.Metadata{ + Manifests: []*container_module.Manifest{ + { + Digest: digest, + }, + }, + } + mt, err := json.Marshal(m) + require.NoError(t, err) + metadata = string(mt) + } + v := packages.PackageVersion{ + PackageID: p.ID, + LowerVersion: version, + MetadataJSON: metadata, + CreatedUnix: created, + } + _, err = db.GetEngine(ctx).NoAutoTime().Insert(&v) + require.NoError(t, err) + } + + cleanupAndCheckLogs := func(t *testing.T, olderThan time.Duration, expected ...string) { + t.Helper() + logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE) + logChecker.Filter(expected...) + logChecker.StopMark(container_service.SHA256LogFinish) + defer cleanup() + + require.NoError(t, CleanupExpiredData(ctx, olderThan)) + + logFiltered, logStopped := logChecker.Check(5 * time.Second) + assert.True(t, logStopped) + filtered := make([]bool, 0, len(expected)) + for range expected { + filtered = append(filtered, true) + } + assert.EqualValues(t, filtered, logFiltered, expected) + } + + ancient := 1 * time.Hour + + t.Run("no packages, cleanup nothing", func(t *testing.T) { + cleanupAndCheckLogs(t, ancient, "Nothing to cleanup") + }) + + orphan := "orphan" + createdLongAgo := timeutil.TimeStamp(time.Now().Add(-(ancient * 2)).Unix()) + createdRecently := timeutil.TimeStamp(time.Now().Add(-(ancient / 2)).Unix()) + + t.Run("an orphaned package created a long time ago is removed", func(t *testing.T) { + createContainer(t, orphan, "sha256:"+orphan, "", createdLongAgo) + cleanupAndCheckLogs(t, ancient, "Removing 1 entries from `package_version`") + cleanupAndCheckLogs(t, ancient, "Nothing to cleanup") + }) + + t.Run("a newly created orphaned package is not cleaned up", func(t *testing.T) { + createContainer(t, orphan, "sha256:"+orphan, "", createdRecently) + cleanupAndCheckLogs(t, ancient, "1 out of 1 container image(s) are not deleted because they were created less than") + cleanupAndCheckLogs(t, 0, "Removing 1 entries from `package_version`") + cleanupAndCheckLogs(t, 0, "Nothing to cleanup") + }) + + t.Run("a referenced package is not removed", func(t *testing.T) { + referenced := "referenced" + digest := "sha256:" + referenced + createContainer(t, referenced, digest, "", createdRecently) + index := "index" + createContainer(t, index, index, digest, createdRecently) + cleanupAndCheckLogs(t, ancient, "Nothing to cleanup") + }) +} diff --git a/services/packages/cleanup/main_test.go b/services/packages/cleanup/main_test.go new file mode 100644 index 0000000000..ded3d76c83 --- /dev/null +++ b/services/packages/cleanup/main_test.go @@ -0,0 +1,14 @@ +// Copyright 2024 The Forgejo Authors. +// SPDX-License-Identifier: GPL-3.0-or-later + +package container + +import ( + "testing" + + "code.gitea.io/gitea/models/unittest" +) + +func TestMain(m *testing.M) { + unittest.MainTest(m) +} diff --git a/services/packages/container/cleanup_sha256.go b/services/packages/container/cleanup_sha256.go index 558aea3a55..16afc74b18 100644 --- a/services/packages/container/cleanup_sha256.go +++ b/services/packages/container/cleanup_sha256.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" container_module "code.gitea.io/gitea/modules/packages/container" + "code.gitea.io/gitea/modules/timeutil" ) var ( @@ -37,18 +38,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { defer committer.Close() foundAtLeastOneSHA256 := false - shaToVersionID := make(map[string]int64, 100) + type packageVersion struct { + id int64 + created timeutil.TimeStamp + } + shaToPackageVersion := make(map[string]packageVersion, 100) knownSHA := make(map[string]any, 100) + // compute before making the inventory to not race against ongoing + // image creations + old := timeutil.TimeStamp(time.Now().Add(-olderThan).Unix()) + log.Debug("Look for all package_version.version that start with sha256:") - old := time.Now().Add(-olderThan).Unix() - // Iterate over all container versions in ascending order and store - // in shaToVersionID all versions with a sha256: prefix. If an index + // in shaToPackageVersion all versions with a sha256: prefix. If an index // manifest is found, the sha256: digest it references are removed - // from shaToVersionID. If the sha256: digest found in an index - // manifest is not already in shaToVersionID, it is stored in + // from shaToPackageVersion. If the sha256: digest found in an index + // manifest is not already in shaToPackageVersion, it is stored in // knownSHA to be dealt with later. // // Although it is theoretically possible that a sha256: is uploaded @@ -56,16 +63,16 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { // normal order of operations. First the sha256: version is uploaded // and then the index manifest. When the iteration completes, // knownSHA will therefore be empty most of the time and - // shaToVersionID will only contain unreferenced sha256: versions. + // shaToPackageVersion will only contain unreferenced sha256: versions. if err := db.GetEngine(ctx). - Select("`package_version`.`id`, `package_version`.`lower_version`, `package_version`.`metadata_json`"). + Select("`package_version`.`id`, `package_version`.`created_unix`, `package_version`.`lower_version`, `package_version`.`metadata_json`"). Join("INNER", "`package`", "`package`.`id` = `package_version`.`package_id`"). - Where("`package`.`type` = ? AND `package_version`.`created_unix` < ?", packages.TypeContainer, old). + Where("`package`.`type` = ?", packages.TypeContainer). OrderBy("`package_version`.`id` ASC"). Iterate(new(packages.PackageVersion), func(_ int, bean any) error { v := bean.(*packages.PackageVersion) if strings.HasPrefix(v.LowerVersion, "sha256:") { - shaToVersionID[v.LowerVersion] = v.ID + shaToPackageVersion[v.LowerVersion] = packageVersion{id: v.ID, created: v.CreatedUnix} foundAtLeastOneSHA256 = true } else if strings.Contains(v.MetadataJSON, `"manifests":[{`) { var metadata container_module.Metadata @@ -74,8 +81,8 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { return nil } for _, manifest := range metadata.Manifests { - if _, ok := shaToVersionID[manifest.Digest]; ok { - delete(shaToVersionID, manifest.Digest) + if _, ok := shaToPackageVersion[manifest.Digest]; ok { + delete(shaToPackageVersion, manifest.Digest) } else { knownSHA[manifest.Digest] = true } @@ -87,10 +94,10 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { } for sha := range knownSHA { - delete(shaToVersionID, sha) + delete(shaToPackageVersion, sha) } - if len(shaToVersionID) == 0 { + if len(shaToPackageVersion) == 0 { if foundAtLeastOneSHA256 { log.Debug("All container images with a version matching sha256:* are referenced by an index manifest") } else { @@ -100,15 +107,24 @@ func cleanupSHA256(outerCtx context.Context, olderThan time.Duration) error { return nil } - found := len(shaToVersionID) + found := len(shaToPackageVersion) log.Warn("%d container image(s) with a version matching sha256:* are not referenced by an index manifest", found) log.Debug("Deleting unreferenced image versions from `package_version`, `package_file` and `package_property` (%d at a time)", SHA256BatchSize) packageVersionIDs := make([]int64, 0, SHA256BatchSize) - for _, id := range shaToVersionID { - packageVersionIDs = append(packageVersionIDs, id) + tooYoung := 0 + for _, p := range shaToPackageVersion { + if p.created < old { + packageVersionIDs = append(packageVersionIDs, p.id) + } else { + tooYoung++ + } + } + + if tooYoung > 0 { + log.Warn("%d out of %d container image(s) are not deleted because they were created less than %v ago", tooYoung, found, olderThan) } for len(packageVersionIDs) > 0 {