Do not update PRs based on events that happened before they existed

* Split TestPullRequest out of AddTestPullRequestTask
* A Created field is added to the Issue table
* The Created field is set to the time (with nano resolution) on creation
* Record the nano time repo_module.PushUpdateOptions is created by the hook
* The decision to update a pull request created before a commit was
  pushed is based on the time (with nano resolution) the git hook
  was run and the Created field

It ensures the following happens:

* commit C is pushed
* the git hook queues AddTestPullRequestTask for processing and returns with success
* TestPullRequest is not called yet
* a pull request P with commit C as the head is created
* TestPullRequest runs and ignores P because it was created after the commit was received

When the "created" column is NULL, no verification is done, pull
requests that were created before the column was created in the
database cannot be newer than the latest call to a git hook.

Fixes: https://codeberg.org/forgejo/forgejo/issues/2009
(cherry picked from commit 998a431747)

Conflicts:
	models/forgejo_migrations/migrate.go
	see https://codeberg.org/forgejo/forgejo/pulls/3165#issuecomment-1755941
	services/pull/pull.go
	trivial conflicts
This commit is contained in:
Earl Warren 2024-03-31 15:27:59 +02:00
parent ce8bfa25fb
commit 50822f361e
No known key found for this signature in database
GPG key ID: 0579CB2928A78A00
17 changed files with 418 additions and 91 deletions

View file

@ -0,0 +1,12 @@
-
id: 1001
repo_id: 1
index: 1001
poster_id: 1
name: issue1
content: content for the first issue
is_pull: true
created: 111111111
created_unix: 946684800
updated_unix: 978307200
is_closed: false

View file

@ -0,0 +1,13 @@
-
id: 1001
type: 0 # pull request
status: 2 # mergable
issue_id: 1001
index: 1001
head_repo_id: 1
base_repo_id: 1
head_branch: branchmax
base_branch: master
merge_base: 4a357436d925b5c974181ff12a994538ddc5a269
has_merged: false
flow: 0

View file

@ -58,6 +58,9 @@ var migrations = []*Migration{
NewMigration("Add the `apply_to_admins` column to the `protected_branch` table", forgejo_v1_22.AddApplyToAdminsSetting),
// v9 -> v10
NewMigration("Add pronouns to user", forgejo_v1_22.AddPronounsToUser),
// v11 -> v12
// it is a v7.0 migration backport see https://codeberg.org/forgejo/forgejo/pulls/3165#issuecomment-1755941
NewMigration("Add the `created` column to the `issue` table", forgejo_v1_22.AddCreatedToIssue),
}
// GetCurrentDBVersion returns the current Forgejo database version.

View file

@ -0,0 +1,19 @@
// Copyright 2024 The Forgejo Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package v1_22 //nolint
import (
"code.gitea.io/gitea/modules/timeutil"
"xorm.io/xorm"
)
func AddCreatedToIssue(x *xorm.Engine) error {
type Issue struct {
ID int64 `xorm:"pk autoincr"`
Created timeutil.TimeStampNano
}
return x.Sync(&Issue{})
}

View file

@ -124,6 +124,8 @@ type Issue struct {
DeadlineUnix timeutil.TimeStamp `xorm:"INDEX"`
Created timeutil.TimeStampNano
CreatedUnix timeutil.TimeStamp `xorm:"INDEX created"`
UpdatedUnix timeutil.TimeStamp `xorm:"INDEX updated"`
ClosedUnix timeutil.TimeStamp `xorm:"INDEX"`

View file

@ -9,6 +9,14 @@ import (
"code.gitea.io/gitea/models/db"
)
func GetMaxIssueIndexForRepo(ctx context.Context, repoID int64) (int64, error) {
var max int64
if _, err := db.GetEngine(ctx).Select("MAX(`index`)").Table("issue").Where("repo_id=?", repoID).Get(&max); err != nil {
return 0, err
}
return max, nil
}
// RecalculateIssueIndexForRepo create issue_index for repo if not exist and
// update it based on highest index of existing issues assigned to a repo
func RecalculateIssueIndexForRepo(ctx context.Context, repoID int64) error {
@ -18,8 +26,8 @@ func RecalculateIssueIndexForRepo(ctx context.Context, repoID int64) error {
}
defer committer.Close()
var max int64
if _, err = db.GetEngine(ctx).Select(" MAX(`index`)").Table("issue").Where("repo_id=?", repoID).Get(&max); err != nil {
max, err := GetMaxIssueIndexForRepo(ctx, repoID)
if err != nil {
return err
}

View file

@ -0,0 +1,38 @@
// Copyright 2024 The Forgejo Authors
// SPDX-License-Identifier: MIT
package issues_test
import (
"testing"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
"github.com/stretchr/testify/assert"
)
func TestGetMaxIssueIndexForRepo(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 1})
maxPR, err := issues_model.GetMaxIssueIndexForRepo(db.DefaultContext, repo.ID)
assert.NoError(t, err)
issue := testCreateIssue(t, repo.ID, repo.OwnerID, "title1", "content1", false)
assert.Greater(t, issue.Index, maxPR)
maxPR, err = issues_model.GetMaxIssueIndexForRepo(db.DefaultContext, repo.ID)
assert.NoError(t, err)
pull := testCreateIssue(t, repo.ID, repo.OwnerID, "title2", "content2", true)
assert.Greater(t, pull.Index, maxPR)
maxPR, err = issues_model.GetMaxIssueIndexForRepo(db.DefaultContext, repo.ID)
assert.NoError(t, err)
assert.Equal(t, maxPR, pull.Index)
}

View file

@ -325,6 +325,8 @@ func NewIssueWithIndex(ctx context.Context, doer *user_model.User, opts NewIssue
return fmt.Errorf("issue exist")
}
opts.Issue.Created = timeutil.TimeStampNanoNow()
if _, err := e.Insert(opts.Issue); err != nil {
return err
}

View file

@ -47,6 +47,14 @@ func listPullRequestStatement(ctx context.Context, baseRepoID int64, opts *PullR
return sess, nil
}
func GetUnmergedPullRequestsByHeadInfoMax(ctx context.Context, repoID, olderThan int64, branch string) ([]*PullRequest, error) {
prs := make([]*PullRequest, 0, 2)
sess := db.GetEngine(ctx).
Join("INNER", "issue", "issue.id = `pull_request`.issue_id").
Where("`pull_request`.head_repo_id = ? AND `pull_request`.head_branch = ? AND `pull_request`.has_merged = ? AND `issue`.is_closed = ? AND `pull_request`.flow = ? AND (`issue`.`created` IS NULL OR `issue`.`created` <= ?)", repoID, branch, false, false, PullRequestFlowGithub, olderThan)
return prs, sess.Find(&prs)
}
// GetUnmergedPullRequestsByHeadInfo returns all pull requests that are open and has not been merged
func GetUnmergedPullRequestsByHeadInfo(ctx context.Context, repoID int64, branch string) ([]*PullRequest, error) {
prs := make([]*PullRequest, 0, 2)

View file

@ -4,7 +4,9 @@
package issues_test
import (
"fmt"
"testing"
"time"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
@ -12,6 +14,7 @@ import (
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
)
@ -156,6 +159,100 @@ func TestGetUnmergedPullRequestsByHeadInfo(t *testing.T) {
}
}
func TestGetUnmergedPullRequestsByHeadInfoMax(t *testing.T) {
defer tests.AddFixtures("models/fixtures/TestGetUnmergedPullRequestsByHeadInfoMax/")()
assert.NoError(t, unittest.PrepareTestDatabase())
repoID := int64(1)
olderThan := int64(0)
// for NULL created field the olderThan condition is ignored
prs, err := issues_model.GetUnmergedPullRequestsByHeadInfoMax(db.DefaultContext, repoID, olderThan, "branch2")
assert.NoError(t, err)
assert.Equal(t, int64(1), prs[0].HeadRepoID)
// test for when the created field is set
branch := "branchmax"
prs, err = issues_model.GetUnmergedPullRequestsByHeadInfoMax(db.DefaultContext, repoID, olderThan, branch)
assert.NoError(t, err)
assert.Len(t, prs, 0)
olderThan = time.Now().UnixNano()
assert.NoError(t, err)
prs, err = issues_model.GetUnmergedPullRequestsByHeadInfoMax(db.DefaultContext, repoID, olderThan, branch)
assert.NoError(t, err)
assert.Len(t, prs, 1)
for _, pr := range prs {
assert.Equal(t, int64(1), pr.HeadRepoID)
assert.Equal(t, branch, pr.HeadBranch)
}
pr := prs[0]
for _, testCase := range []struct {
table string
field string
id int64
match any
nomatch any
}{
{
table: "issue",
field: "is_closed",
id: pr.IssueID,
match: false,
nomatch: true,
},
{
table: "pull_request",
field: "flow",
id: pr.ID,
match: issues_model.PullRequestFlowGithub,
nomatch: issues_model.PullRequestFlowAGit,
},
{
table: "pull_request",
field: "head_repo_id",
id: pr.ID,
match: pr.HeadRepoID,
nomatch: 0,
},
{
table: "pull_request",
field: "head_branch",
id: pr.ID,
match: pr.HeadBranch,
nomatch: "something else",
},
{
table: "pull_request",
field: "has_merged",
id: pr.ID,
match: false,
nomatch: true,
},
} {
t.Run(testCase.field, func(t *testing.T) {
update := fmt.Sprintf("UPDATE `%s` SET `%s` = ? WHERE `id` = ?", testCase.table, testCase.field)
// expect no match
_, err = db.GetEngine(db.DefaultContext).Exec(update, testCase.nomatch, testCase.id)
assert.NoError(t, err)
prs, err = issues_model.GetUnmergedPullRequestsByHeadInfoMax(db.DefaultContext, repoID, olderThan, branch)
assert.NoError(t, err)
assert.Len(t, prs, 0)
// expect one match
_, err = db.GetEngine(db.DefaultContext).Exec(update, testCase.match, testCase.id)
assert.NoError(t, err)
prs, err = issues_model.GetUnmergedPullRequestsByHeadInfoMax(db.DefaultContext, repoID, olderThan, branch)
assert.NoError(t, err)
assert.Len(t, prs, 1)
// identical to the known PR
assert.Equal(t, pr.ID, prs[0].ID)
})
}
}
func TestGetUnmergedPullRequestsByBaseInfo(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
prs, err := issues_model.GetUnmergedPullRequestsByBaseInfo(db.DefaultContext, 1, "master")

View file

@ -16,6 +16,7 @@ type PushUpdateOptions struct {
RefFullName git.RefName // branch, tag or other name to push
OldCommitID string
NewCommitID string
TimeNano int64
}
// IsNewRef return true if it's a first-time push to a branch, tag or etc.

View file

@ -7,6 +7,7 @@ import (
"fmt"
"net/http"
"strconv"
"time"
git_model "code.gitea.io/gitea/models/git"
issues_model "code.gitea.io/gitea/models/issues"
@ -71,6 +72,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
PusherName: opts.UserName,
RepoUserName: ownerName,
RepoName: repoName,
TimeNano: time.Now().UnixNano(),
}
updates = append(updates, option)
if repo.IsEmpty && (refFullName.BranchName() == "master" || refFullName.BranchName() == "main") {

View file

@ -187,7 +187,7 @@ func Merge(ctx context.Context, pr *issues_model.PullRequest, doer *user_model.U
}
defer func() {
go AddTestPullRequestTask(doer, pr.BaseRepo.ID, pr.BaseBranch, false, "", "")
AddTestPullRequestTask(ctx, doer, pr.BaseRepo.ID, pr.BaseBranch, false, "", "", 0)
}()
pr.MergedCommitID, err = doMergeAndPush(ctx, pr, doer, mergeStyle, expectedHeadCommitID, message)

View file

@ -295,22 +295,36 @@ func checkForInvalidation(ctx context.Context, requests issues_model.PullRequest
// AddTestPullRequestTask adds new test tasks by given head/base repository and head/base branch,
// and generate new patch for testing as needed.
func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string, isSync bool, oldCommitID, newCommitID string) {
log.Trace("AddTestPullRequestTask [head_repo_id: %d, head_branch: %s]: finding pull requests", repoID, branch)
graceful.GetManager().RunWithShutdownContext(func(ctx context.Context) {
func AddTestPullRequestTask(ctx context.Context, doer *user_model.User, repoID int64, branch string, isSync bool, oldCommitID, newCommitID string, timeNano int64) {
log.Trace("AddTestPullRequestTask [head_repo_id: %d, head_branch: %s]: only pull requests created before nano time %d will be considered", repoID, branch, timeNano)
go graceful.GetManager().RunWithShutdownContext(func(ctx context.Context) {
// There is no sensible way to shut this down ":-("
// If you don't let it run all the way then you will lose data
// TODO: graceful: AddTestPullRequestTask needs to become a queue!
// TODO: graceful: TestPullRequest needs to become a queue!
// GetUnmergedPullRequestsByHeadInfo() only return open and unmerged PR.
prs, err := issues_model.GetUnmergedPullRequestsByHeadInfo(ctx, repoID, branch)
TestPullRequest(ctx, doer, repoID, timeNano, branch, isSync, oldCommitID, newCommitID)
})
}
func TestPullRequest(ctx context.Context, doer *user_model.User, repoID, olderThan int64, branch string, isSync bool, oldCommitID, newCommitID string) {
// Only consider PR that are older than olderThan, which is the time at
// which the newCommitID was added to repoID.
//
// * commit C is pushed
// * the git hook queues AddTestPullRequestTask for processing and returns with success
// * TestPullRequest is not called yet
// * a pull request P with commit C as the head is created
// * TestPullRequest runs and ignores P because it was created after the commit was received
//
// In other words, a PR must not be updated based on events that happened before it existed
prs, err := issues_model.GetUnmergedPullRequestsByHeadInfoMax(ctx, repoID, olderThan, branch)
if err != nil {
log.Error("Find pull requests [head_repo_id: %d, head_branch: %s]: %v", repoID, branch, err)
return
}
for _, pr := range prs {
log.Trace("Updating PR[%d]: composing new test task", pr.ID)
log.Trace("Updating PR[id=%d,index=%d]: composing new test task", pr.ID, pr.Index)
if pr.Flow == issues_model.PullRequestFlowGithub {
if err := PushToBaseRepo(ctx, pr); err != nil {
log.Error("PushToBaseRepo: %v", err)
@ -379,7 +393,7 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
}
}
log.Trace("AddTestPullRequestTask [base_repo_id: %d, base_branch: %s]: finding pull requests", repoID, branch)
log.Trace("TestPullRequest [base_repo_id: %d, base_branch: %s]: finding pull requests", repoID, branch)
prs, err = issues_model.GetUnmergedPullRequestsByBaseInfo(ctx, repoID, branch)
if err != nil {
log.Error("Find pull requests [base_repo_id: %d, base_branch: %s]: %v", repoID, branch, err)
@ -401,7 +415,6 @@ func AddTestPullRequestTask(doer *user_model.User, repoID int64, branch string,
}
AddToTaskQueue(ctx, pr)
}
})
}
// checkIfPRContentChanged checks if diff to target branch has changed by push

View file

@ -36,7 +36,7 @@ func Update(ctx context.Context, pr *issues_model.PullRequest, doer *user_model.
if rebase {
defer func() {
go AddTestPullRequestTask(doer, pr.BaseRepo.ID, pr.BaseBranch, false, "", "")
AddTestPullRequestTask(ctx, doer, pr.BaseRepo.ID, pr.BaseBranch, false, "", "", 0)
}()
return updateHeadByRebaseOnToBase(ctx, pr, doer, message)
@ -75,7 +75,7 @@ func Update(ctx context.Context, pr *issues_model.PullRequest, doer *user_model.
_, err = doMergeAndPush(ctx, reversePR, doer, repo_model.MergeStyleMerge, "", message)
defer func() {
go AddTestPullRequestTask(doer, reversePR.HeadRepo.ID, reversePR.HeadBranch, false, "", "")
AddTestPullRequestTask(ctx, doer, reversePR.HeadRepo.ID, reversePR.HeadBranch, false, "", "", 0)
}()
return err

View file

@ -166,7 +166,7 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error {
branch := opts.RefFullName.BranchName()
if !opts.IsDelRef() {
log.Trace("TriggerTask '%s/%s' by %s", repo.Name, branch, pusher.Name)
go pull_service.AddTestPullRequestTask(pusher, repo.ID, branch, true, opts.OldCommitID, opts.NewCommitID)
pull_service.AddTestPullRequestTask(ctx, pusher, repo.ID, branch, true, opts.OldCommitID, opts.NewCommitID, opts.TimeNano)
newCommit, err := gitRepo.GetCommit(opts.NewCommitID)
if err != nil {

View file

@ -0,0 +1,109 @@
// Copyright 2024 The Forgejo Authors
// SPDX-License-Identifier: MIT
package integration
import (
"context"
"testing"
"time"
"code.gitea.io/gitea/models/db"
issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
repo_module "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/test"
"code.gitea.io/gitea/modules/timeutil"
pull_service "code.gitea.io/gitea/services/pull"
repo_service "code.gitea.io/gitea/services/repository"
"code.gitea.io/gitea/tests"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestPullRequestSynchronized(t *testing.T) {
defer tests.PrepareTestEnv(t)()
// unmerged pull request of user2/repo1 from branch2 to master
pull := unittest.AssertExistsAndLoadBean(t, &issues_model.PullRequest{ID: 2})
// tip of tests/gitea-repositories-meta/user2/repo1 branch2
pull.HeadCommitID = "985f0301dba5e7b34be866819cd15ad3d8f508ee"
pull.LoadIssue(db.DefaultContext)
pull.Issue.Created = timeutil.TimeStampNanoNow()
issues_model.UpdateIssueCols(db.DefaultContext, pull.Issue, "created")
require.Equal(t, pull.HeadRepoID, pull.BaseRepoID)
repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: pull.HeadRepoID})
owner := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: repo.OwnerID})
for _, testCase := range []struct {
name string
timeNano int64
expected bool
}{
{
name: "AddTestPullRequestTask process PR",
timeNano: int64(pull.Issue.Created),
expected: true,
},
{
name: "AddTestPullRequestTask skip PR",
timeNano: 0,
expected: false,
},
} {
t.Run(testCase.name, func(t *testing.T) {
logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
logChecker.Filter("Updating PR").StopMark("TestPullRequest ")
defer cleanup()
opt := &repo_module.PushUpdateOptions{
PusherID: owner.ID,
PusherName: owner.Name,
RepoUserName: owner.Name,
RepoName: repo.Name,
RefFullName: git.RefName("refs/heads/branch2"),
OldCommitID: pull.HeadCommitID,
NewCommitID: pull.HeadCommitID,
TimeNano: testCase.timeNano,
}
require.NoError(t, repo_service.PushUpdate(opt))
logFiltered, logStopped := logChecker.Check(5 * time.Second)
assert.True(t, logStopped)
assert.Equal(t, testCase.expected, logFiltered[0])
})
}
for _, testCase := range []struct {
name string
olderThan int64
expected bool
}{
{
name: "TestPullRequest process PR",
olderThan: int64(pull.Issue.Created),
expected: true,
},
{
name: "TestPullRequest skip PR",
olderThan: int64(pull.Issue.Created) - 1,
expected: false,
},
} {
t.Run(testCase.name, func(t *testing.T) {
logChecker, cleanup := test.NewLogChecker(log.DEFAULT, log.TRACE)
logChecker.Filter("Updating PR").StopMark("TestPullRequest ")
defer cleanup()
pull_service.TestPullRequest(context.Background(), owner, repo.ID, testCase.olderThan, "branch2", true, pull.HeadCommitID, pull.HeadCommitID)
logFiltered, logStopped := logChecker.Check(5 * time.Second)
assert.True(t, logStopped)
assert.Equal(t, testCase.expected, logFiltered[0])
})
}
}