[BUG] Handle bigger files in git grep
- The parser of `git grep`'s output uses `bufio.Scanner`, which is a good
choice overall. However, it has a limit that usually goes unnoticed:
it will not read a single line longer than `64 * 1024` bytes, which can
be hit in practical scenarios.
- Use `bufio.Reader` instead, which doesn't have this limitation but is
a bit harder to work with, as it's a lower-level primitive.
- Add a unit test.
- Resolves https://codeberg.org/forgejo/forgejo/issues/3149
(cherry picked from commit 668709a33f)
This commit is contained in:
parent
db56ccf993
commit
b203e535e5
2 changed files with 43 additions and 4 deletions
|
@ -10,6 +10,7 @@ import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -80,10 +81,21 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
|
||||||
defer stdoutReader.Close()
|
defer stdoutReader.Close()
|
||||||
|
|
||||||
isInBlock := false
|
isInBlock := false
|
||||||
scanner := bufio.NewScanner(stdoutReader)
|
scanner := bufio.NewReader(stdoutReader)
|
||||||
var res *GrepResult
|
var res *GrepResult
|
||||||
for scanner.Scan() {
|
for {
|
||||||
line := scanner.Text()
|
line, err := scanner.ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Remove delimiter.
|
||||||
|
if len(line) > 0 {
|
||||||
|
line = line[:len(line)-1]
|
||||||
|
}
|
||||||
|
|
||||||
if !isInBlock {
|
if !isInBlock {
|
||||||
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
|
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
|
||||||
isInBlock = true
|
isInBlock = true
|
||||||
|
@ -109,7 +121,7 @@ func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepO
|
||||||
res.LineCodes = append(res.LineCodes, lineCode)
|
res.LineCodes = append(res.LineCodes, lineCode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return scanner.Err()
|
return nil
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
// git grep exits by cancel (killed), usually it is caused by the limit of results
|
// git grep exits by cancel (killed), usually it is caused by the limit of results
|
||||||
|
|
|
@ -4,7 +4,10 @@
|
||||||
package git
|
package git
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
|
"os"
|
||||||
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -49,3 +52,27 @@ func TestGrepSearch(t *testing.T) {
|
||||||
assert.Error(t, err)
|
assert.Error(t, err)
|
||||||
assert.Len(t, res, 0)
|
assert.Len(t, res, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGrepLongFiles(t *testing.T) {
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
|
||||||
|
err := InitRepository(DefaultContext, tmpDir, false, Sha1ObjectFormat.Name())
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
gitRepo, err := openRepositoryWithDefaultContext(tmpDir)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
defer gitRepo.Close()
|
||||||
|
|
||||||
|
assert.NoError(t, os.WriteFile(path.Join(tmpDir, "README.md"), bytes.Repeat([]byte{'a'}, 65*1024), 0o666))
|
||||||
|
|
||||||
|
err = AddChanges(tmpDir, true)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
err = CommitChanges(tmpDir, CommitChangesOptions{Message: "Long file"})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
res, err := GrepSearch(context.Background(), gitRepo, "a", GrepOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Len(t, res, 1)
|
||||||
|
assert.Len(t, res[0].LineCodes[0], 65*1024)
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue