From bb5f859ec0f853383b85cb16b189fbacf7397551 Mon Sep 17 00:00:00 2001 From: Gusted Date: Tue, 1 Feb 2022 13:59:25 +0100 Subject: [PATCH] Fix non-ASCII search on database (#18437) Use `ToUpperASCII` for issue searches on SQLite databases, because SQLite's `UPPER(x)` only transforms ASCII letters. Resolves #18429 --- models/issue.go | 8 +++++++- modules/util/util.go | 11 +++++++++++ modules/util/util_test.go | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/models/issue.go b/models/issue.go index 3a61b085dc..8eb61f2050 100644 --- a/models/issue.go +++ b/models/issue.go @@ -23,6 +23,7 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/references" + "code.gitea.io/gitea/modules/setting" api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" @@ -1862,7 +1863,12 @@ func GetRepoIssueStats(repoID, uid int64, filterMode int, isPull bool) (numOpen, func SearchIssueIDsByKeyword(ctx context.Context, kw string, repoIDs []int64, limit, start int) (int64, []int64, error) { repoCond := builder.In("repo_id", repoIDs) subQuery := builder.Select("id").From("issue").Where(repoCond) - kw = strings.ToUpper(kw) + // SQLite's UPPER function only transforms ASCII letters. + if setting.Database.UseSQLite3 { + kw = util.ToUpperASCII(kw) + } else { + kw = strings.ToUpper(kw) + } cond := builder.And( repoCond, builder.Or( diff --git a/modules/util/util.go b/modules/util/util.go index 90d0eca15c..af6581f7cd 100644 --- a/modules/util/util.go +++ b/modules/util/util.go @@ -170,3 +170,14 @@ func CryptoRandomBytes(length int64) ([]byte, error) { _, err := rand.Read(buf) return buf, err } + +// ToUpperASCII returns s with all ASCII letters mapped to their upper case. 
+func ToUpperASCII(s string) string { + b := []byte(s) + for i, c := range b { + if 'a' <= c && c <= 'z' { + b[i] -= 'a' - 'A' + } + } + return string(b) +} diff --git a/modules/util/util_test.go b/modules/util/util_test.go index b32cec23d9..0c2792a9cb 100644 --- a/modules/util/util_test.go +++ b/modules/util/util_test.go @@ -186,3 +186,37 @@ func Test_OptionalBool(t *testing.T) { assert.Equal(t, OptionalBoolTrue, OptionalBoolParse("t")) assert.Equal(t, OptionalBoolTrue, OptionalBoolParse("True")) } + +// Test case for any function which accepts and returns a single string. +type StringTest struct { + in, out string +} + +var upperTests = []StringTest{ + {"", ""}, + {"ONLYUPPER", "ONLYUPPER"}, + {"abc", "ABC"}, + {"AbC123", "ABC123"}, + {"azAZ09_", "AZAZ09_"}, + {"longStrinGwitHmixofsmaLLandcAps", "LONGSTRINGWITHMIXOFSMALLANDCAPS"}, + {"long\u0250string\u0250with\u0250nonascii\u2C6Fchars", "LONG\u0250STRING\u0250WITH\u0250NONASCII\u2C6FCHARS"}, + {"\u0250\u0250\u0250\u0250\u0250", "\u0250\u0250\u0250\u0250\u0250"}, + {"a\u0080\U0010FFFF", "A\u0080\U0010FFFF"}, + {"lél", "LéL"}, +} + +func TestToUpperASCII(t *testing.T) { + for _, tc := range upperTests { + assert.Equal(t, tc.out, ToUpperASCII(tc.in)) + } +} + +func BenchmarkToUpper(b *testing.B) { + for _, tc := range upperTests { + b.Run(tc.in, func(b *testing.B) { + for i := 0; i < b.N; i++ { + ToUpperASCII(tc.in) + } + }) + } +}