Merge pull request '[v7.0/forgejo] [FIX] Set max fuzziness to 2 for bleve' (#3477) from bp-v7.0/forgejo-a641ebf into v7.0/forgejo
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3477
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
This commit is contained in:
commit
d58b74d368
4 changed files with 26 additions and 2 deletions
|
@ -41,6 +41,8 @@ const (
|
||||||
maxBatchSize = 16
|
maxBatchSize = 16
|
||||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
||||||
fuzzyDenominator = 4
|
fuzzyDenominator = 4
|
||||||
|
// maxFuzziness caps the fuzziness used in bleve queries; see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
|
||||||
|
maxFuzziness = 2
|
||||||
)
|
)
|
||||||
|
|
||||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||||
|
@ -246,7 +248,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
||||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||||
keywordQuery = phraseQuery
|
keywordQuery = phraseQuery
|
||||||
if opts.IsKeywordFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
|
phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(opts.RepoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
|
|
|
@ -49,6 +49,12 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
||||||
IDs: []int64{},
|
IDs: []int64{},
|
||||||
Langs: 0,
|
Langs: 0,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
RepoIDs: nil,
|
||||||
|
Keyword: "Description for",
|
||||||
|
IDs: []int64{repoID},
|
||||||
|
Langs: 1,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
RepoIDs: nil,
|
RepoIDs: nil,
|
||||||
Keyword: "repo1",
|
Keyword: "repo1",
|
||||||
|
|
|
@ -39,6 +39,8 @@ const (
|
||||||
maxBatchSize = 16
|
maxBatchSize = 16
|
||||||
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
// fuzzyDenominator determines the levenshtein distance per each character of a keyword
|
||||||
fuzzyDenominator = 4
|
fuzzyDenominator = 4
|
||||||
|
// maxFuzziness caps the fuzziness used in bleve queries; see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311
|
||||||
|
maxFuzziness = 2
|
||||||
)
|
)
|
||||||
|
|
||||||
// IndexerData an update to the issue indexer
|
// IndexerData an update to the issue indexer
|
||||||
|
@ -162,7 +164,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
||||||
if options.Keyword != "" {
|
if options.Keyword != "" {
|
||||||
fuzziness := 0
|
fuzziness := 0
|
||||||
if options.IsFuzzyKeyword {
|
if options.IsFuzzyKeyword {
|
||||||
fuzziness = len(options.Keyword) / fuzzyDenominator
|
fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator)
|
||||||
}
|
}
|
||||||
|
|
||||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||||
|
|
|
@ -130,6 +130,20 @@ var cases = []*testIndexerCase{
|
||||||
ExpectedIDs: []int64{1002, 1001, 1000},
|
ExpectedIDs: []int64{1002, 1001, 1000},
|
||||||
ExpectedTotal: 3,
|
ExpectedTotal: 3,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "Keyword Fuzzy",
|
||||||
|
ExtraData: []*internal.IndexerData{
|
||||||
|
{ID: 1000, Title: "hi hello world"},
|
||||||
|
{ID: 1001, Content: "hi hello world"},
|
||||||
|
{ID: 1002, Comments: []string{"hi", "hello world"}},
|
||||||
|
},
|
||||||
|
SearchOptions: &internal.SearchOptions{
|
||||||
|
Keyword: "hello wrold",
|
||||||
|
IsFuzzyKeyword: true,
|
||||||
|
},
|
||||||
|
ExpectedIDs: []int64{1002, 1001, 1000},
|
||||||
|
ExpectedTotal: 3,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
Name: "RepoIDs",
|
Name: "RepoIDs",
|
||||||
ExtraData: []*internal.IndexerData{
|
ExtraData: []*internal.IndexerData{
|
||||||
|
|
Loading…
Add table
Reference in a new issue