Code/repo search (#2582)

Indexed search of repository contents (for default branch only)
This commit is contained in:
Ethan Koenig 2017-10-26 23:10:54 -07:00 committed by Lauris BH
parent 762f1d7237
commit 5866eb2321
33 changed files with 1214 additions and 31 deletions

4
conf/app.ini vendored
View file

@ -192,7 +192,11 @@ ITERATE_BUFFER_SIZE = 50
[indexer]
ISSUE_INDEXER_PATH = indexers/issues.bleve
; repo indexer by default disabled, since it uses a lot of disk space
REPO_INDEXER_ENABLED = false
REPO_INDEXER_PATH = indexers/repos.bleve
UPDATE_BUFFER_LEN = 20
MAX_FILE_SIZE = 1048576
[admin]
; Prevent regular (non-admin) users from creating organizations

View file

@ -63,6 +63,10 @@ func TestMain(m *testing.M) {
fmt.Printf("os.RemoveAll: %v\n", err)
os.Exit(1)
}
if err = os.RemoveAll(setting.Indexer.RepoPath); err != nil {
fmt.Printf("Unable to remove repo indexer: %v\n", err)
os.Exit(1)
}
os.Exit(exitCode)
}

View file

@ -12,6 +12,8 @@ PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve
REPO_INDEXER_ENABLED = true
REPO_INDEXER_PATH = integrations/indexers-mysql/repos.bleve
[repository]
ROOT = integrations/gitea-integration-mysql/gitea-repositories

View file

@ -12,6 +12,8 @@ PATH = data/gitea.db
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve
REPO_INDEXER_ENABLED = true
REPO_INDEXER_PATH = integrations/indexers-pgsql/repos.bleve
[repository]
ROOT = integrations/gitea-integration-pgsql/gitea-repositories

View file

@ -0,0 +1,35 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package integrations
import (
"net/http"
"testing"
"github.com/PuerkitoBio/goquery"
"github.com/stretchr/testify/assert"
)
// resultFilenames collects the text of every "span.file" header found inside
// the search results of a rendered repository search page, in document order.
// It asserts that the page contains exactly one ".repository.search" element.
func resultFilenames(t testing.TB, doc *HTMLDoc) []string {
	searchRoot := doc.doc.Find(".repository.search")
	assert.EqualValues(t, 1, searchRoot.Length(),
		"Invalid template (repo search template has changed?)")

	fileSpans := searchRoot.
		Find(".repo-search-result").
		Find(".header").
		Find("span.file")

	names := make([]string, 0, fileSpans.Length())
	fileSpans.Each(func(_ int, span *goquery.Selection) {
		names = append(names, span.Text())
	})
	return names
}
// TestSearchRepo searches user2/repo1 for "Description" and expects README.md
// to be the only file listed in the results.
func TestSearchRepo(t *testing.T) {
	prepareTestEnv(t)

	const searchURL = "/user2/repo1/search?q=Description&page=1"
	resp := MakeRequest(t, NewRequestf(t, "GET", searchURL), http.StatusOK)

	got := resultFilenames(t, NewHTMLParser(t, resp.Body))
	assert.EqualValues(t, []string{"README.md"}, got)
}

View file

@ -7,6 +7,8 @@ PATH = :memory:
[indexer]
ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve
REPO_INDEXER_ENABLED = true
REPO_INDEXER_PATH = integrations/indexers-sqlite/repos.bleve
[repository]
ROOT = integrations/gitea-integration-sqlite/gitea-repositories

View file

@ -0,0 +1 @@
[] # empty

View file

@ -144,6 +144,8 @@ var migrations = []Migration{
NewMigration("remove organization watch repositories", removeOrganizationWatchRepo),
// v47 -> v48
NewMigration("add deleted branches", addDeletedBranch),
// v48 -> v49
NewMigration("add repo indexer status", addRepoIndexerStatus),
}
// Migrate database to current version

25
models/migrations/v48.go Normal file
View file

@ -0,0 +1,25 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migrations
import (
"fmt"
"github.com/go-xorm/xorm"
)
// addRepoIndexerStatus syncs the database schema for the RepoIndexerStatus
// table. Any Sync2 failure is returned wrapped with a "Sync2: " prefix.
func addRepoIndexerStatus(x *xorm.Engine) error {
	// RepoIndexerStatus is a migration-local copy of the model;
	// see models/repo_indexer.go
	type RepoIndexerStatus struct {
		ID        int64  `xorm:"pk autoincr"`
		RepoID    int64  `xorm:"INDEX NOT NULL"`
		CommitSha string `xorm:"VARCHAR(40)"`
	}

	err := x.Sync2(new(RepoIndexerStatus))
	if err == nil {
		return nil
	}
	return fmt.Errorf("Sync2: %v", err)
}

View file

@ -13,6 +13,10 @@ import (
"path"
"strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
// Needed for the MySQL driver
_ "github.com/go-sql-driver/mysql"
"github.com/go-xorm/core"
@ -23,9 +27,6 @@ import (
// Needed for the MSSQL driver
_ "github.com/denisenkom/go-mssqldb"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
// Engine represents a xorm engine or session.
@ -115,6 +116,7 @@ func init() {
new(Stopwatch),
new(TrackedTime),
new(DeletedBranch),
new(RepoIndexerStatus),
)
gonicNames := []string{"SSL", "UID"}
@ -150,8 +152,13 @@ func LoadConfigs() {
DbCfg.Timeout = sec.Key("SQLITE_TIMEOUT").MustInt(500)
sec = setting.Cfg.Section("indexer")
setting.Indexer.IssuePath = sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve")
setting.Indexer.IssuePath = absolutePath(
sec.Key("ISSUE_INDEXER_PATH").MustString("indexers/issues.bleve"))
setting.Indexer.RepoIndexerEnabled = sec.Key("REPO_INDEXER_ENABLED").MustBool(false)
setting.Indexer.RepoPath = absolutePath(
sec.Key("REPO_INDEXER_PATH").MustString("indexers/repos.bleve"))
setting.Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
setting.Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(512 * 1024 * 1024)
}
// parsePostgreSQLHostPort parses given input in various forms defined in
@ -336,3 +343,12 @@ func DumpDatabase(filePath string, dbType string) error {
}
return x.DumpTablesToFile(tbs, filePath)
}
// absolutePath returns path unchanged when it is already absolute; otherwise
// it is resolved against the directory reported by setting.WorkDir. A failure
// to determine the work directory is reported through log.Fatal.
func absolutePath(path string) string {
	base, err := setting.WorkDir()
	if err != nil {
		log.Fatal(4, "Failed to get work directory: %v", err)
	}
	resolved := util.EnsureAbsolutePath(path, base)
	return resolved
}

View file

@ -205,10 +205,11 @@ type Repository struct {
ExternalMetas map[string]string `xorm:"-"`
Units []*RepoUnit `xorm:"-"`
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
ForkID int64 `xorm:"INDEX"`
BaseRepo *Repository `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
IsFork bool `xorm:"INDEX NOT NULL DEFAULT false"`
ForkID int64 `xorm:"INDEX"`
BaseRepo *Repository `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
IndexerStatus *RepoIndexerStatus `xorm:"-"`
Created time.Time `xorm:"-"`
CreatedUnix int64 `xorm:"INDEX created"`
@ -782,8 +783,10 @@ func UpdateLocalCopyBranch(repoPath, localPath, branch string) error {
if err != nil {
return fmt.Errorf("git fetch origin: %v", err)
}
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
if len(branch) > 0 {
if err := git.ResetHEAD(localPath, true, "origin/"+branch); err != nil {
return fmt.Errorf("git reset --hard origin/%s: %v", branch, err)
}
}
}
return nil
@ -989,6 +992,7 @@ func MigrateRepository(doer, u *User, opts MigrateRepoOptions) (*Repository, err
if err = SyncReleasesWithTags(repo, gitRepo); err != nil {
log.Error(4, "Failed to synchronize tags to releases for repository: %v", err)
}
UpdateRepoIndexer(repo)
}
if err = repo.UpdateSize(); err != nil {
@ -1883,6 +1887,7 @@ func DeleteRepository(doer *User, uid, repoID int64) error {
go HookQueue.Add(repo.ID)
}
DeleteRepoFromIndexer(repo)
return nil
}

View file

@ -178,6 +178,8 @@ func (repo *Repository) UpdateRepoFile(doer *User, opts UpdateRepoFileOptions) (
if err != nil {
return fmt.Errorf("PushUpdate: %v", err)
}
UpdateRepoIndexer(repo)
return nil
}

302
models/repo_indexer.go Normal file
View file

@ -0,0 +1,302 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package models
import (
"io/ioutil"
"os"
"path"
"strconv"
"strings"
"code.gitea.io/git"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/indexer"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/Unknwon/com"
)
// RepoIndexerStatus records the state of one repository's entry in the repo
// indexer. For now, it implicitly refers to the default branch.
type RepoIndexerStatus struct {
ID int64 `xorm:"pk autoincr"`
// RepoID is the ID of the repository this status belongs to.
RepoID int64 `xorm:"INDEX"`
// CommitSha is the commit recorded after the last index update. An empty
// value is treated as "not indexed yet" by the code in this file.
CommitSha string `xorm:"VARCHAR(40)"`
}
// getIndexerStatus populates repo.IndexerStatus from the database unless it
// has already been set. When no row exists for the repository, a status with
// an empty CommitSha is stored on the repository instead.
func (repo *Repository) getIndexerStatus() error {
	if repo.IndexerStatus == nil {
		loaded := &RepoIndexerStatus{RepoID: repo.ID}
		found, err := x.Get(loaded)
		if err != nil {
			return err
		}
		if !found {
			loaded.CommitSha = ""
		}
		repo.IndexerStatus = loaded
	}
	return nil
}
// updateIndexerStatus stores sha as the repository's last indexed commit.
// A status whose CommitSha was empty is inserted as a new row; otherwise only
// the commit_sha column of the existing row is updated.
func (repo *Repository) updateIndexerStatus(sha string) error {
	if err := repo.getIndexerStatus(); err != nil {
		return err
	}

	status := repo.IndexerStatus
	isNew := len(status.CommitSha) == 0
	status.CommitSha = sha

	var err error
	if isNew {
		_, err = x.Insert(status)
	} else {
		_, err = x.ID(status.ID).Cols("commit_sha").Update(status)
	}
	return err
}
// repoIndexerOperation is one unit of work for the repo indexer queue.
type repoIndexerOperation struct {
// repo is the repository the operation applies to.
repo *Repository
// deleted selects the operation: true removes the repository's entries
// from the indexer, false updates them.
deleted bool
}
// repoIndexerOperationQueue carries pending operations to the goroutine
// running processRepoIndexerOperationQueue. It is created by InitRepoIndexer.
var repoIndexerOperationQueue chan repoIndexerOperation
// InitRepoIndexer sets up the repo indexer, creates the operation queue and
// starts the goroutine that drains it. It does nothing when the repo indexer
// is disabled in the settings.
func InitRepoIndexer() {
	if setting.Indexer.RepoIndexerEnabled {
		indexer.InitRepoIndexer(populateRepoIndexer)
		queueLen := setting.Indexer.UpdateQueueLength
		repoIndexerOperationQueue = make(chan repoIndexerOperation, queueLen)
		go processRepoIndexerOperationQueue()
	}
}
// populateRepoIndexer walks all repositories (private ones included), ten per
// page ordered by ID, and indexes each of them. A failure to index a single
// repository is only logged; a failure to list repositories is returned.
func populateRepoIndexer() error {
	log.Info("Populating repository indexer (this may take a while)")

	page := 1
	for {
		opts := &SearchRepoOptions{
			Page:     page,
			PageSize: 10,
			OrderBy:  SearchOrderByID,
			Private:  true,
		}
		repos, _, err := SearchRepositoryByName(opts)
		if err != nil {
			return err
		}
		if len(repos) == 0 {
			return nil
		}
		for _, r := range repos {
			indexErr := updateRepoIndexer(r)
			if indexErr == nil {
				continue
			}
			// only log error, since this should not prevent
			// gitea from starting up
			log.Error(4, "updateRepoIndexer: repoID=%d, %v", r.ID, indexErr)
		}
		page++
	}
}
// updateBatch groups a list of repo indexer updates.
// NOTE(review): nothing in the visible part of this file references this
// type — confirm whether it is still needed.
type updateBatch struct {
updates []indexer.RepoIndexerUpdate
}
// updateRepoIndexer brings the indexer up to date with repo: it determines
// the changed files, queues an update or delete for each of them, flushes the
// batch and finally records the commit that was indexed. When there is
// nothing to index (getRepoChanges returns nil changes) it returns nil.
func updateRepoIndexer(repo *Repository) error {
	changes, err := getRepoChanges(repo)
	if err != nil || changes == nil {
		// err is nil here when there simply were no changes to report
		return err
	}

	pending := indexer.RepoIndexerBatch()
	for _, updated := range changes.UpdatedFiles {
		if err = addUpdate(updated, repo, pending); err != nil {
			return err
		}
	}
	for _, removed := range changes.RemovedFiles {
		if err = addDelete(removed, repo, pending); err != nil {
			return err
		}
	}

	if err = pending.Flush(); err != nil {
		return err
	}
	return updateLastIndexSync(repo)
}
// repoChanges describes the file changes (additions/updates/removals) of a
// repository that still have to be applied to the indexer.
type repoChanges struct {
// UpdatedFiles lists files that were added or modified.
UpdatedFiles []string
// RemovedFiles lists files that were deleted.
RemovedFiles []string
}
// getRepoChanges returns the changes made to repo since the last indexer
// update. It refreshes the repository's local copy first and returns
// (nil, nil) when the default branch does not exist yet. A repository without
// a recorded commit is indexed from scratch; otherwise only the difference to
// the recorded commit is reported.
func getRepoChanges(repo *Repository) (*repoChanges, error) {
	poolKey := com.ToStr(repo.ID)
	repoWorkingPool.CheckIn(poolKey)
	defer repoWorkingPool.CheckOut(poolKey)

	if err := repo.UpdateLocalCopyBranch(""); err != nil {
		return nil, err
	}
	if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) {
		// repo does not have any commits yet, so nothing to update
		return nil, nil
	}
	if err := repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil {
		return nil, err
	}
	if err := repo.getIndexerStatus(); err != nil {
		return nil, err
	}

	if len(repo.IndexerStatus.CommitSha) > 0 {
		return nonGenesisChanges(repo)
	}
	return genesisChanges(repo)
}
// addUpdate queues an index update for filename, read from the repository's
// local copy. Files larger than setting.Indexer.MaxIndexerFileSize and files
// that base.IsTextFile rejects are skipped without error.
func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error {
	localPath := path.Join(repo.LocalCopyPath(), filename)

	info, err := os.Stat(localPath)
	if err != nil {
		return err
	}
	if info.Size() > setting.Indexer.MaxIndexerFileSize {
		return nil
	}

	raw, err := ioutil.ReadFile(localPath)
	if err != nil {
		return err
	}
	if !base.IsTextFile(raw) {
		return nil
	}

	update := indexer.RepoIndexerUpdate{
		Filepath: filename,
		Op:       indexer.RepoIndexerOpUpdate,
		Data: &indexer.RepoIndexerData{
			RepoID:  repo.ID,
			Content: string(raw),
		},
	}
	return batch.Add(update)
}
// addDelete queues the removal of filename's entry for repo from the indexer.
func addDelete(filename string, repo *Repository, batch *indexer.Batch) error {
	removal := indexer.RepoIndexerUpdate{
		Filepath: filename,
		Op:       indexer.RepoIndexerOpDelete,
		Data:     &indexer.RepoIndexerData{RepoID: repo.ID},
	}
	return batch.Add(removal)
}
// genesisChanges builds the change set used when a repository is added to the
// indexer for the first time: every file reported by "git ls-files" in the
// local copy counts as updated. Names that git printed in double quotes are
// unquoted.
func genesisChanges(repo *Repository) (*repoChanges, error) {
	listing, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath())
	if err != nil {
		return nil, err
	}

	changes := new(repoChanges)
	for _, entry := range strings.Split(listing, "\n") {
		name := strings.TrimSpace(entry)
		if len(name) == 0 {
			continue
		}
		if name[0] == '"' {
			if name, err = strconv.Unquote(name); err != nil {
				return nil, err
			}
		}
		changes.UpdatedFiles = append(changes.UpdatedFiles, name)
	}
	return changes, nil
}
// nonGenesisChanges returns the changes between the commit recorded in
// repo.IndexerStatus and HEAD of the local copy, as reported by
// "git diff --name-status". If the diff command fails, all of the repository's
// entries are deleted from the indexer and the change set is rebuilt from
// scratch via genesisChanges.
func nonGenesisChanges(repo *Repository) (*repoChanges, error) {
diffCmd := git.NewCommand("diff", "--name-status",
repo.IndexerStatus.CommitSha, "HEAD")
stdout, err := diffCmd.RunInDir(repo.LocalCopyPath())
if err != nil {
// previous commit sha may have been removed by a force push, so
// try rebuilding from scratch
if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil {
return nil, err
}
return genesisChanges(repo)
}
var changes repoChanges
for _, line := range strings.Split(stdout, "\n") {
line = strings.TrimSpace(line)
if len(line) == 0 {
continue
}
// the first byte of the line is the status letter, the rest the filename
filename := strings.TrimSpace(line[1:])
if len(filename) == 0 {
continue
} else if filename[0] == '"' {
// the name was printed in double quotes; unquote it
filename, err = strconv.Unquote(filename)
if err != nil {
return nil, err
}
}
switch status := line[0]; status {
case 'M', 'A':
changes.UpdatedFiles = append(changes.UpdatedFiles, filename)
case 'D':
changes.RemovedFiles = append(changes.RemovedFiles, filename)
default:
// any other status letter is only logged; the line is otherwise ignored
log.Warn("Unrecognized status: %c (line=%s)", status, line)
}
}
return &changes, nil
}
// updateLastIndexSync records the local copy's current HEAD commit as the
// repository's last indexed commit.
func updateLastIndexSync(repo *Repository) error {
	revParse := git.NewCommand("rev-parse", "HEAD")
	out, err := revParse.RunInDir(repo.LocalCopyPath())
	if err != nil {
		return err
	}
	return repo.updateIndexerStatus(strings.TrimSpace(out))
}
// processRepoIndexerOperationQueue receives operations from
// repoIndexerOperationQueue forever and applies them one at a time.
// Failures are logged and do not stop the loop.
func processRepoIndexerOperationQueue() {
	for {
		next := <-repoIndexerOperationQueue
		if !next.deleted {
			if err := updateRepoIndexer(next.repo); err != nil {
				log.Error(4, "updateRepoIndexer: %v", err)
			}
			continue
		}
		if err := indexer.DeleteRepoFromIndexer(next.repo.ID); err != nil {
			log.Error(4, "DeleteRepoFromIndexer: %v", err)
		}
	}
}
// DeleteRepoFromIndexer queues the removal of all of a repository's entries
// from the indexer.
func DeleteRepoFromIndexer(repo *Repository) {
	removal := repoIndexerOperation{deleted: true, repo: repo}
	addOperationToQueue(removal)
}
// UpdateRepoIndexer queues an update of a repository's entries in the indexer.
func UpdateRepoIndexer(repo *Repository) {
	refresh := repoIndexerOperation{deleted: false, repo: repo}
	addOperationToQueue(refresh)
}
// addOperationToQueue hands op to the indexer queue without blocking the
// caller: if the queue cannot accept the operation right away, the send is
// completed from a new goroutine. Operations are dropped when the repo
// indexer is disabled.
func addOperationToQueue(op repoIndexerOperation) {
	if setting.Indexer.RepoIndexerEnabled {
		select {
		case repoIndexerOperationQueue <- op:
			// accepted immediately
		default:
			go func() {
				repoIndexerOperationQueue <- op
			}()
		}
	}
}

View file

@ -263,6 +263,10 @@ func pushUpdate(opts PushUpdateOptions) (repo *Repository, err error) {
commits = ListToPushCommits(l)
}
if opts.RefFullName == git.BranchPrefix+repo.DefaultBranch {
UpdateRepoIndexer(repo)
}
if err := CommitRepoAction(CommitRepoActionOptions{
PusherName: opts.PusherName,
RepoOwnerID: owner.ID,

View file

@ -367,6 +367,7 @@ func RepoAssignment() macaron.Handler {
ctx.Data["DisableSSH"] = setting.SSH.Disabled
ctx.Data["ExposeAnonSSH"] = setting.SSH.ExposeAnonymous
ctx.Data["DisableHTTP"] = setting.Repository.DisableHTTPGit
ctx.Data["RepoSearchEnabled"] = setting.Indexer.RepoIndexerEnabled
ctx.Data["CloneLink"] = repo.CloneLink()
ctx.Data["WikiCloneLink"] = repo.WikiCloneLink()

199
modules/indexer/repo.go Normal file
View file

@ -0,0 +1,199 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package indexer
import (
"os"
"strings"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/custom"
"github.com/blevesearch/bleve/analysis/token/camelcase"
"github.com/blevesearch/bleve/analysis/token/lowercase"
"github.com/blevesearch/bleve/analysis/tokenizer/unicode"
)
// repoIndexerAnalyzer is the name under which the repo indexer's custom
// analyzer is registered and by which search queries refer to it.
const repoIndexerAnalyzer = "repoIndexerAnalyzer"
// repoIndexer (thread-safe) index for repository contents.
// It is assigned by InitRepoIndexer, either by opening the index at
// setting.Indexer.RepoPath or by creating a new one there.
var repoIndexer bleve.Index
// RepoIndexerOp type of operation to perform on repo indexer
type RepoIndexerOp int

// The operation constants are explicitly typed as RepoIndexerOp; with a bare
// iota they would be untyped integer constants that merely happen to be
// assignable to RepoIndexerOp.
const (
	// RepoIndexerOpUpdate add/update a file's contents
	RepoIndexerOpUpdate RepoIndexerOp = iota

	// RepoIndexerOpDelete delete a file
	RepoIndexerOpDelete
)
// RepoIndexerData is the document stored in the repo indexer for one file.
type RepoIndexerData struct {
// RepoID is the ID of the repository the file belongs to.
RepoID int64
// Content is the file's content as text.
Content string
}
// RepoIndexerUpdate is a single change to apply to the repo indexer.
type RepoIndexerUpdate struct {
// Filepath is the file's path; combined with Data.RepoID it forms the
// document ID.
Filepath string
// Op selects whether the file is added/updated or deleted.
Op RepoIndexerOp
// Data carries the repository ID and, for updates, the file content.
// It is dereferenced unconditionally by addToBatch, so it must not be nil.
Data *RepoIndexerData
}
// addToBatch records the update in batch under the document ID derived from
// the repository ID and file path. An unrecognized operation is logged and
// otherwise ignored.
func (update RepoIndexerUpdate) addToBatch(batch *bleve.Batch) error {
	docID := filenameIndexerID(update.Data.RepoID, update.Filepath)

	if update.Op == RepoIndexerOpUpdate {
		return batch.Index(docID, update.Data)
	}
	if update.Op == RepoIndexerOpDelete {
		batch.Delete(docID)
		return nil
	}
	log.Error(4, "Unrecognized repo indexer op: %d", update.Op)
	return nil
}
// InitRepoIndexer opens the repo index at setting.Indexer.RepoPath. When
// nothing exists at that path yet, a new index is created and populateIndexer
// is called to fill it. Every failure is reported through log.Fatal.
func InitRepoIndexer(populateIndexer func() error) {
	_, statErr := os.Stat(setting.Indexer.RepoPath)
	switch {
	case statErr == nil:
		var err error
		repoIndexer, err = bleve.Open(setting.Indexer.RepoPath)
		if err != nil {
			log.Fatal(4, "InitRepoIndexer, open index: %v", err)
		}
	case os.IsNotExist(statErr):
		if err := createRepoIndexer(); err != nil {
			log.Fatal(4, "CreateRepoIndexer: %v", err)
		}
		if err := populateIndexer(); err != nil {
			log.Fatal(4, "PopulateRepoIndex: %v", err)
		}
	default:
		log.Fatal(4, "InitRepoIndexer: %v", statErr)
	}
}
// createRepoIndexer creates a new index at setting.Indexer.RepoPath and
// assigns it to repoIndexer. Documents get a numeric "RepoID" field and a
// text "Content" field; the default analyzer is a custom one combining the
// unicode tokenizer with the unicode-normalize, camelCase and lowercase token
// filters.
func createRepoIndexer() error {
	contentField := bleve.NewTextFieldMapping()
	fileMapping := bleve.NewDocumentMapping()
	fileMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())
	fileMapping.AddFieldMappingsAt("Content", contentField)

	indexMapping := bleve.NewIndexMapping()
	if err := addUnicodeNormalizeTokenFilter(indexMapping); err != nil {
		return err
	}
	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"char_filters":  []string{},
		"tokenizer":     unicode.Name,
		"token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
	}
	if err := indexMapping.AddCustomAnalyzer(repoIndexerAnalyzer, analyzerConfig); err != nil {
		return err
	}
	indexMapping.DefaultAnalyzer = repoIndexerAnalyzer
	indexMapping.AddDocumentMapping("repo", fileMapping)

	created, err := bleve.New(setting.Indexer.RepoPath, indexMapping)
	repoIndexer = created
	return err
}
// filenameIndexerID builds the document ID of a file: the repository's
// indexer ID, an underscore, then the filename.
func filenameIndexerID(repoID int64, filename string) string {
	prefix := indexerID(repoID)
	return prefix + "_" + filename
}
// filenameOfIndexerID returns the part of a document ID after its first
// underscore, i.e. the filename. An ID without an underscore is logged as
// unexpected and returned unchanged.
func filenameOfIndexerID(indexerID string) string {
	sep := strings.IndexByte(indexerID, '_')
	if sep < 0 {
		log.Error(4, "Unexpected ID in repo indexer: %s", indexerID)
	}
	filename := indexerID[sep+1:]
	return filename
}
// RepoIndexerBatch returns a new, empty batch bound to the repo index that
// updates can be added to.
func RepoIndexerBatch() *Batch {
	b := new(Batch)
	b.index = repoIndexer
	b.batch = repoIndexer.NewBatch()
	return b
}
// DeleteRepoFromIndexer removes every document whose RepoID equals repoID
// from the index. The matching documents are found with a single search and
// deleted in batches.
func DeleteRepoFromIndexer(repoID int64) error {
	// request as many hits as the search API accepts
	const maxHits = 2147483647

	byRepo := numericEqualityQuery(repoID, "RepoID")
	request := bleve.NewSearchRequestOptions(byRepo, maxHits, 0, false)
	found, err := repoIndexer.Search(request)
	if err != nil {
		return err
	}

	deletions := RepoIndexerBatch()
	for i := range found.Hits {
		deletions.batch.Delete(found.Hits[i].ID)
		if err := deletions.flushIfFull(); err != nil {
			return err
		}
	}
	return deletions.Flush()
}
// RepoSearchResult is one matching file returned by a search in a repo.
type RepoSearchResult struct {
// StartIndex is the smallest start offset among the match locations in
// Content, or -1 when the hit carried no locations.
StartIndex int
// EndIndex is the largest end offset among the match locations in
// Content, or -1 when the hit carried no locations.
EndIndex int
// Filename is the file's path, recovered from the document ID.
Filename string
// Content is the stored content of the file.
Content string
}
// SearchRepoByKeyword searches the files of the specified repo for keyword,
// matched as a phrase against the "Content" field.
//
// page is 1-based and pageSize is the number of hits per page. It returns the
// total number of hits reported by the index, the results of the requested
// page, and any error returned by the search.
func SearchRepoByKeyword(repoID int64, keyword string, page, pageSize int) (int64, []*RepoSearchResult, error) {
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
// a hit must belong to the repo AND contain the phrase
indexerQuery := bleve.NewConjunctionQuery(
numericEqualityQuery(repoID, "RepoID"),
phraseQuery,
)
from := (page - 1) * pageSize
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
searchRequest.Fields = []string{"Content"}
searchRequest.IncludeLocations = true
result, err := repoIndexer.Search(searchRequest)
if err != nil {
return 0, nil, err
}
searchResults := make([]*RepoSearchResult, len(result.Hits))
for i, hit := range result.Hits {
// -1 marks "no location seen yet"; the values stay -1 when the hit has
// no locations for "Content"
var startIndex, endIndex int = -1, -1
for _, locations := range hit.Locations["Content"] {
// only the first location of each entry is considered
location := locations[0]
locationStart := int(location.Start)
locationEnd := int(location.End)
if startIndex < 0 || locationStart < startIndex {
startIndex = locationStart
}
if endIndex < 0 || locationEnd > endIndex {
endIndex = locationEnd
}
}
searchResults[i] = &RepoSearchResult{
StartIndex: startIndex,
EndIndex: endIndex,
Filename: filenameOfIndexerID(hit.ID),
// NOTE(review): unchecked type assertion — panics if the "Content"
// field is absent from the hit or is not a string
Content: hit.Fields["Content"].(string),
}
}
return int64(result.Total), searchResults, nil
}

128
modules/search/search.go Normal file
View file

@ -0,0 +1,128 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package search
import (
	"bytes"
	"html"
	gotemplate "html/template"
	"strings"

	"code.gitea.io/gitea/modules/highlight"
	"code.gitea.io/gitea/modules/indexer"
	"code.gitea.io/gitea/modules/util"
)
// Result is a search result prepared for display.
type Result struct {
// Filename is the path of the matching file.
Filename string
// HighlightClass is the highlight class derived from Filename.
HighlightClass string
// LineNumbers holds the 1-based line number of every displayed line.
LineNumbers []int
// FormattedLines is the markup for the displayed lines: one <li> element
// per line, with the matched range wrapped in <span class='active'>.
FormattedLines gotemplate.HTML
}
// indices widens the selection [selectionStartIndex, selectionEndIndex) of
// content so that it also covers the line before and the line after the
// selection, and returns the start and end byte offsets of that window.
//
// The start offset is moved back to the beginning of the line preceding the
// selection's first line (or to 0); the end offset is moved forward to the
// newline terminating the line following the selection's last line (or to
// len(content)).
//
// Out-of-range selection indices are clamped into [0, len(content)], and an
// end before the start is raised to the start. Callers may pass -1/-1 for a
// hit without match locations, which previously caused an index-out-of-range
// panic.
func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
	startIndex := selectionStartIndex
	if startIndex < 0 {
		startIndex = 0
	} else if startIndex > len(content) {
		startIndex = len(content)
	}
	endIndex := selectionEndIndex
	if endIndex < startIndex {
		endIndex = startIndex
	} else if endIndex > len(content) {
		endIndex = len(content)
	}

	// walk back over one full extra line: the second newline found marks the
	// end of the line before the one we want to include
	numLinesBefore := 0
	for ; startIndex > 0; startIndex-- {
		if content[startIndex-1] == '\n' {
			if numLinesBefore == 1 {
				break
			}
			numLinesBefore++
		}
	}

	// walk forward over one full extra line, stopping on its newline
	numLinesAfter := 0
	for ; endIndex < len(content); endIndex++ {
		if content[endIndex] == '\n' {
			if numLinesAfter == 1 {
				break
			}
			numLinesAfter++
		}
	}

	return startIndex, endIndex
}
// writeStrings appends each of strs to buf in order, stopping at and
// returning the first write error.
func writeStrings(buf *bytes.Buffer, strs ...string) error {
	for i := range strs {
		if _, err := buf.WriteString(strs[i]); err != nil {
			return err
		}
	}
	return nil
}
// searchResult renders the window [startIndex, endIndex) of result.Content
// for display.
//
// Each line of the window becomes one <li> element; the part of a line that
// overlaps the match range [result.StartIndex, result.EndIndex) is wrapped in
// <span class='active'>. LineNumbers holds the 1-based number of every
// rendered line within the whole file.
//
// The file content comes from the repository and is therefore untrusted.
// Because the output is returned as template.HTML, which html/template emits
// verbatim, every piece of content is HTML-escaped here. Escaping is done per
// segment, after slicing, so the byte offsets keep referring to the raw line.
func searchResult(result *indexer.RepoSearchResult, startIndex, endIndex int) (*Result, error) {
	startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")

	var formattedLinesBuffer bytes.Buffer

	contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
	lineNumbers := make([]int, len(contentLines))
	// index is the offset of the current line within result.Content
	index := startIndex
	for i, line := range contentLines {
		var err error
		if index < result.EndIndex &&
			result.StartIndex < index+len(line) &&
			result.StartIndex < result.EndIndex {
			// the match overlaps this line; clip it to the line's bounds
			openActiveIndex := util.Max(result.StartIndex-index, 0)
			closeActiveIndex := util.Min(result.EndIndex-index, len(line))
			err = writeStrings(&formattedLinesBuffer,
				`<li>`,
				html.EscapeString(line[:openActiveIndex]),
				`<span class='active'>`,
				html.EscapeString(line[openActiveIndex:closeActiveIndex]),
				`</span>`,
				html.EscapeString(line[closeActiveIndex:]),
				`</li>`,
			)
		} else {
			err = writeStrings(&formattedLinesBuffer,
				`<li>`,
				html.EscapeString(line),
				`</li>`,
			)
		}
		if err != nil {
			return nil, err
		}

		lineNumbers[i] = startLineNum + i
		index += len(line)
	}
	return &Result{
		Filename:       result.Filename,
		HighlightClass: highlight.FileNameToHighlightClass(result.Filename),
		LineNumbers:    lineNumbers,
		FormattedLines: gotemplate.HTML(formattedLinesBuffer.String()),
	}, nil
}
// PerformSearch searches the files of a repository for keyword and returns
// the total number of hits together with the display-ready results of the
// requested page. An empty keyword yields no results and no error.
func PerformSearch(repoID int64, keyword string, page, pageSize int) (int, []*Result, error) {
	if keyword == "" {
		return 0, nil, nil
	}

	total, hits, err := indexer.SearchRepoByKeyword(repoID, keyword, page, pageSize)
	if err != nil {
		return 0, nil, err
	}

	rendered := make([]*Result, 0, len(hits))
	for _, hit := range hits {
		from, to := indices(hit.Content, hit.StartIndex, hit.EndIndex)
		display, err := searchResult(hit, from, to)
		if err != nil {
			return 0, nil, err
		}
		rendered = append(rendered, display)
	}
	return int(total), rendered, nil
}

View file

@ -140,8 +140,11 @@ var (
// Indexer settings
Indexer struct {
IssuePath string
UpdateQueueLength int
IssuePath string
RepoIndexerEnabled bool
RepoPath string
UpdateQueueLength int
MaxIndexerFileSize int64
}
// Webhook settings
@ -234,12 +237,13 @@ var (
// UI settings
UI = struct {
ExplorePagingNum int
IssuePagingNum int
FeedMaxCommitNum int
ThemeColorMetaTag string
MaxDisplayFileSize int64
ShowUserEmail bool
ExplorePagingNum int
IssuePagingNum int
RepoSearchPagingNum int
FeedMaxCommitNum int
ThemeColorMetaTag string
MaxDisplayFileSize int64
ShowUserEmail bool
Admin struct {
UserPagingNum int
@ -256,11 +260,12 @@ var (
Keywords string
} `ini:"ui.meta"`
}{
ExplorePagingNum: 20,
IssuePagingNum: 10,
FeedMaxCommitNum: 5,
ThemeColorMetaTag: `#6cc644`,
MaxDisplayFileSize: 8388608,
ExplorePagingNum: 20,
IssuePagingNum: 10,
RepoSearchPagingNum: 10,
FeedMaxCommitNum: 5,
ThemeColorMetaTag: `#6cc644`,
MaxDisplayFileSize: 8388608,
Admin: struct {
UserPagingNum int
RepoPagingNum int

16
modules/util/path.go Normal file
View file

@ -0,0 +1,16 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package util
import "path/filepath"
// EnsureAbsolutePath returns path unchanged when it is already absolute;
// otherwise it returns path joined onto absoluteBase.
func EnsureAbsolutePath(path string, absoluteBase string) string {
	if !filepath.IsAbs(path) {
		path = filepath.Join(absoluteBase, path)
	}
	return path
}

View file

@ -38,3 +38,19 @@ func OptionalBoolOf(b bool) OptionalBool {
}
return OptionalBoolFalse
}
// Max returns the larger of a and b.
func Max(a, b int) int {
	larger := a
	if b > larger {
		larger = b
	}
	return larger
}
// Min returns the smaller of a and b.
func Min(a, b int) int {
	smaller := a
	if b < smaller {
		smaller = b
	}
	return smaller
}

View file

@ -848,6 +848,10 @@ activity.title.releases_n = %d Releases
activity.title.releases_published_by = %s published by %s
activity.published_release_label = Published
search = Search
search.search_repo = Search repository
search.results = Search results for "%s" in <a href="%s">%s</a>
settings = Settings
settings.desc = Settings is where you can manage the settings for the repository
settings.options = Options

File diff suppressed because one or more lines are too long

View file

@ -158,6 +158,11 @@
}
&.file.list {
.repo-description {
display: flex;
justify-content: space-between;
align-items: center;
}
#repo-desc {
font-size: 1.2em;
}
@ -226,7 +231,7 @@
}
}
#file-content {
.non-diff-file-content {
.header {
.icon {
font-size: 1em;
@ -244,7 +249,7 @@
background: transparent;
border: 0;
outline: none;
}
}
.btn-octicon:hover {
color: #4078c0;
}
@ -323,6 +328,9 @@
}
}
}
.active {
background: #ffffdd;
}
}
}
@ -1038,6 +1046,13 @@
overflow-x: auto;
overflow-y: hidden;
}
.repo-search-result {
padding-top: 10px;
padding-bottom: 10px;
.lines-num a {
color: inherit;
}
}
&.quickstart {
.guide {

View file

@ -66,6 +66,7 @@ func GlobalInit() {
// Booting long running goroutines.
cron.NewContext()
models.InitIssueIndexer()
models.InitRepoIndexer()
models.InitSyncMirrors()
models.InitDeliverHooks()
models.InitTestPullRequests()

46
routers/repo/search.go Normal file
View file

@ -0,0 +1,46 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package repo
import (
"path"
"strings"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/search"
"code.gitea.io/gitea/modules/setting"
"github.com/Unknwon/paginater"
)
// tplSearch is the template rendered by Search.
const tplSearch base.TplName = "repo/search"
// Search renders the repository search page for the "q" and "page" query
// parameters. When the repo indexer is disabled the request is redirected to
// the repository's home page; a page below 1 is treated as page 1.
func Search(ctx *context.Context) {
	if !setting.Indexer.RepoIndexerEnabled {
		ctx.Redirect(ctx.Repo.RepoLink, 302)
		return
	}

	repository := ctx.Repo.Repository
	pageSize := setting.UI.RepoSearchPagingNum

	keyword := strings.TrimSpace(ctx.Query("q"))
	page := ctx.QueryInt("page")
	if page < 1 {
		page = 1
	}

	total, searchResults, err := search.PerformSearch(repository.ID, keyword, page, pageSize)
	if err != nil {
		ctx.Handle(500, "SearchResults", err)
		return
	}

	sourcePath := setting.AppSubURL + "/" +
		path.Join(repository.Owner.Name, repository.Name, "src", repository.DefaultBranch)

	ctx.Data["Keyword"] = keyword
	ctx.Data["Page"] = paginater.New(total, pageSize, page, 5)
	ctx.Data["SourcePath"] = sourcePath
	ctx.Data["SearchResults"] = searchResults
	ctx.Data["RequireHighlightJS"] = true
	ctx.Data["PageIsViewCode"] = true
	ctx.HTML(200, tplSearch)
}

View file

@ -649,6 +649,7 @@ func RegisterRoutes(m *macaron.Macaron) {
m.Group("/:username/:reponame", func() {
m.Get("/stars", repo.Stars)
m.Get("/watchers", repo.Watchers)
m.Get("/search", context.CheckUnit(models.UnitTypeCode), repo.Search)
}, ignSignIn, context.RepoAssignment(), context.RepoRef(), context.UnitTypes(), context.LoadRepoUnits())
m.Group("/:username", func() {

View file

@ -3,10 +3,26 @@
{{template "repo/header" .}}
<div class="ui container">
{{template "base/alert" .}}
<p id="repo-desc">
{{if .Repository.DescriptionHTML}}<span class="description has-emoji">{{.Repository.DescriptionHTML}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{.i18n.Tr "repo.no_desc"}}</span>{{end}}
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
</p>
<div class="ui repo-description">
<div id="repo-desc">
{{if .Repository.DescriptionHTML}}<span class="description has-emoji">{{.Repository.DescriptionHTML}}</span>{{else if .IsRepositoryAdmin}}<span class="no-description text-italic">{{.i18n.Tr "repo.no_desc"}}</span>{{end}}
<a class="link" href="{{.Repository.Website}}">{{.Repository.Website}}</a>
</div>
{{if .RepoSearchEnabled}}
<div class="ui repo-search">
<form class="ui form" action="{{.RepoLink}}/search" method="get">
<div class="field">
<div class="ui action input">
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}">
<button class="ui icon button" type="submit">
<i class="search icon"></i>
</button>
</div>
</div>
</form>
</div>
{{end}}
</div>
{{template "repo/sub_menu" .}}
<div class="ui secondary menu">
{{if .PullRequestCtx.Allowed}}

View file

@ -0,0 +1,49 @@
{{template "base/head" .}}
<div class="repository file list">
{{template "repo/header" .}}
<div class="ui container">
<div class="ui repo-search">
<form class="ui form" method="get">
<div class="ui fluid action input">
<input name="q" value="{{.Keyword}}" placeholder="{{.i18n.Tr "repo.search.search_repo"}}">
<button class="ui button" type="submit">
<i class="search icon"></i>
</button>
</div>
</form>
</div>
{{if .Keyword}}
<h3>
{{.i18n.Tr "repo.search.results" .Keyword .RepoLink .RepoName | Str2html}}
</h3>
<div class="repository search">
{{range $result := .SearchResults}}
<div class="diff-file-box diff-box file-content non-diff-file-content repo-search-result">
<h4 class="ui top attached normal header">
<span class="file">{{.Filename}}</span>
<a class="ui basic grey tiny button" rel="nofollow" href="{{EscapePound $.SourcePath}}/{{EscapePound .Filename}}">{{$.i18n.Tr "repo.diff.view_file"}}</a>
</h4>
<div class="ui attached table segment">
<div class="file-body file-code code-view">
<table>
<tbody>
<tr>
<td class="lines-num">
{{range .LineNumbers}}
<a href="{{EscapePound $.SourcePath}}/{{EscapePound $result.Filename}}#L{{.}}"><span>{{.}}</span></a>
{{end}}
</td>
<td class="lines-code"><pre><code class="{{.HighlightClass}}"><ol class="linenums">{{.FormattedLines}}</ol></code></pre></td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
{{end}}
</div>
{{template "base/paginate" .}}
{{end}}
</div>
</div>
{{template "base/footer" .}}

View file

@ -1,4 +1,4 @@
<div id="file-content" class="{{TabSizeClass .Editorconfig .FileName}}">
<div class="{{TabSizeClass .Editorconfig .FileName}} non-diff-file-content">
<h4 class="ui top attached header" id="{{if .ReadmeExist}}repo-readme{{else}}repo-read-file{{end}}">
{{if .ReadmeExist}}
<i class="book icon ui left"></i>

View file

@ -0,0 +1,78 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
)
// Name is the identifier under which this token filter is registered
// with the bleve registry.
const Name = "camelCase"
// CamelCaseFilter splits a given token into a set of tokens where each resulting token
// falls into one of the following classes:
// 1) Upper case followed by lower case letters.
// Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
// 2) Upper case followed by upper case letters.
// Terminated by a number, an upper case letter followed by a lower case letter, or a non alpha-numeric symbol.
// 3) Lower case followed by lower case letters.
// Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
// 4) Number followed by numbers.
// Terminated by a letter or a non alpha-numeric symbol.
// 5) Non alpha-numeric symbol followed by non alpha-numeric symbols.
// Terminated by a number or a letter.
//
// It does a one-time sequential pass over an input token, from left to right.
// The scan is greedy and generates the longest substring that fits into one of the classes.
//
// See the test file for examples of classes and their parsings.
type CamelCaseFilter struct{}
// NewCamelCaseFilter returns a ready-to-use camelCase token filter.
// The filter carries no configuration or state.
func NewCamelCaseFilter() *CamelCaseFilter {
	return new(CamelCaseFilter)
}
// Filter runs every token of input through a fresh Parser and returns the
// concatenation of the sub-tokens produced for each of them.
//
// Positions of the emitted tokens are renumbered consecutively starting at 1
// across the whole stream, while offsets continue from the Start of the input
// token being split.
func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	out := make(analysis.TokenStream, 0, len(input))

	position := 1
	for _, tok := range input {
		n := utf8.RuneCount(tok.Term)
		symbols := bytes.Runes(tok.Term)

		parser := NewParser(n, position, tok.Start)
		for i, sym := range symbols {
			// The parser is handed a one-symbol lookahead; the last symbol has none.
			var peek *rune
			if i+1 < n {
				peek = &symbols[i+1]
			}
			parser.Push(sym, peek)
		}

		out = append(out, parser.FlushTokens()...)
		position = parser.NextPosition()
	}
	return out
}
// CamelCaseFilterConstructor builds the camelCase token filter for the bleve
// registry. Neither config nor cache is consulted, and the returned error is
// always nil.
func CamelCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewCamelCaseFilter()
	return filter, nil
}
// init registers CamelCaseFilterConstructor with the bleve registry under Name.
func init() {
registry.RegisterTokenFilter(Name, CamelCaseFilterConstructor)
}

View file

@ -0,0 +1,109 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"github.com/blevesearch/bleve/analysis"
)
// buildTokenFromTerm turns the given runes into a token placed at the parser's
// current position and offset, then moves both past the new token so the next
// one starts right after it.
func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token {
	term := analysis.BuildTermFromRunes(buffer)
	start, position := p.index, p.position
	end := start + len(term)

	// Advance the cursor before handing the token out.
	p.index = end
	p.position = position + 1

	return &analysis.Token{
		Term:     term,
		Position: position,
		Start:    start,
		End:      end,
	}
}
// Parser accepts a symbol and passes it to the current state (representing a class).
// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that
// starts with the pushed symbol.
//
// Parser accumulates a new resulting token every time it switches state.
// Use FlushTokens() to get the results after the last symbol was pushed.
type Parser struct {
// bufferLen is the capacity used whenever a fresh rune buffer is allocated.
bufferLen int
// buffer holds the symbols accumulated for the token currently being built.
buffer []rune
// current is the state of the class being accumulated; nil until the first Push.
current State
// tokens collects the tokens completed so far.
tokens []*analysis.Token
// position is the Position assigned to the next token that gets built.
position int
// index is the Start offset of the next token; it advances by the length of each built term.
index int
}
// NewParser creates a parser whose rune buffer and token list are pre-sized
// to len entries. The first token it builds gets the given position and
// starts at offset index.
func NewParser(len, position, index int) *Parser {
	p := new(Parser)
	p.bufferLen = len
	p.buffer = make([]rune, 0, len)
	p.tokens = make([]*analysis.Token, 0, len)
	p.position = position
	p.index = index
	return p
}
// Push feeds the next symbol to the parser. peek is the symbol that follows
// sym on the tape, or nil when there is none.
//
// If the current state accepts sym it is accumulated; otherwise the buffer
// collected so far is emitted as a token and a new state is started with sym.
func (p *Parser) Push(sym rune, peek *rune) {
	switch {
	case p.current == nil:
		// Very first symbol: pick the state that it opens.
		p.current = p.NewState(sym)
	case p.current.Member(sym, peek):
		// Still within the same class; nothing to switch.
	default:
		// The class ended: emit what was collected and start over with sym.
		p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
		p.current = p.NewState(sym)
		p.buffer = make([]rune, 0, p.bufferLen)
	}
	// In every case sym belongs to the (possibly new) buffer.
	p.buffer = append(p.buffer, sym)
}
// NewState returns a fresh state for the class that sym opens. Candidates are
// tried in the order lower case, upper case, number; anything else falls back
// to the non alpha-numeric state.
//
// Note: states have to have different starting symbols.
func (p *Parser) NewState(sym rune) State {
	if lower := (&LowerCaseState{}); lower.StartSym(sym) {
		return lower
	}
	if upper := (&UpperCaseState{}); upper.StartSym(sym) {
		return upper
	}
	if number := (&NumberCaseState{}); number.StartSym(sym) {
		return number
	}
	return &NonAlphaNumericCaseState{}
}
// FlushTokens emits the symbols still sitting in the buffer as a final token
// and returns every token built so far. The token for the buffer is appended
// unconditionally, and the buffer itself is not reset.
func (p *Parser) FlushTokens() []*analysis.Token {
	last := p.buildTokenFromTerm(p.buffer)
	p.tokens = append(p.tokens, last)
	return p.tokens
}
// NextPosition returns the position that the next built token would receive.
func (p *Parser) NextPosition() int {
return p.position
}

View file

@ -0,0 +1,87 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"unicode"
)
// State codifies one of the classes that the parser recognizes.
type State interface {
// StartSym reports whether sym can be the first symbol of this class.
StartSym(sym rune) bool
// Member reports whether sym belongs to this class.
// peek, the next sym on the tape (nil when there is none), can also be
// used to determine a class.
Member(sym rune, peek *rune) bool
}
// LowerCaseState recognizes a run of lower case letters.
type LowerCaseState struct{}

// StartSym reports whether sym is a lower case letter and can therefore open
// a lower case run.
func (s *LowerCaseState) StartSym(sym rune) bool {
	return unicode.IsLower(sym)
}

// Member reports whether sym is a lower case letter. The lookahead peek is
// not consulted.
func (s *LowerCaseState) Member(sym rune, peek *rune) bool {
	isLower := unicode.IsLower(sym)
	return isLower
}
// UpperCaseState recognizes a class opened by an upper case letter: either
// that letter followed by lower case letters, or a run of upper case letters.
// Which of the two it is gets decided by the first symbol accepted by Member.
type UpperCaseState struct {
	startedCollecting bool // set once Member has accepted its first symbol
	collectingUpper   bool // true when the class is a run of upper case letters
}

// StartSym reports whether sym is an upper case letter.
func (s *UpperCaseState) StartSym(sym rune) bool {
	return unicode.IsUpper(sym)
}

// Member reports whether sym continues the class. peek is the symbol that
// follows sym, or nil when there is none.
func (s *UpperCaseState) Member(sym rune, peek *rune) bool {
	symIsUpper := unicode.IsUpper(sym)

	// Only cased letters can belong to this class.
	if !symIsUpper && !unicode.IsLower(sym) {
		return false
	}
	// An upper case letter directly followed by a lower case one opens a new
	// class instead of extending this one.
	if symIsUpper && peek != nil && unicode.IsLower(*peek) {
		return false
	}
	if s.startedCollecting {
		// The case was fixed by the first accepted symbol; sym has to match it.
		return s.collectingUpper == symIsUpper
	}
	// First accepted symbol: it decides which case is being collected.
	s.startedCollecting = true
	s.collectingUpper = symIsUpper
	return true
}
// NumberCaseState recognizes a run of numeric symbols.
type NumberCaseState struct{}

// StartSym reports whether sym is a number and can therefore open a numeric run.
func (s *NumberCaseState) StartSym(sym rune) bool {
	return unicode.IsNumber(sym)
}

// Member reports whether sym is a number. The lookahead peek is not consulted.
func (s *NumberCaseState) Member(sym rune, peek *rune) bool {
	isNumber := unicode.IsNumber(sym)
	return isNumber
}
// NonAlphaNumericCaseState recognizes a run of symbols that are neither
// lower case letters, upper case letters, nor numbers.
type NonAlphaNumericCaseState struct{}

// StartSym reports whether sym can open a non alpha-numeric run; it applies
// the same rule as Member with no lookahead.
func (s *NonAlphaNumericCaseState) StartSym(sym rune) bool {
	return s.Member(sym, nil)
}

// Member reports whether sym is none of lower case, upper case, or number.
// The lookahead peek is not consulted.
func (s *NonAlphaNumericCaseState) Member(sym rune, peek *rune) bool {
	alphaNumeric := unicode.IsLower(sym) || unicode.IsUpper(sym) || unicode.IsNumber(sym)
	return !alphaNumeric
}

6
vendor/vendor.json vendored
View file

@ -98,6 +98,12 @@
"revision": "011b168f7b84ffef05aed6716d73d21b1a33e971",
"revisionTime": "2017-06-14T16:31:07Z"
},
{
"checksumSHA1": "xj8o/nQj59yt+o+RZSa0n9V3vKY=",
"path": "github.com/blevesearch/bleve/analysis/token/camelcase",
"revision": "174f8ed44a0bf65e7c8fb228b60b58de62654cd2",
"revisionTime": "2017-06-28T17:18:15Z"
},
{
"checksumSHA1": "3VIPkl12t1ko4y6DkbPcz+MtQjY=",
"path": "github.com/blevesearch/bleve/analysis/token/lowercase",