this repo has no description
at ci-statuses 6.8 kB view raw
1// heavily inspired by gitea's model (basically copy-pasted) 2package issues_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 "tangled.org/core/appview/pagination" 24 tlog "tangled.org/core/log" 25) 26 27const ( 28 issueIndexerAnalyzer = "issueIndexer" 29 issueIndexerDocType = "issueIndexerDocType" 30 31 unicodeNormalizeName = "uicodeNormalize" 32) 33 34type Indexer struct { 35 indexer bleve.Index 36 path string 37} 38 39func NewIndexer(indexDir string) *Indexer { 40 return &Indexer{ 41 path: indexDir, 42 } 43} 44 45// Init initializes the indexer 46func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 47 l := tlog.FromContext(ctx) 48 existed, err := ix.intialize(ctx) 49 if err != nil { 50 log.Fatalln("failed to initialize issue indexer", err) 51 } 52 if !existed { 53 l.Debug("Populating the issue indexer") 54 err := PopulateIndexer(ctx, ix, e) 55 if err != nil { 56 log.Fatalln("failed to populate issue indexer", err) 57 } 58 } 59 l.Info("Initialized the issue indexer") 60} 61 62func generateIssueIndexMapping() (mapping.IndexMapping, error) { 63 mapping := bleve.NewIndexMapping() 64 docMapping := bleve.NewDocumentMapping() 65 66 textFieldMapping := bleve.NewTextFieldMapping() 67 textFieldMapping.Store = false 68 textFieldMapping.IncludeInAll = false 69 70 boolFieldMapping := bleve.NewBooleanFieldMapping() 71 boolFieldMapping.Store = false 72 boolFieldMapping.IncludeInAll = false 73 74 keywordFieldMapping := bleve.NewKeywordFieldMapping() 75 keywordFieldMapping.Store = false 76 keywordFieldMapping.IncludeInAll = false 77 78 // numericFieldMapping := bleve.NewNumericFieldMapping() 79 80 docMapping.AddFieldMappingsAt("title", textFieldMapping) 81 docMapping.AddFieldMappingsAt("body", textFieldMapping) 82 83 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 84 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 85 86 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 87 "type": unicodenorm.Name, 88 "form": unicodenorm.NFC, 89 }) 90 if err != nil { 91 return nil, err 92 } 93 94 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 95 "type": custom.Name, 96 "char_filters": []string{}, 97 "tokenizer": unicode.Name, 98 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 99 }) 100 if err != nil { 101 return nil, err 102 } 103 104 mapping.DefaultAnalyzer = issueIndexerAnalyzer 105 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 106 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 107 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 108 109 return mapping, nil 110} 111 112func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 113 if ix.indexer != nil { 114 return false, errors.New("indexer is already initialized") 115 } 116 117 indexer, err := openIndexer(ctx, ix.path) 118 if err != nil { 119 return false, err 120 } 121 if indexer != nil { 122 ix.indexer = indexer 123 return true, nil 124 } 125 126 mapping, err := generateIssueIndexMapping() 127 if err != nil { 128 return false, err 129 } 130 indexer, err = bleve.New(ix.path, mapping) 131 if err != nil { 132 return false, err 133 } 134 135 ix.indexer = indexer 136 137 return false, nil 138} 139 140func openIndexer(ctx context.Context, path string) (bleve.Index, error) { 141 l := tlog.FromContext(ctx) 142 indexer, err := bleve.Open(path) 143 if err != nil { 144 if errors.Is(err, upsidedown.IncompatibleVersion) { 145 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 146 return nil, os.RemoveAll(path) 147 } 148 return nil, nil 149 } 150 return indexer, nil 151} 152 153func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 154 l := tlog.FromContext(ctx) 155 count := 0 156 err := pagination.IterateAll( 157 func(page pagination.Page) ([]models.Issue, error) { 158 return db.GetIssuesPaginated(e, page) 159 }, 160 func(issues []models.Issue) error { 161 count += len(issues) 162 return ix.Index(ctx, issues...) 163 }, 164 ) 165 l.Info("issues indexed", "count", count) 166 return err 167} 168 169// issueData data stored and will be indexed 170type issueData struct { 171 ID int64 `json:"id"` 172 RepoAt string `json:"repo_at"` 173 IssueID int `json:"issue_id"` 174 Title string `json:"title"` 175 Body string `json:"body"` 176 177 IsOpen bool `json:"is_open"` 178 Comments []IssueCommentData `json:"comments"` 179} 180 181func makeIssueData(issue *models.Issue) *issueData { 182 return &issueData{ 183 ID: issue.Id, 184 RepoAt: issue.RepoAt.String(), 185 IssueID: issue.IssueId, 186 Title: issue.Title, 187 Body: issue.Body, 188 IsOpen: issue.Open, 189 } 190} 191 192// Type returns the document type, for bleve's mapping.Classifier interface. 193func (i *issueData) Type() string { 194 return issueIndexerDocType 195} 196 197type IssueCommentData struct { 198 Body string `json:"body"` 199} 200 201type SearchResult struct { 202 Hits []int64 203 Total uint64 204} 205 206const maxBatchSize = 20 207 208func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error { 209 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 210 for _, issue := range issues { 211 issueData := makeIssueData(&issue) 212 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil { 213 return err 214 } 215 } 216 return batch.Flush() 217} 218 219func (ix *Indexer) Delete(ctx context.Context, issueId int64) error { 220 return ix.indexer.Delete(base36.Encode(issueId)) 221} 222 223// Search searches for issues 224func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) { 225 var queries []query.Query 226 227 if opts.Keyword != "" { 228 queries = append(queries, bleve.NewDisjunctionQuery( 229 bleveutil.MatchAndQuery("title", opts.Keyword, issueIndexerAnalyzer, 0), 230 bleveutil.MatchAndQuery("body", opts.Keyword, issueIndexerAnalyzer, 0), 231 )) 232 } 233 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 234 queries = append(queries, bleveutil.BoolFieldQuery("is_open", opts.IsOpen)) 235 // TODO: append more queries 236 237 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...) 238 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 239 res, err := ix.indexer.SearchInContext(ctx, searchReq) 240 if err != nil { 241 return nil, nil 242 } 243 ret := &SearchResult{ 244 Total: res.Total, 245 Hits: make([]int64, len(res.Hits)), 246 } 247 for i, hit := range res.Hits { 248 id, err := base36.Decode(hit.ID) 249 if err != nil { 250 return nil, err 251 } 252 ret.Hits[i] = id 253 } 254 return ret, nil 255}