Monorepo for Tangled
at master 326 lines 9.3 kB view raw
1// heavily inspired by gitea's model (basically copy-pasted) 2package issues_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 bleveutil "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 "tangled.org/core/appview/pagination" 24 tlog "tangled.org/core/log" 25) 26 27const ( 28 issueIndexerAnalyzer = "issueIndexer" 29 issueIndexerDocType = "issueIndexerDocType" 30 31 unicodeNormalizeName = "uicodeNormalize" 32 33 // Bump this when the index mapping changes to trigger a rebuild. 34 issueIndexerVersion = 3 35) 36 37type Indexer struct { 38 indexer bleve.Index 39 path string 40} 41 42func NewIndexer(indexDir string) *Indexer { 43 return &Indexer{ 44 path: indexDir, 45 } 46} 47 48// Init initializes the indexer 49func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 50 l := tlog.FromContext(ctx) 51 existed, err := ix.intialize(ctx) 52 if err != nil { 53 log.Fatalln("failed to initialize issue indexer", err) 54 } 55 if !existed { 56 l.Debug("Populating the issue indexer") 57 err := PopulateIndexer(ctx, ix, e) 58 if err != nil { 59 log.Fatalln("failed to populate issue indexer", err) 60 } 61 } 62 63 count, _ := ix.indexer.DocCount() 64 l.Info("Initialized the issue indexer", "docCount", count) 65} 66 67func generateIssueIndexMapping() (mapping.IndexMapping, error) { 68 mapping := bleve.NewIndexMapping() 69 docMapping := bleve.NewDocumentMapping() 70 71 textFieldMapping := bleve.NewTextFieldMapping() 72 textFieldMapping.Store = false 73 textFieldMapping.IncludeInAll = false 74 75 boolFieldMapping := bleve.NewBooleanFieldMapping() 76 boolFieldMapping.Store = false 77 boolFieldMapping.IncludeInAll = false 78 79 keywordFieldMapping := bleve.NewKeywordFieldMapping() 80 keywordFieldMapping.Store = false 81 keywordFieldMapping.IncludeInAll = false 82 83 // numericFieldMapping := bleve.NewNumericFieldMapping() 84 85 docMapping.AddFieldMappingsAt("title", textFieldMapping) 86 docMapping.AddFieldMappingsAt("body", textFieldMapping) 87 88 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 89 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 90 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping) 91 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping) 92 docMapping.AddFieldMappingsAt("label_values", keywordFieldMapping) 93 94 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 95 "type": unicodenorm.Name, 96 "form": unicodenorm.NFC, 97 }) 98 if err != nil { 99 return nil, err 100 } 101 102 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 103 "type": custom.Name, 104 "char_filters": []string{}, 105 "tokenizer": unicode.Name, 106 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 107 }) 108 if err != nil { 109 return nil, err 110 } 111 112 mapping.DefaultAnalyzer = issueIndexerAnalyzer 113 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 114 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 115 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 116 117 return mapping, nil 118} 119 120func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 121 if ix.indexer != nil { 122 return false, errors.New("indexer is already initialized") 123 } 124 125 indexer, err := openIndexer(ctx, ix.path, issueIndexerVersion) 126 if err != nil { 127 return false, err 128 } 129 if indexer != nil { 130 ix.indexer = indexer 131 return true, nil 132 } 133 134 mapping, err := generateIssueIndexMapping() 135 if err != nil { 136 return false, err 137 } 138 indexer, err = bleve.New(ix.path, mapping) 139 if err != nil { 140 return false, err 141 } 142 indexer.SetInternal([]byte("mapping_version"), []byte{byte(issueIndexerVersion)}) 143 144 ix.indexer = indexer 145 146 return false, nil 147} 148 149func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) { 150 l := tlog.FromContext(ctx) 151 indexer, err := bleve.Open(path) 152 if err != nil { 153 if errors.Is(err, upsidedown.IncompatibleVersion) { 154 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 155 return nil, os.RemoveAll(path) 156 } 157 return nil, nil 158 } 159 160 storedVersion, _ := indexer.GetInternal([]byte("mapping_version")) 161 if storedVersion == nil || int(storedVersion[0]) != version { 162 l.Info("Indexer mapping version changed, deleting and rebuilding") 163 indexer.Close() 164 return nil, os.RemoveAll(path) 165 } 166 167 return indexer, nil 168} 169 170func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 171 l := tlog.FromContext(ctx) 172 count := 0 173 err := pagination.IterateAll( 174 func(page pagination.Page) ([]models.Issue, error) { 175 return db.GetIssuesPaginated(e, page) 176 }, 177 func(issues []models.Issue) error { 178 count += len(issues) 179 return ix.Index(ctx, issues...) 180 }, 181 ) 182 l.Info("issues indexed", "count", count) 183 return err 184} 185 186type issueData struct { 187 ID int64 `json:"id"` 188 RepoAt string `json:"repo_at"` 189 IssueID int `json:"issue_id"` 190 Title string `json:"title"` 191 Body string `json:"body"` 192 IsOpen bool `json:"is_open"` 193 AuthorDid string `json:"author_did"` 194 Labels []string `json:"labels"` 195 LabelValues []string `json:"label_values"` 196 197 Comments []IssueCommentData `json:"comments"` 198} 199 200func makeIssueData(issue *models.Issue) *issueData { 201 return &issueData{ 202 ID: issue.Id, 203 RepoAt: issue.RepoAt.String(), 204 IssueID: issue.IssueId, 205 Title: issue.Title, 206 Body: issue.Body, 207 IsOpen: issue.Open, 208 AuthorDid: issue.Did, 209 Labels: issue.Labels.LabelNames(), 210 LabelValues: issue.Labels.LabelNameValues(), 211 } 212} 213 214// Type returns the document type, for bleve's mapping.Classifier interface. 215func (i *issueData) Type() string { 216 return issueIndexerDocType 217} 218 219type IssueCommentData struct { 220 Body string `json:"body"` 221} 222 223type SearchResult struct { 224 Hits []int64 225 Total uint64 226} 227 228const maxBatchSize = 20 229 230func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error { 231 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 232 for _, issue := range issues { 233 issueData := makeIssueData(&issue) 234 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil { 235 return err 236 } 237 } 238 return batch.Flush() 239} 240 241func (ix *Indexer) Delete(ctx context.Context, issueId int64) error { 242 return ix.indexer.Delete(base36.Encode(issueId)) 243} 244 245func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) { 246 var musts []query.Query 247 var mustNots []query.Query 248 249 for _, keyword := range opts.Keywords { 250 musts = append(musts, bleve.NewDisjunctionQuery( 251 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 252 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 253 )) 254 } 255 256 for _, phrase := range opts.Phrases { 257 musts = append(musts, bleve.NewDisjunctionQuery( 258 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 259 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 260 )) 261 } 262 263 for _, keyword := range opts.NegatedKeywords { 264 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 265 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 266 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 267 )) 268 } 269 270 for _, phrase := range opts.NegatedPhrases { 271 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 272 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 273 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 274 )) 275 } 276 277 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 278 if opts.IsOpen != nil { 279 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen)) 280 } 281 282 if opts.AuthorDid != "" { 283 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid)) 284 } 285 286 for _, label := range opts.Labels { 287 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label)) 288 } 289 290 for _, did := range opts.NegatedAuthorDids { 291 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", did)) 292 } 293 294 for _, label := range opts.NegatedLabels { 295 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label)) 296 } 297 298 for _, lv := range opts.LabelValues { 299 musts = append(musts, bleveutil.KeywordFieldQuery("label_values", lv)) 300 } 301 302 for _, lv := range opts.NegatedLabelValues { 303 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("label_values", lv)) 304 } 305 306 indexerQuery := bleve.NewBooleanQuery() 307 indexerQuery.AddMust(musts...) 308 indexerQuery.AddMustNot(mustNots...) 309 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 310 res, err := ix.indexer.SearchInContext(ctx, searchReq) 311 if err != nil { 312 return nil, nil 313 } 314 ret := &SearchResult{ 315 Total: res.Total, 316 Hits: make([]int64, len(res.Hits)), 317 } 318 for i, hit := range res.Hits { 319 id, err := base36.Decode(hit.ID) 320 if err != nil { 321 return nil, err 322 } 323 ret.Hits[i] = id 324 } 325 return ret, nil 326}