Monorepo for Tangled
at f6ac2d9789063b566b9b50795c2edf1a72f33172 315 lines 8.8 kB view raw
1// heavily inspired by gitea's model (basically copy-pasted) 2package issues_indexer 3 4import ( 5 "context" 6 "errors" 7 "log" 8 "os" 9 10 "github.com/blevesearch/bleve/v2" 11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" 12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase" 13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase" 14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" 15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" 16 "github.com/blevesearch/bleve/v2/index/upsidedown" 17 "github.com/blevesearch/bleve/v2/mapping" 18 "github.com/blevesearch/bleve/v2/search/query" 19 "tangled.org/core/appview/db" 20 "tangled.org/core/appview/indexer/base36" 21 "tangled.org/core/appview/indexer/bleve" 22 "tangled.org/core/appview/models" 23 "tangled.org/core/appview/pagination" 24 tlog "tangled.org/core/log" 25) 26 27const ( 28 issueIndexerAnalyzer = "issueIndexer" 29 issueIndexerDocType = "issueIndexerDocType" 30 31 unicodeNormalizeName = "uicodeNormalize" 32 33 // Bump this when the index mapping changes to trigger a rebuild. 34 issueIndexerVersion = 2 35) 36 37type Indexer struct { 38 indexer bleve.Index 39 path string 40} 41 42func NewIndexer(indexDir string) *Indexer { 43 return &Indexer{ 44 path: indexDir, 45 } 46} 47 48// Init initializes the indexer 49func (ix *Indexer) Init(ctx context.Context, e db.Execer) { 50 l := tlog.FromContext(ctx) 51 existed, err := ix.intialize(ctx) 52 if err != nil { 53 log.Fatalln("failed to initialize issue indexer", err) 54 } 55 if !existed { 56 l.Debug("Populating the issue indexer") 57 err := PopulateIndexer(ctx, ix, e) 58 if err != nil { 59 log.Fatalln("failed to populate issue indexer", err) 60 } 61 } 62 63 count, _ := ix.indexer.DocCount() 64 l.Info("Initialized the issue indexer", "docCount", count) 65} 66 67func generateIssueIndexMapping() (mapping.IndexMapping, error) { 68 mapping := bleve.NewIndexMapping() 69 docMapping := bleve.NewDocumentMapping() 70 71 textFieldMapping := bleve.NewTextFieldMapping() 72 textFieldMapping.Store = false 73 textFieldMapping.IncludeInAll = false 74 75 boolFieldMapping := bleve.NewBooleanFieldMapping() 76 boolFieldMapping.Store = false 77 boolFieldMapping.IncludeInAll = false 78 79 keywordFieldMapping := bleve.NewKeywordFieldMapping() 80 keywordFieldMapping.Store = false 81 keywordFieldMapping.IncludeInAll = false 82 83 // numericFieldMapping := bleve.NewNumericFieldMapping() 84 85 docMapping.AddFieldMappingsAt("title", textFieldMapping) 86 docMapping.AddFieldMappingsAt("body", textFieldMapping) 87 88 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping) 89 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping) 90 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping) 91 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping) 92 93 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{ 94 "type": unicodenorm.Name, 95 "form": unicodenorm.NFC, 96 }) 97 if err != nil { 98 return nil, err 99 } 100 101 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{ 102 "type": custom.Name, 103 "char_filters": []string{}, 104 "tokenizer": unicode.Name, 105 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name}, 106 }) 107 if err != nil { 108 return nil, err 109 } 110 111 mapping.DefaultAnalyzer = issueIndexerAnalyzer 112 mapping.AddDocumentMapping(issueIndexerDocType, docMapping) 113 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) 114 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() 115 116 return mapping, nil 117} 118 119func (ix *Indexer) intialize(ctx context.Context) (bool, error) { 120 if ix.indexer != nil { 121 return false, errors.New("indexer is already initialized") 122 } 123 124 indexer, err := openIndexer(ctx, ix.path, issueIndexerVersion) 125 if err != nil { 126 return false, err 127 } 128 if indexer != nil { 129 ix.indexer = indexer 130 return true, nil 131 } 132 133 mapping, err := generateIssueIndexMapping() 134 if err != nil { 135 return false, err 136 } 137 indexer, err = bleve.New(ix.path, mapping) 138 if err != nil { 139 return false, err 140 } 141 indexer.SetInternal([]byte("mapping_version"), []byte{byte(issueIndexerVersion)}) 142 143 ix.indexer = indexer 144 145 return false, nil 146} 147 148func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) { 149 l := tlog.FromContext(ctx) 150 indexer, err := bleve.Open(path) 151 if err != nil { 152 if errors.Is(err, upsidedown.IncompatibleVersion) { 153 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding") 154 return nil, os.RemoveAll(path) 155 } 156 return nil, nil 157 } 158 159 storedVersion, _ := indexer.GetInternal([]byte("mapping_version")) 160 if storedVersion == nil || int(storedVersion[0]) != version { 161 l.Info("Indexer mapping version changed, deleting and rebuilding") 162 indexer.Close() 163 return nil, os.RemoveAll(path) 164 } 165 166 return indexer, nil 167} 168 169func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error { 170 l := tlog.FromContext(ctx) 171 count := 0 172 err := pagination.IterateAll( 173 func(page pagination.Page) ([]models.Issue, error) { 174 return db.GetIssuesPaginated(e, page) 175 }, 176 func(issues []models.Issue) error { 177 count += len(issues) 178 return ix.Index(ctx, issues...) 179 }, 180 ) 181 l.Info("issues indexed", "count", count) 182 return err 183} 184 185type issueData struct { 186 ID int64 `json:"id"` 187 RepoAt string `json:"repo_at"` 188 IssueID int `json:"issue_id"` 189 Title string `json:"title"` 190 Body string `json:"body"` 191 IsOpen bool `json:"is_open"` 192 AuthorDid string `json:"author_did"` 193 Labels []string `json:"labels"` 194 195 Comments []IssueCommentData `json:"comments"` 196} 197 198func makeIssueData(issue *models.Issue) *issueData { 199 return &issueData{ 200 ID: issue.Id, 201 RepoAt: issue.RepoAt.String(), 202 IssueID: issue.IssueId, 203 Title: issue.Title, 204 Body: issue.Body, 205 IsOpen: issue.Open, 206 AuthorDid: issue.Did, 207 Labels: issue.Labels.LabelNames(), 208 } 209} 210 211// Type returns the document type, for bleve's mapping.Classifier interface. 212func (i *issueData) Type() string { 213 return issueIndexerDocType 214} 215 216type IssueCommentData struct { 217 Body string `json:"body"` 218} 219 220type SearchResult struct { 221 Hits []int64 222 Total uint64 223} 224 225const maxBatchSize = 20 226 227func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error { 228 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize) 229 for _, issue := range issues { 230 issueData := makeIssueData(&issue) 231 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil { 232 return err 233 } 234 } 235 return batch.Flush() 236} 237 238func (ix *Indexer) Delete(ctx context.Context, issueId int64) error { 239 return ix.indexer.Delete(base36.Encode(issueId)) 240} 241 242func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) { 243 var musts []query.Query 244 var mustNots []query.Query 245 246 for _, keyword := range opts.Keywords { 247 musts = append(musts, bleve.NewDisjunctionQuery( 248 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 249 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 250 )) 251 } 252 253 for _, phrase := range opts.Phrases { 254 musts = append(musts, bleve.NewDisjunctionQuery( 255 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 256 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 257 )) 258 } 259 260 for _, keyword := range opts.NegatedKeywords { 261 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 262 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0), 263 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0), 264 )) 265 } 266 267 for _, phrase := range opts.NegatedPhrases { 268 mustNots = append(mustNots, bleve.NewDisjunctionQuery( 269 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer), 270 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer), 271 )) 272 } 273 274 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt)) 275 if opts.IsOpen != nil { 276 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen)) 277 } 278 279 if opts.AuthorDid != "" { 280 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid)) 281 } 282 283 for _, label := range opts.Labels { 284 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label)) 285 } 286 287 if opts.NegatedAuthorDid != "" { 288 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid)) 289 } 290 291 for _, label := range opts.NegatedLabels { 292 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label)) 293 } 294 295 indexerQuery := bleve.NewBooleanQuery() 296 indexerQuery.AddMust(musts...) 297 indexerQuery.AddMustNot(mustNots...) 298 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false) 299 res, err := ix.indexer.SearchInContext(ctx, searchReq) 300 if err != nil { 301 return nil, nil 302 } 303 ret := &SearchResult{ 304 Total: res.Total, 305 Hits: make([]int64, len(res.Hits)), 306 } 307 for i, hit := range res.Hits { 308 id, err := base36.Decode(hit.ID) 309 if err != nil { 310 return nil, err 311 } 312 ret.Hits[i] = id 313 } 314 return ret, nil 315}