Monorepo for Tangled
1// heavily inspired by gitea's model (basically copy-pasted)
2package issues_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 bleveutil "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 "tangled.org/core/appview/pagination"
24 tlog "tangled.org/core/log"
25)
26
// Names and versioning used by the bleve issue index.
const (
	// issueIndexerAnalyzer is the name the custom analyzer is registered
	// under on the index mapping; queries refer to it by this name.
	issueIndexerAnalyzer = "issueIndexer"
	// issueIndexerDocType is the document type issue documents are mapped as.
	issueIndexerDocType = "issueIndexerDocType"

	// unicodeNormalizeName is the name the custom unicode normalization
	// token filter is registered under.
	// NOTE(review): the value looks like a typo of "unicodeNormalize";
	// correcting it would presumably change the stored mapping, so bump
	// issueIndexerVersion if it is ever fixed — confirm.
	unicodeNormalizeName = "uicodeNormalize"

	// issueIndexerVersion is the mapping version written into the index.
	// Bump this when the index mapping changes to trigger a rebuild.
	// It is persisted as a single byte, so it has to stay within 0-255.
	issueIndexerVersion = 3
)
36
37type Indexer struct {
38 indexer bleve.Index
39 path string
40}
41
42func NewIndexer(indexDir string) *Indexer {
43 return &Indexer{
44 path: indexDir,
45 }
46}
47
48// Init initializes the indexer
49func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
50 l := tlog.FromContext(ctx)
51 existed, err := ix.intialize(ctx)
52 if err != nil {
53 log.Fatalln("failed to initialize issue indexer", err)
54 }
55 if !existed {
56 l.Debug("Populating the issue indexer")
57 err := PopulateIndexer(ctx, ix, e)
58 if err != nil {
59 log.Fatalln("failed to populate issue indexer", err)
60 }
61 }
62
63 count, _ := ix.indexer.DocCount()
64 l.Info("Initialized the issue indexer", "docCount", count)
65}
66
67func generateIssueIndexMapping() (mapping.IndexMapping, error) {
68 mapping := bleve.NewIndexMapping()
69 docMapping := bleve.NewDocumentMapping()
70
71 textFieldMapping := bleve.NewTextFieldMapping()
72 textFieldMapping.Store = false
73 textFieldMapping.IncludeInAll = false
74
75 boolFieldMapping := bleve.NewBooleanFieldMapping()
76 boolFieldMapping.Store = false
77 boolFieldMapping.IncludeInAll = false
78
79 keywordFieldMapping := bleve.NewKeywordFieldMapping()
80 keywordFieldMapping.Store = false
81 keywordFieldMapping.IncludeInAll = false
82
83 // numericFieldMapping := bleve.NewNumericFieldMapping()
84
85 docMapping.AddFieldMappingsAt("title", textFieldMapping)
86 docMapping.AddFieldMappingsAt("body", textFieldMapping)
87
88 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
89 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
90 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping)
91 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping)
92 docMapping.AddFieldMappingsAt("label_values", keywordFieldMapping)
93
94 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
95 "type": unicodenorm.Name,
96 "form": unicodenorm.NFC,
97 })
98 if err != nil {
99 return nil, err
100 }
101
102 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
103 "type": custom.Name,
104 "char_filters": []string{},
105 "tokenizer": unicode.Name,
106 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
107 })
108 if err != nil {
109 return nil, err
110 }
111
112 mapping.DefaultAnalyzer = issueIndexerAnalyzer
113 mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
114 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
115 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
116
117 return mapping, nil
118}
119
120func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
121 if ix.indexer != nil {
122 return false, errors.New("indexer is already initialized")
123 }
124
125 indexer, err := openIndexer(ctx, ix.path, issueIndexerVersion)
126 if err != nil {
127 return false, err
128 }
129 if indexer != nil {
130 ix.indexer = indexer
131 return true, nil
132 }
133
134 mapping, err := generateIssueIndexMapping()
135 if err != nil {
136 return false, err
137 }
138 indexer, err = bleve.New(ix.path, mapping)
139 if err != nil {
140 return false, err
141 }
142 indexer.SetInternal([]byte("mapping_version"), []byte{byte(issueIndexerVersion)})
143
144 ix.indexer = indexer
145
146 return false, nil
147}
148
149func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) {
150 l := tlog.FromContext(ctx)
151 indexer, err := bleve.Open(path)
152 if err != nil {
153 if errors.Is(err, upsidedown.IncompatibleVersion) {
154 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
155 return nil, os.RemoveAll(path)
156 }
157 return nil, nil
158 }
159
160 storedVersion, _ := indexer.GetInternal([]byte("mapping_version"))
161 if storedVersion == nil || int(storedVersion[0]) != version {
162 l.Info("Indexer mapping version changed, deleting and rebuilding")
163 indexer.Close()
164 return nil, os.RemoveAll(path)
165 }
166
167 return indexer, nil
168}
169
170func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
171 l := tlog.FromContext(ctx)
172 count := 0
173 err := pagination.IterateAll(
174 func(page pagination.Page) ([]models.Issue, error) {
175 return db.GetIssuesPaginated(e, page)
176 },
177 func(issues []models.Issue) error {
178 count += len(issues)
179 return ix.Index(ctx, issues...)
180 },
181 )
182 l.Info("issues indexed", "count", count)
183 return err
184}
185
186type issueData struct {
187 ID int64 `json:"id"`
188 RepoAt string `json:"repo_at"`
189 IssueID int `json:"issue_id"`
190 Title string `json:"title"`
191 Body string `json:"body"`
192 IsOpen bool `json:"is_open"`
193 AuthorDid string `json:"author_did"`
194 Labels []string `json:"labels"`
195 LabelValues []string `json:"label_values"`
196
197 Comments []IssueCommentData `json:"comments"`
198}
199
200func makeIssueData(issue *models.Issue) *issueData {
201 return &issueData{
202 ID: issue.Id,
203 RepoAt: issue.RepoAt.String(),
204 IssueID: issue.IssueId,
205 Title: issue.Title,
206 Body: issue.Body,
207 IsOpen: issue.Open,
208 AuthorDid: issue.Did,
209 Labels: issue.Labels.LabelNames(),
210 LabelValues: issue.Labels.LabelNameValues(),
211 }
212}
213
214// Type returns the document type, for bleve's mapping.Classifier interface.
215func (i *issueData) Type() string {
216 return issueIndexerDocType
217}
218
// IssueCommentData is the indexable form of a single issue comment.
type IssueCommentData struct {
	// Body is the comment text.
	Body string `json:"body"`
}
222
// SearchResult is what Indexer.Search returns for one page of results.
type SearchResult struct {
	// Hits holds the issue IDs of the matches on the requested page, in the
	// order the index returned them.
	Hits []int64
	// Total is the total match count reported by the index for the query.
	Total uint64
}
227
// maxBatchSize is the size limit Index passes to the flushing batch.
// NOTE(review): presumably the number of queued documents after which the
// batch flushes itself — confirm against bleveutil.NewFlushingBatch.
const maxBatchSize = 20
229
230func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error {
231 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
232 for _, issue := range issues {
233 issueData := makeIssueData(&issue)
234 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil {
235 return err
236 }
237 }
238 return batch.Flush()
239}
240
241func (ix *Indexer) Delete(ctx context.Context, issueId int64) error {
242 return ix.indexer.Delete(base36.Encode(issueId))
243}
244
245func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) {
246 var musts []query.Query
247 var mustNots []query.Query
248
249 for _, keyword := range opts.Keywords {
250 musts = append(musts, bleve.NewDisjunctionQuery(
251 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
252 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
253 ))
254 }
255
256 for _, phrase := range opts.Phrases {
257 musts = append(musts, bleve.NewDisjunctionQuery(
258 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
259 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
260 ))
261 }
262
263 for _, keyword := range opts.NegatedKeywords {
264 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
265 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
266 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
267 ))
268 }
269
270 for _, phrase := range opts.NegatedPhrases {
271 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
272 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
273 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
274 ))
275 }
276
277 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
278 if opts.IsOpen != nil {
279 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen))
280 }
281
282 if opts.AuthorDid != "" {
283 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid))
284 }
285
286 for _, label := range opts.Labels {
287 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label))
288 }
289
290 for _, did := range opts.NegatedAuthorDids {
291 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", did))
292 }
293
294 for _, label := range opts.NegatedLabels {
295 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label))
296 }
297
298 for _, lv := range opts.LabelValues {
299 musts = append(musts, bleveutil.KeywordFieldQuery("label_values", lv))
300 }
301
302 for _, lv := range opts.NegatedLabelValues {
303 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("label_values", lv))
304 }
305
306 indexerQuery := bleve.NewBooleanQuery()
307 indexerQuery.AddMust(musts...)
308 indexerQuery.AddMustNot(mustNots...)
309 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
310 res, err := ix.indexer.SearchInContext(ctx, searchReq)
311 if err != nil {
312 return nil, nil
313 }
314 ret := &SearchResult{
315 Total: res.Total,
316 Hits: make([]int64, len(res.Hits)),
317 }
318 for i, hit := range res.Hits {
319 id, err := base36.Decode(hit.ID)
320 if err != nil {
321 return nil, err
322 }
323 ret.Hits[i] = id
324 }
325 return ret, nil
326}