Monorepo for Tangled
1// heavily inspired by gitea's model (basically copy-pasted)
2package issues_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 "tangled.org/core/appview/pagination"
24 tlog "tangled.org/core/log"
25)
26
// Registry names and versioning for the bleve issue index.
const (
	// issueIndexerAnalyzer is the registered name of the custom analyzer
	// applied to the "title" and "body" text fields.
	issueIndexerAnalyzer = "issueIndexer"
	// issueIndexerDocType classifies issue documents (see issueData.Type).
	issueIndexerDocType = "issueIndexerDocType"

	// unicodeNormalizeName is the registration key for the NFC token
	// filter. (Fixed typo "uicodeNormalize"; the key is only referenced
	// through this constant, and bleve persists custom-analyzer configs
	// inside each index's stored mapping, so existing indexes still open.)
	unicodeNormalizeName = "unicodeNormalize"

	// Bump this when the index mapping changes to trigger a rebuild.
	issueIndexerVersion = 2
)
36
// Indexer maintains an on-disk bleve full-text index of issues.
// The zero value is not usable; construct with NewIndexer and call Init.
type Indexer struct {
	indexer bleve.Index // nil until Init/intialize has opened or created the index
	path string // filesystem directory holding the bleve index
}
41
42func NewIndexer(indexDir string) *Indexer {
43 return &Indexer{
44 path: indexDir,
45 }
46}
47
48// Init initializes the indexer
49func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
50 l := tlog.FromContext(ctx)
51 existed, err := ix.intialize(ctx)
52 if err != nil {
53 log.Fatalln("failed to initialize issue indexer", err)
54 }
55 if !existed {
56 l.Debug("Populating the issue indexer")
57 err := PopulateIndexer(ctx, ix, e)
58 if err != nil {
59 log.Fatalln("failed to populate issue indexer", err)
60 }
61 }
62
63 count, _ := ix.indexer.DocCount()
64 l.Info("Initialized the issue indexer", "docCount", count)
65}
66
67func generateIssueIndexMapping() (mapping.IndexMapping, error) {
68 mapping := bleve.NewIndexMapping()
69 docMapping := bleve.NewDocumentMapping()
70
71 textFieldMapping := bleve.NewTextFieldMapping()
72 textFieldMapping.Store = false
73 textFieldMapping.IncludeInAll = false
74
75 boolFieldMapping := bleve.NewBooleanFieldMapping()
76 boolFieldMapping.Store = false
77 boolFieldMapping.IncludeInAll = false
78
79 keywordFieldMapping := bleve.NewKeywordFieldMapping()
80 keywordFieldMapping.Store = false
81 keywordFieldMapping.IncludeInAll = false
82
83 // numericFieldMapping := bleve.NewNumericFieldMapping()
84
85 docMapping.AddFieldMappingsAt("title", textFieldMapping)
86 docMapping.AddFieldMappingsAt("body", textFieldMapping)
87
88 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
89 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
90 docMapping.AddFieldMappingsAt("author_did", keywordFieldMapping)
91 docMapping.AddFieldMappingsAt("labels", keywordFieldMapping)
92
93 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
94 "type": unicodenorm.Name,
95 "form": unicodenorm.NFC,
96 })
97 if err != nil {
98 return nil, err
99 }
100
101 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
102 "type": custom.Name,
103 "char_filters": []string{},
104 "tokenizer": unicode.Name,
105 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
106 })
107 if err != nil {
108 return nil, err
109 }
110
111 mapping.DefaultAnalyzer = issueIndexerAnalyzer
112 mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
113 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
114 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
115
116 return mapping, nil
117}
118
119func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
120 if ix.indexer != nil {
121 return false, errors.New("indexer is already initialized")
122 }
123
124 indexer, err := openIndexer(ctx, ix.path, issueIndexerVersion)
125 if err != nil {
126 return false, err
127 }
128 if indexer != nil {
129 ix.indexer = indexer
130 return true, nil
131 }
132
133 mapping, err := generateIssueIndexMapping()
134 if err != nil {
135 return false, err
136 }
137 indexer, err = bleve.New(ix.path, mapping)
138 if err != nil {
139 return false, err
140 }
141 indexer.SetInternal([]byte("mapping_version"), []byte{byte(issueIndexerVersion)})
142
143 ix.indexer = indexer
144
145 return false, nil
146}
147
148func openIndexer(ctx context.Context, path string, version int) (bleve.Index, error) {
149 l := tlog.FromContext(ctx)
150 indexer, err := bleve.Open(path)
151 if err != nil {
152 if errors.Is(err, upsidedown.IncompatibleVersion) {
153 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
154 return nil, os.RemoveAll(path)
155 }
156 return nil, nil
157 }
158
159 storedVersion, _ := indexer.GetInternal([]byte("mapping_version"))
160 if storedVersion == nil || int(storedVersion[0]) != version {
161 l.Info("Indexer mapping version changed, deleting and rebuilding")
162 indexer.Close()
163 return nil, os.RemoveAll(path)
164 }
165
166 return indexer, nil
167}
168
169func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
170 l := tlog.FromContext(ctx)
171 count := 0
172 err := pagination.IterateAll(
173 func(page pagination.Page) ([]models.Issue, error) {
174 return db.GetIssuesPaginated(e, page)
175 },
176 func(issues []models.Issue) error {
177 count += len(issues)
178 return ix.Index(ctx, issues...)
179 },
180 )
181 l.Info("issues indexed", "count", count)
182 return err
183}
184
// issueData is the document shape stored in the bleve index for one
// issue. JSON field names must match those registered in
// generateIssueIndexMapping.
type issueData struct {
	ID int64 `json:"id"`
	RepoAt string `json:"repo_at"`
	IssueID int `json:"issue_id"`
	Title string `json:"title"`
	Body string `json:"body"`
	IsOpen bool `json:"is_open"`
	AuthorDid string `json:"author_did"`
	Labels []string `json:"labels"`

	// Comments is declared but not populated by makeIssueData in this file.
	Comments []IssueCommentData `json:"comments"`
}
197
198func makeIssueData(issue *models.Issue) *issueData {
199 return &issueData{
200 ID: issue.Id,
201 RepoAt: issue.RepoAt.String(),
202 IssueID: issue.IssueId,
203 Title: issue.Title,
204 Body: issue.Body,
205 IsOpen: issue.Open,
206 AuthorDid: issue.Did,
207 Labels: issue.Labels.LabelNames(),
208 }
209}
210
// Type returns the document type, for bleve's mapping.Classifier
// interface; it routes issueData documents to the document mapping
// registered under issueIndexerDocType.
func (i *issueData) Type() string {
	return issueIndexerDocType
}
215
// IssueCommentData is the indexable form of a single issue comment,
// embedded in issueData.Comments.
type IssueCommentData struct {
	Body string `json:"body"`
}
219
// SearchResult holds one page of matching issue row IDs plus the total
// number of hits across all pages.
type SearchResult struct {
	Hits []int64 // issue row IDs, decoded from base36 document keys
	Total uint64 // total matches, independent of pagination
}
224
// maxBatchSize is the number of documents buffered before the flushing
// batch writes to the index.
const maxBatchSize = 20
226
227func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error {
228 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
229 for _, issue := range issues {
230 issueData := makeIssueData(&issue)
231 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil {
232 return err
233 }
234 }
235 return batch.Flush()
236}
237
// Delete removes the issue with the given row ID from the index.
func (ix *Indexer) Delete(ctx context.Context, issueId int64) error {
	return ix.indexer.Delete(base36.Encode(issueId))
}
241
242func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) {
243 var musts []query.Query
244 var mustNots []query.Query
245
246 for _, keyword := range opts.Keywords {
247 musts = append(musts, bleve.NewDisjunctionQuery(
248 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
249 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
250 ))
251 }
252
253 for _, phrase := range opts.Phrases {
254 musts = append(musts, bleve.NewDisjunctionQuery(
255 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
256 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
257 ))
258 }
259
260 for _, keyword := range opts.NegatedKeywords {
261 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
262 bleveutil.MatchAndQuery("title", keyword, issueIndexerAnalyzer, 0),
263 bleveutil.MatchAndQuery("body", keyword, issueIndexerAnalyzer, 0),
264 ))
265 }
266
267 for _, phrase := range opts.NegatedPhrases {
268 mustNots = append(mustNots, bleve.NewDisjunctionQuery(
269 bleveutil.MatchPhraseQuery("title", phrase, issueIndexerAnalyzer),
270 bleveutil.MatchPhraseQuery("body", phrase, issueIndexerAnalyzer),
271 ))
272 }
273
274 musts = append(musts, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
275 if opts.IsOpen != nil {
276 musts = append(musts, bleveutil.BoolFieldQuery("is_open", *opts.IsOpen))
277 }
278
279 if opts.AuthorDid != "" {
280 musts = append(musts, bleveutil.KeywordFieldQuery("author_did", opts.AuthorDid))
281 }
282
283 for _, label := range opts.Labels {
284 musts = append(musts, bleveutil.KeywordFieldQuery("labels", label))
285 }
286
287 if opts.NegatedAuthorDid != "" {
288 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("author_did", opts.NegatedAuthorDid))
289 }
290
291 for _, label := range opts.NegatedLabels {
292 mustNots = append(mustNots, bleveutil.KeywordFieldQuery("labels", label))
293 }
294
295 indexerQuery := bleve.NewBooleanQuery()
296 indexerQuery.AddMust(musts...)
297 indexerQuery.AddMustNot(mustNots...)
298 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
299 res, err := ix.indexer.SearchInContext(ctx, searchReq)
300 if err != nil {
301 return nil, nil
302 }
303 ret := &SearchResult{
304 Total: res.Total,
305 Hits: make([]int64, len(res.Hits)),
306 }
307 for i, hit := range res.Hits {
308 id, err := base36.Decode(hit.ID)
309 if err != nil {
310 return nil, err
311 }
312 ret.Hits[i] = id
313 }
314 return ret, nil
315}