this repo has no description
1// heavily inspired by gitea's model (basically copy-pasted)
2package issues_indexer
3
4import (
5 "context"
6 "errors"
7 "log"
8 "os"
9
10 "github.com/blevesearch/bleve/v2"
11 "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
12 "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
13 "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
14 "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
15 "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
16 "github.com/blevesearch/bleve/v2/index/upsidedown"
17 "github.com/blevesearch/bleve/v2/mapping"
18 "github.com/blevesearch/bleve/v2/search/query"
19 "tangled.org/core/appview/db"
20 "tangled.org/core/appview/indexer/base36"
21 "tangled.org/core/appview/indexer/bleve"
22 "tangled.org/core/appview/models"
23 "tangled.org/core/appview/pagination"
24 tlog "tangled.org/core/log"
25)
26
// issueIndexerAnalyzer is the name under which the custom issue analyzer is
// registered on the index mapping; searches reference the analyzer by it.
const issueIndexerAnalyzer = "issueIndexer"

// issueIndexerDocType is the document type reported by issue documents and
// the key their document mapping is registered under.
const issueIndexerDocType = "issueIndexerDocType"

// unicodeNormalizeName is the name under which the unicode normalization
// token filter is registered on the index mapping.
const unicodeNormalizeName = "uicodeNormalize"
33
34type Indexer struct {
35 indexer bleve.Index
36 path string
37}
38
39func NewIndexer(indexDir string) *Indexer {
40 return &Indexer{
41 path: indexDir,
42 }
43}
44
45// Init initializes the indexer
46func (ix *Indexer) Init(ctx context.Context, e db.Execer) {
47 l := tlog.FromContext(ctx)
48 existed, err := ix.intialize(ctx)
49 if err != nil {
50 log.Fatalln("failed to initialize issue indexer", err)
51 }
52 if !existed {
53 l.Debug("Populating the issue indexer")
54 err := PopulateIndexer(ctx, ix, e)
55 if err != nil {
56 log.Fatalln("failed to populate issue indexer", err)
57 }
58 }
59 l.Info("Initialized the issue indexer")
60}
61
62func generateIssueIndexMapping() (mapping.IndexMapping, error) {
63 mapping := bleve.NewIndexMapping()
64 docMapping := bleve.NewDocumentMapping()
65
66 textFieldMapping := bleve.NewTextFieldMapping()
67 textFieldMapping.Store = false
68 textFieldMapping.IncludeInAll = false
69
70 boolFieldMapping := bleve.NewBooleanFieldMapping()
71 boolFieldMapping.Store = false
72 boolFieldMapping.IncludeInAll = false
73
74 keywordFieldMapping := bleve.NewKeywordFieldMapping()
75 keywordFieldMapping.Store = false
76 keywordFieldMapping.IncludeInAll = false
77
78 // numericFieldMapping := bleve.NewNumericFieldMapping()
79
80 docMapping.AddFieldMappingsAt("title", textFieldMapping)
81 docMapping.AddFieldMappingsAt("body", textFieldMapping)
82
83 docMapping.AddFieldMappingsAt("repo_at", keywordFieldMapping)
84 docMapping.AddFieldMappingsAt("is_open", boolFieldMapping)
85
86 err := mapping.AddCustomTokenFilter(unicodeNormalizeName, map[string]any{
87 "type": unicodenorm.Name,
88 "form": unicodenorm.NFC,
89 })
90 if err != nil {
91 return nil, err
92 }
93
94 err = mapping.AddCustomAnalyzer(issueIndexerAnalyzer, map[string]any{
95 "type": custom.Name,
96 "char_filters": []string{},
97 "tokenizer": unicode.Name,
98 "token_filters": []string{unicodeNormalizeName, camelcase.Name, lowercase.Name},
99 })
100 if err != nil {
101 return nil, err
102 }
103
104 mapping.DefaultAnalyzer = issueIndexerAnalyzer
105 mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
106 mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
107 mapping.DefaultMapping = bleve.NewDocumentDisabledMapping()
108
109 return mapping, nil
110}
111
112func (ix *Indexer) intialize(ctx context.Context) (bool, error) {
113 if ix.indexer != nil {
114 return false, errors.New("indexer is already initialized")
115 }
116
117 indexer, err := openIndexer(ctx, ix.path)
118 if err != nil {
119 return false, err
120 }
121 if indexer != nil {
122 ix.indexer = indexer
123 return true, nil
124 }
125
126 mapping, err := generateIssueIndexMapping()
127 if err != nil {
128 return false, err
129 }
130 indexer, err = bleve.New(ix.path, mapping)
131 if err != nil {
132 return false, err
133 }
134
135 ix.indexer = indexer
136
137 return false, nil
138}
139
140func openIndexer(ctx context.Context, path string) (bleve.Index, error) {
141 l := tlog.FromContext(ctx)
142 indexer, err := bleve.Open(path)
143 if err != nil {
144 if errors.Is(err, upsidedown.IncompatibleVersion) {
145 l.Info("Indexer was built with a previous version of bleve, deleting and rebuilding")
146 return nil, os.RemoveAll(path)
147 }
148 return nil, nil
149 }
150 return indexer, nil
151}
152
153func PopulateIndexer(ctx context.Context, ix *Indexer, e db.Execer) error {
154 l := tlog.FromContext(ctx)
155 count := 0
156 err := pagination.IterateAll(
157 func(page pagination.Page) ([]models.Issue, error) {
158 return db.GetIssuesPaginated(e, page)
159 },
160 func(issues []models.Issue) error {
161 count += len(issues)
162 return ix.Index(ctx, issues...)
163 },
164 )
165 l.Info("issues indexed", "count", count)
166 return err
167}
168
169// issueData data stored and will be indexed
170type issueData struct {
171 ID int64 `json:"id"`
172 RepoAt string `json:"repo_at"`
173 IssueID int `json:"issue_id"`
174 Title string `json:"title"`
175 Body string `json:"body"`
176
177 IsOpen bool `json:"is_open"`
178 Comments []IssueCommentData `json:"comments"`
179}
180
181func makeIssueData(issue *models.Issue) *issueData {
182 return &issueData{
183 ID: issue.Id,
184 RepoAt: issue.RepoAt.String(),
185 IssueID: issue.IssueId,
186 Title: issue.Title,
187 Body: issue.Body,
188 IsOpen: issue.Open,
189 }
190}
191
192// Type returns the document type, for bleve's mapping.Classifier interface.
193func (i *issueData) Type() string {
194 return issueIndexerDocType
195}
196
// IssueCommentData is the per-comment part of an issue document.
type IssueCommentData struct {
	// Body is the comment text, serialized as "body".
	Body string `json:"body"`
}
200
// SearchResult is what Search returns for one query.
type SearchResult struct {
	// Hits holds the issue ids decoded from the ids of the returned documents.
	Hits []int64
	// Total is the total reported by bleve for the query.
	Total uint64
}
205
// maxBatchSize is the size given to the flushing batch that Index writes
// through.
const maxBatchSize = 20
207
208func (ix *Indexer) Index(ctx context.Context, issues ...models.Issue) error {
209 batch := bleveutil.NewFlushingBatch(ix.indexer, maxBatchSize)
210 for _, issue := range issues {
211 issueData := makeIssueData(&issue)
212 if err := batch.Index(base36.Encode(issue.Id), issueData); err != nil {
213 return err
214 }
215 }
216 return batch.Flush()
217}
218
219func (ix *Indexer) Delete(ctx context.Context, issueId int64) error {
220 return ix.indexer.Delete(base36.Encode(issueId))
221}
222
223// Search searches for issues
224func (ix *Indexer) Search(ctx context.Context, opts models.IssueSearchOptions) (*SearchResult, error) {
225 var queries []query.Query
226
227 if opts.Keyword != "" {
228 queries = append(queries, bleve.NewDisjunctionQuery(
229 bleveutil.MatchAndQuery("title", opts.Keyword, issueIndexerAnalyzer, 0),
230 bleveutil.MatchAndQuery("body", opts.Keyword, issueIndexerAnalyzer, 0),
231 ))
232 }
233 queries = append(queries, bleveutil.KeywordFieldQuery("repo_at", opts.RepoAt))
234 queries = append(queries, bleveutil.BoolFieldQuery("is_open", opts.IsOpen))
235 // TODO: append more queries
236
237 var indexerQuery query.Query = bleve.NewConjunctionQuery(queries...)
238 searchReq := bleve.NewSearchRequestOptions(indexerQuery, opts.Page.Limit, opts.Page.Offset, false)
239 res, err := ix.indexer.SearchInContext(ctx, searchReq)
240 if err != nil {
241 return nil, nil
242 }
243 ret := &SearchResult{
244 Total: res.Total,
245 Hits: make([]int64, len(res.Hits)),
246 }
247 for i, hit := range res.Hits {
248 id, err := base36.Decode(hit.ID)
249 if err != nil {
250 return nil, err
251 }
252 ret.Hits[i] = id
253 }
254 return ret, nil
255}