Privacy-first, self-hosted, fully open-source personal knowledge management software, written in TypeScript and Go. (PERSONAL FORK)
1// SiYuan - Refactor your thinking
2// Copyright (c) 2020-present, b3log.org
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17package model
18
19import (
20 "bytes"
21 "fmt"
22 "io/fs"
23 "path/filepath"
24 "runtime"
25 "runtime/debug"
26 "strings"
27 "sync"
28 "time"
29
30 "github.com/88250/go-humanize"
31 "github.com/88250/gulu"
32 "github.com/88250/lute/ast"
33 "github.com/88250/lute/editor"
34 "github.com/88250/lute/html"
35 "github.com/88250/lute/parse"
36 "github.com/panjf2000/ants/v2"
37 "github.com/siyuan-note/eventbus"
38 "github.com/siyuan-note/filelock"
39 "github.com/siyuan-note/logging"
40 "github.com/siyuan-note/siyuan/kernel/av"
41 "github.com/siyuan-note/siyuan/kernel/cache"
42 "github.com/siyuan-note/siyuan/kernel/filesys"
43 "github.com/siyuan-note/siyuan/kernel/sql"
44 "github.com/siyuan-note/siyuan/kernel/task"
45 "github.com/siyuan-note/siyuan/kernel/treenode"
46 "github.com/siyuan-note/siyuan/kernel/util"
47)
48
49func UpsertIndexes(paths []string) {
50 var syFiles []string
51 for _, p := range paths {
52 if strings.HasSuffix(p, "/") {
53 syFiles = append(syFiles, listSyFiles(p)...)
54 continue
55 }
56
57 if strings.HasSuffix(p, ".sy") {
58 syFiles = append(syFiles, p)
59 }
60 }
61
62 syFiles = gulu.Str.RemoveDuplicatedElem(syFiles)
63 upsertIndexes(syFiles)
64}
65
66func RemoveIndexes(paths []string) {
67 var syFiles []string
68 for _, p := range paths {
69 if strings.HasSuffix(p, "/") {
70 syFiles = append(syFiles, listSyFiles(p)...)
71 continue
72 }
73
74 if strings.HasSuffix(p, ".sy") {
75 syFiles = append(syFiles, p)
76 }
77 }
78
79 syFiles = gulu.Str.RemoveDuplicatedElem(syFiles)
80 removeIndexes(syFiles)
81}
82
83func listSyFiles(dir string) (ret []string) {
84 dirPath := filepath.Join(util.DataDir, dir)
85 err := filelock.Walk(dirPath, func(path string, d fs.DirEntry, err error) error {
86 if err != nil {
87 logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err)
88 return err
89 }
90
91 if d.IsDir() {
92 return nil
93 }
94
95 if strings.HasSuffix(path, ".sy") {
96 p := filepath.ToSlash(strings.TrimPrefix(path, util.DataDir))
97 ret = append(ret, p)
98 }
99 return nil
100 })
101 if err != nil {
102 logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err)
103 }
104 return
105}
106
107func (box *Box) Unindex() {
108 task.AppendTask(task.DatabaseIndex, unindex, box.ID)
109 go func() {
110 sql.FlushQueue()
111 ResetVirtualBlockRefCache()
112 }()
113}
114
115func unindex(boxID string) {
116 ids := treenode.RemoveBlockTreesByBoxID(boxID)
117 RemoveRecentDoc(ids)
118 sql.DeleteBoxQueue(boxID)
119}
120
121func (box *Box) Index() {
122 task.AppendTask(task.DatabaseIndexRef, removeBoxRefs, box.ID)
123 task.AppendTask(task.DatabaseIndex, indexBox, box.ID)
124 task.AppendTask(task.DatabaseIndexRef, IndexRefs)
125 go func() {
126 sql.FlushQueue()
127 ResetVirtualBlockRefCache()
128 }()
129}
130
131func removeBoxRefs(boxID string) {
132 sql.DeleteBoxRefsQueue(boxID)
133}
134
135func indexBox(boxID string) {
136 box := Conf.Box(boxID)
137 if nil == box {
138 return
139 }
140
141 util.SetBootDetails("Listing files...")
142 files := box.ListFiles("/")
143 boxLen := len(Conf.GetOpenedBoxes())
144 if 1 > boxLen {
145 boxLen = 1
146 }
147 bootProgressPart := int32(30.0 / float64(boxLen) / float64(len(files)))
148
149 start := time.Now()
150 luteEngine := util.NewLute()
151 var treeCount int
152 var treeSize int64
153 lock := sync.Mutex{}
154 util.PushStatusBar(fmt.Sprintf("["+html.EscapeString(box.Name)+"] "+Conf.Language(64), len(files)))
155
156 poolSize := runtime.NumCPU()
157 if 4 < poolSize {
158 poolSize = 4
159 }
160 waitGroup := &sync.WaitGroup{}
161 var avNodes []*ast.Node
162 p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) {
163 defer waitGroup.Done()
164
165 file := arg.(*FileInfo)
166 lock.Lock()
167 treeSize += file.size
168 treeCount++
169 i := treeCount
170 lock.Unlock()
171 tree, err := filesys.LoadTree(box.ID, file.path, luteEngine)
172 if err != nil {
173 logging.LogErrorf("read box [%s] tree [%s] failed: %s", box.ID, file.path, err)
174 return
175 }
176
177 docIAL := parse.IAL2MapUnEsc(tree.Root.KramdownIAL)
178 if "" == docIAL["updated"] { // 早期的数据可能没有 updated 属性,这里进行订正
179 updated := util.TimeFromID(tree.Root.ID)
180 tree.Root.SetIALAttr("updated", updated)
181 docIAL["updated"] = updated
182 if _, writeErr := filesys.WriteTree(tree); nil != writeErr {
183 logging.LogErrorf("write tree [%s] failed: %s", tree.Path, writeErr)
184 }
185 }
186
187 lock.Lock()
188 avNodes = append(avNodes, tree.Root.ChildrenByType(ast.NodeAttributeView)...)
189 lock.Unlock()
190
191 cache.PutDocIAL(file.path, docIAL)
192 treenode.IndexBlockTree(tree)
193 sql.IndexTreeQueue(tree)
194 util.IncBootProgress(bootProgressPart, fmt.Sprintf(Conf.Language(92), util.ShortPathForBootingDisplay(tree.Path)))
195 if 1 < i && 0 == i%64 {
196 util.PushStatusBar(fmt.Sprintf(Conf.Language(88), i, (len(files))-i))
197 }
198 })
199 for _, file := range files {
200 if file.isdir || !strings.HasSuffix(file.name, ".sy") {
201 continue
202 }
203
204 if !ast.IsNodeIDPattern(strings.TrimSuffix(file.name, ".sy")) {
205 // 不以块 ID 命名的 .sy 文件不应该被加载到思源中 https://github.com/siyuan-note/siyuan/issues/16089
206 continue
207 }
208
209 waitGroup.Add(1)
210 invokeErr := p.Invoke(file)
211 if nil != invokeErr {
212 logging.LogErrorf("invoke [%s] failed: %s", file.path, invokeErr)
213 continue
214 }
215 }
216 waitGroup.Wait()
217 p.Release()
218
219 // 关联数据库和块
220 av.BatchUpsertBlockRel(avNodes)
221
222 box.UpdateHistoryGenerated() // 初始化历史生成时间为当前时间
223 end := time.Now()
224 elapsed := end.Sub(start).Seconds()
225 logging.LogInfof("rebuilt database for notebook [%s] in [%.2fs], tree [count=%d, size=%s]", box.ID, elapsed, treeCount, humanize.BytesCustomCeil(uint64(treeSize), 2))
226 debug.FreeOSMemory()
227 return
228}
229
230func IndexRefs() {
231 start := time.Now()
232 util.SetBootDetails("Resolving refs...")
233 util.PushStatusBar(Conf.Language(54))
234 util.SetBootDetails("Indexing refs...")
235
236 var defBlockIDs []string
237 luteEngine := util.NewLute()
238 boxes := Conf.GetOpenedBoxes()
239 for _, box := range boxes {
240 pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32)
241 for _, paths := range pages {
242 for _, treeAbsPath := range paths {
243 data, readErr := filelock.ReadFile(treeAbsPath)
244 if nil != readErr {
245 logging.LogWarnf("get data [path=%s] failed: %s", treeAbsPath, readErr)
246 continue
247 }
248
249 if !bytes.Contains(data, []byte("TextMarkBlockRefID")) && !bytes.Contains(data, []byte("TextMarkFileAnnotationRefID")) {
250 continue
251 }
252
253 p := filepath.ToSlash(strings.TrimPrefix(treeAbsPath, filepath.Join(util.DataDir, box.ID)))
254 tree, parseErr := filesys.LoadTreeByData(data, box.ID, p, luteEngine)
255 if nil != parseErr {
256 logging.LogWarnf("parse json to tree [%s] failed: %s", treeAbsPath, parseErr)
257 continue
258 }
259
260 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
261 if !entering {
262 return ast.WalkContinue
263 }
264
265 if treenode.IsBlockRef(n) || treenode.IsFileAnnotationRef(n) {
266 defBlockIDs = append(defBlockIDs, tree.Root.ID)
267 }
268 return ast.WalkContinue
269 })
270 }
271 }
272 }
273
274 defBlockIDs = gulu.Str.RemoveDuplicatedElem(defBlockIDs)
275
276 i := 0
277 size := len(defBlockIDs)
278 if 0 < size {
279 bootProgressPart := int32(10.0 / float64(size))
280
281 for _, defBlockID := range defBlockIDs {
282 defTree, loadErr := LoadTreeByBlockID(defBlockID)
283 if nil != loadErr {
284 continue
285 }
286
287 util.IncBootProgress(bootProgressPart, "Indexing ref "+defTree.ID)
288 sql.UpdateRefsTreeQueue(defTree)
289 if 1 < i && 0 == i%64 {
290 util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i))
291 }
292 i++
293 }
294 }
295 logging.LogInfof("resolved refs [%d] in [%dms]", size, time.Now().Sub(start).Milliseconds())
296 util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i))
297}
298
// indexEmbedBlockLock serialises runs of autoIndexEmbedBlock.
var indexEmbedBlockLock sync.Mutex
300
301// IndexEmbedBlockJob 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112
302func IndexEmbedBlockJob() {
303 task.AppendTaskWithTimeout(task.DatabaseIndexEmbedBlock, 30*time.Second, autoIndexEmbedBlock)
304}
305
306func autoIndexEmbedBlock() {
307 indexEmbedBlockLock.Lock()
308 defer indexEmbedBlockLock.Unlock()
309
310 embedBlocks := sql.QueryEmptyContentEmbedBlocks()
311 for i, embedBlock := range embedBlocks {
312 markdown := strings.TrimSpace(embedBlock.Markdown)
313 markdown = strings.TrimPrefix(markdown, "{{")
314 stmt := strings.TrimSuffix(markdown, "}}")
315
316 // 嵌入块的 Markdown 内容需要反转义
317 stmt = html.UnescapeString(stmt)
318 stmt = strings.ReplaceAll(stmt, editor.IALValEscNewLine, "\n")
319
320 // 需要移除首尾的空白字符以判断是否具有 //!js 标记
321 stmt = strings.TrimSpace(stmt)
322 if strings.HasPrefix(stmt, "//!js") {
323 // https://github.com/siyuan-note/siyuan/issues/9648
324 // js 嵌入块不支持自动索引,由前端主动调用 /api/search/updateEmbedBlock 接口更新内容 https://github.com/siyuan-note/siyuan/issues/9736
325 continue
326 }
327
328 if !strings.Contains(strings.ToLower(stmt), "select") {
329 continue
330 }
331
332 queryResultBlocks := sql.SelectBlocksRawStmtNoParse(stmt, 102400)
333 for _, block := range queryResultBlocks {
334 embedBlock.Content += block.Content
335 }
336 if "" == embedBlock.Content {
337 embedBlock.Content = "no query result"
338 }
339 sql.UpdateBlockContentQueue(embedBlock)
340
341 if 63 <= i { // 一次任务中最多处理 64 个嵌入块,防止卡顿
342 break
343 }
344 }
345}
346
347func updateEmbedBlockContent(embedBlockID string, queryResultBlocks []*EmbedBlock) {
348 embedBlock := sql.GetBlock(embedBlockID)
349 if nil == embedBlock {
350 return
351 }
352
353 embedBlock.Content = "" // 嵌入块每查询一次多一个结果 https://github.com/siyuan-note/siyuan/issues/7196
354 for _, block := range queryResultBlocks {
355 embedBlock.Content += block.Block.Markdown
356 }
357 if "" == embedBlock.Content {
358 embedBlock.Content = "no query result"
359 }
360 sql.UpdateBlockContentQueue(embedBlock)
361}
362
363func init() {
364 subscribeSQLEvents()
365}
366
// Switches consulted by the handlers registered in subscribeSQLEvents; a
// handler returns without publishing anything while its switch is false.
var (
	// pushSQLInsertBlocksFTSMsg gates the EvtSQLInsertBlocksFTS progress message.
	pushSQLInsertBlocksFTSMsg bool
	// pushSQLDeleteBlocksMsg gates the EvtSQLDeleteBlocks progress message.
	pushSQLDeleteBlocksMsg bool
)
371
372func subscribeSQLEvents() {
373 // 使用下面的 EvtSQLInsertBlocksFTS 就可以了
374 //eventbus.Subscribe(eventbus.EvtSQLInsertBlocks, func(context map[string]interface{}, current, total, blockCount int, hash string) {
375 //
376 // msg := fmt.Sprintf(Conf.Language(89), current, total, blockCount, hash)
377 // util.SetBootDetails(msg)
378 // util.ContextPushMsg(context, msg)
379 //})
380 eventbus.Subscribe(eventbus.EvtSQLInsertBlocksFTS, func(context map[string]interface{}, blockCount int, hash string) {
381 if !pushSQLInsertBlocksFTSMsg {
382 return
383 }
384
385 current := context["current"].(int)
386 total := context["total"]
387 msg := fmt.Sprintf(Conf.Language(90), current, total, blockCount, hash)
388 util.SetBootDetails(msg)
389 util.ContextPushMsg(context, msg)
390 })
391 eventbus.Subscribe(eventbus.EvtSQLDeleteBlocks, func(context map[string]interface{}, rootID string) {
392 if !pushSQLDeleteBlocksMsg {
393 return
394 }
395
396 current := context["current"].(int)
397 total := context["total"]
398 msg := fmt.Sprintf(Conf.Language(93), current, total, rootID)
399 util.SetBootDetails(msg)
400 util.ContextPushMsg(context, msg)
401 })
402 eventbus.Subscribe(eventbus.EvtSQLUpdateBlocksHPaths, func(context map[string]interface{}, blockCount int, hash string) {
403 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container {
404 return
405 }
406
407 current := context["current"].(int)
408 total := context["total"]
409 msg := fmt.Sprintf(Conf.Language(234), current, total, blockCount, hash)
410 util.SetBootDetails(msg)
411 util.ContextPushMsg(context, msg)
412 })
413
414 eventbus.Subscribe(eventbus.EvtSQLInsertHistory, func(context map[string]interface{}) {
415 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container {
416 return
417 }
418
419 current := context["current"].(int)
420 total := context["total"]
421 msg := fmt.Sprintf(Conf.Language(191), current, total)
422 util.SetBootDetails(msg)
423 util.ContextPushMsg(context, msg)
424 })
425
426 eventbus.Subscribe(eventbus.EvtSQLInsertAssetContent, func(context map[string]interface{}) {
427 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container {
428 return
429 }
430
431 current := context["current"].(int)
432 total := context["total"]
433 msg := fmt.Sprintf(Conf.Language(217), current, total)
434 util.SetBootDetails(msg)
435 util.ContextPushMsg(context, msg)
436 })
437
438 eventbus.Subscribe(eventbus.EvtSQLIndexChanged, func() {
439 Conf.DataIndexState = 1
440 Conf.Save()
441 })
442
443 eventbus.Subscribe(eventbus.EvtSQLIndexFlushed, func() {
444 Conf.DataIndexState = 0
445 Conf.Save()
446 })
447}