A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
1// SiYuan - Refactor your thinking
2// Copyright (c) 2020-present, b3log.org
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU Affero General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU Affero General Public License for more details.
13//
14// You should have received a copy of the GNU Affero General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17package model
18
19import (
20 "fmt"
21 "io/fs"
22 "os"
23 "path"
24 "path/filepath"
25 "runtime/debug"
26 "strings"
27 "sync"
28 "time"
29
30 "github.com/88250/gulu"
31 "github.com/88250/lute"
32 "github.com/88250/lute/ast"
33 "github.com/88250/lute/html"
34 "github.com/88250/lute/parse"
35 "github.com/siyuan-note/filelock"
36 "github.com/siyuan-note/logging"
37 "github.com/siyuan-note/siyuan/kernel/filesys"
38 "github.com/siyuan-note/siyuan/kernel/sql"
39 "github.com/siyuan-note/siyuan/kernel/task"
40 "github.com/siyuan-note/siyuan/kernel/treenode"
41 "github.com/siyuan-note/siyuan/kernel/util"
42)
43
// checkIndexOnce guards the body of checkIndex so that it runs at most once.
var checkIndexOnce sync.Once
47
48// checkIndex 自动校验数据库索引,仅在数据同步执行完成后执行一次。
49func checkIndex() {
50 checkIndexOnce.Do(func() {
51 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container {
52 // 移动端不执行校验 https://ld246.com/article/1734939896061
53 return
54 }
55
56 logging.LogInfof("start checking index...")
57
58 removeDuplicateDatabaseIndex()
59 sql.FlushQueue()
60
61 resetDuplicateBlocksOnFileSys()
62 sql.FlushQueue()
63
64 fixBlockTreeByFileSys()
65 sql.FlushQueue()
66
67 fixDatabaseIndexByBlockTree()
68 sql.FlushQueue()
69
70 removeDuplicateDatabaseRefs()
71
72 // 后面要加任务的话记得修改推送任务栏的进度 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
73
74 debug.FreeOSMemory()
75 util.PushStatusBar(Conf.Language(185))
76 logging.LogInfof("finish checking index")
77 })
78}
79
80// removeDuplicateDatabaseRefs 删除重复的数据库引用关系。
81func removeDuplicateDatabaseRefs() {
82 defer logging.Recover()
83
84 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 5, 5))
85 duplicatedRootIDs := sql.GetRefDuplicatedDefRootIDs()
86 for _, rootID := range duplicatedRootIDs {
87 refreshRefsByDefID(rootID)
88 }
89
90 for _, rootID := range duplicatedRootIDs {
91 logging.LogWarnf("exist more than one ref duplicated [%s], reindex it", rootID)
92 }
93}
94
95// removeDuplicateDatabaseIndex 删除重复的数据库索引。
96func removeDuplicateDatabaseIndex() {
97 defer logging.Recover()
98
99 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5))
100 duplicatedRootIDs := sql.GetDuplicatedRootIDs("blocks")
101 if 1 > len(duplicatedRootIDs) {
102 duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts")
103 if 1 > len(duplicatedRootIDs) && !Conf.Search.CaseSensitive {
104 duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts_case_insensitive")
105 }
106 }
107
108 roots := sql.GetBlocks(duplicatedRootIDs)
109 rootMap := map[string]*sql.Block{}
110 for _, root := range roots {
111 if nil == root {
112 continue
113 }
114 rootMap[root.ID] = root
115 }
116
117 var toRemoveRootIDs []string
118 var deletes int
119 for _, rootID := range duplicatedRootIDs {
120 root := rootMap[rootID]
121 if nil == root {
122 continue
123 }
124 deletes++
125 toRemoveRootIDs = append(toRemoveRootIDs, rootID)
126 if util.IsExiting.Load() {
127 break
128 }
129 }
130 toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
131 sql.BatchRemoveTreeQueue(toRemoveRootIDs)
132
133 if 0 < deletes {
134 logging.LogWarnf("exist more than one tree duplicated [%d], reindex it", deletes)
135 }
136}
137
138// resetDuplicateBlocksOnFileSys 重置重复 ID 的块。 https://github.com/siyuan-note/siyuan/issues/7357
139func resetDuplicateBlocksOnFileSys() {
140 defer logging.Recover()
141
142 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 2, 5))
143 boxes := Conf.GetBoxes()
144 luteEngine := lute.New()
145 blockIDs := map[string]bool{}
146 needRefreshUI := false
147 for _, box := range boxes {
148 // 校验索引阶段自动删除历史遗留的笔记本 history 文件夹
149 legacyHistory := filepath.Join(util.DataDir, box.ID, ".siyuan", "history")
150 if gulu.File.IsDir(legacyHistory) {
151 if removeErr := os.RemoveAll(legacyHistory); nil != removeErr {
152 logging.LogErrorf("remove legacy history failed: %s", removeErr)
153 } else {
154 logging.LogInfof("removed legacy history [%s]", legacyHistory)
155 }
156 }
157
158 boxPath := filepath.Join(util.DataDir, box.ID)
159 var duplicatedTrees []*parse.Tree
160 filelock.Walk(boxPath, func(path string, d fs.DirEntry, err error) error {
161 if err != nil || nil == d {
162 return nil
163 }
164
165 if d.IsDir() {
166 if boxPath == path {
167 // 跳过笔记本文件夹
168 return nil
169 }
170
171 if strings.HasPrefix(d.Name(), ".") {
172 return filepath.SkipDir
173 }
174
175 if !ast.IsNodeIDPattern(d.Name()) {
176 return nil
177 }
178 return nil
179 }
180
181 if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
182 return nil
183 }
184
185 if !ast.IsNodeIDPattern(strings.TrimSuffix(d.Name(), ".sy")) {
186 logging.LogWarnf("invalid .sy file name [%s]", path)
187 box.moveCorruptedData(path)
188 return nil
189 }
190
191 p := path[len(boxPath):]
192 p = filepath.ToSlash(p)
193 tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine)
194 if nil != loadErr {
195 logging.LogErrorf("load tree [%s] failed: %s", p, loadErr)
196 return nil
197 }
198
199 needOverwrite := false
200 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus {
201 if !entering || !n.IsBlock() {
202 return ast.WalkContinue
203 }
204
205 if "" == n.ID {
206 needOverwrite = true
207 treenode.ResetNodeID(n)
208 return ast.WalkContinue
209 }
210
211 if !blockIDs[n.ID] {
212 blockIDs[n.ID] = true
213 return ast.WalkContinue
214 }
215
216 // 存在重复的块 ID
217
218 if ast.NodeDocument == n.Type {
219 // 如果是文档根节点,则重置这颗树
220 // 这里不能在迭代中重置,因为如果这个文档存在子文档的话,重置时会重命名子文档文件夹,后续迭代可能会导致子文档 ID 重复
221 duplicatedTrees = append(duplicatedTrees, tree)
222 return ast.WalkStop
223 }
224
225 // 其他情况,重置节点 ID
226 needOverwrite = true
227 treenode.ResetNodeID(n)
228 needRefreshUI = true
229 return ast.WalkContinue
230 })
231
232 if needOverwrite {
233 logging.LogWarnf("exist more than one node with the same id in tree [%s], reset it", box.ID+p)
234 if _, writeErr := filesys.WriteTree(tree); nil != writeErr {
235 logging.LogErrorf("write tree [%s] failed: %s", p, writeErr)
236 }
237 }
238 return nil
239 })
240
241 for _, tree := range duplicatedTrees {
242 absPath := filepath.Join(boxPath, tree.Path)
243 logging.LogWarnf("exist more than one tree with the same id [%s], reset it", absPath)
244 recreateTree(tree, absPath)
245 needRefreshUI = true
246 }
247 }
248
249 if needRefreshUI {
250 util.ReloadUI()
251 task.AppendAsyncTaskWithDelay(task.PushMsg, 3*time.Second, util.PushMsg, Conf.Language(190), 5000)
252 }
253}
254
255func recreateTree(tree *parse.Tree, absPath string) {
256 // 删除关于该树的所有块树数据,后面会调用 fixBlockTreeByFileSys() 进行订正补全
257 treenode.RemoveBlockTreesByPathPrefix(strings.TrimSuffix(tree.Path, ".sy"))
258 treenode.RemoveBlockTreesByRootID(tree.ID)
259
260 resetTree(tree, "", true)
261 if _, err := filesys.WriteTree(tree); err != nil {
262 logging.LogWarnf("write tree [%s] failed: %s", tree.Path, err)
263 return
264 }
265
266 if gulu.File.IsDir(strings.TrimSuffix(absPath, ".sy")) {
267 // 重命名子文档文件夹
268 from := strings.TrimSuffix(absPath, ".sy")
269 to := filepath.Join(filepath.Dir(absPath), tree.ID)
270 if renameErr := os.Rename(from, to); nil != renameErr {
271 logging.LogWarnf("rename [%s] failed: %s", from, renameErr)
272 return
273 }
274 }
275
276 if err := filelock.Remove(absPath); err != nil {
277 logging.LogWarnf("remove [%s] failed: %s", absPath, err)
278 return
279 }
280}
281
282// fixBlockTreeByFileSys 通过文件系统订正块树。
283func fixBlockTreeByFileSys() {
284 defer logging.Recover()
285
286 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 3, 5))
287 boxes := Conf.GetOpenedBoxes()
288 luteEngine := lute.New()
289 for _, box := range boxes {
290 boxPath := filepath.Join(util.DataDir, box.ID)
291 var paths []string
292 filelock.Walk(boxPath, func(path string, d fs.DirEntry, err error) error {
293 if nil != err || nil == d {
294 return nil
295 }
296
297 if boxPath == path {
298 // 跳过根路径(笔记本文件夹)
299 return nil
300 }
301
302 if d.IsDir() {
303 if strings.HasPrefix(d.Name(), ".") {
304 return filepath.SkipDir
305 }
306 return nil
307 }
308
309 if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") {
310 return nil
311 }
312
313 p := path[len(boxPath):]
314 p = filepath.ToSlash(p)
315 paths = append(paths, p)
316 return nil
317 })
318
319 size := len(paths)
320
321 // 清理块树中的冗余数据
322 treenode.ClearRedundantBlockTrees(box.ID, paths)
323
324 // 重新索引缺失的块树
325 missingPaths := treenode.GetNotExistPaths(box.ID, paths)
326 for i, p := range missingPaths {
327 id := path.Base(p)
328 id = strings.TrimSuffix(id, ".sy")
329 if !ast.IsNodeIDPattern(id) {
330 continue
331 }
332
333 reindexTreeByPath(box.ID, p, i, size, luteEngine)
334 if util.IsExiting.Load() {
335 break
336 }
337 }
338
339 if util.IsExiting.Load() {
340 break
341 }
342 }
343
344 // 清理已关闭的笔记本块树
345 boxes = Conf.GetClosedBoxes()
346 for _, box := range boxes {
347 treenode.RemoveBlockTreesByBoxID(box.ID)
348 }
349}
350
351// fixDatabaseIndexByBlockTree 通过块树订正数据库索引。
352func fixDatabaseIndexByBlockTree() {
353 defer logging.Recover()
354
355 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 4, 5))
356 rootUpdatedMap := treenode.GetRootUpdated()
357 dbRootUpdatedMap, err := sql.GetRootUpdated()
358 if err == nil {
359 reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap)
360 }
361}
362
363func reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap map[string]string) {
364 i := -1
365 size := len(rootUpdatedMap)
366 luteEngine := util.NewLute()
367 for rootID, updated := range rootUpdatedMap {
368 i++
369
370 if util.IsExiting.Load() {
371 break
372 }
373
374 rootUpdated := dbRootUpdatedMap[rootID]
375 if "" == rootUpdated {
376 //logging.LogWarnf("not found tree [%s] in database, reindex it", rootID)
377 reindexTree(rootID, i, size, luteEngine)
378 continue
379 }
380
381 if "" == updated {
382 // BlockTree 迁移,v2.6.3 之前没有 updated 字段
383 reindexTree(rootID, i, size, luteEngine)
384 continue
385 }
386
387 btUpdated, _ := time.Parse("20060102150405", updated)
388 dbUpdated, _ := time.Parse("20060102150405", rootUpdated)
389 if dbUpdated.Before(btUpdated.Add(-10 * time.Minute)) {
390 logging.LogWarnf("tree [%s] is not up to date, reindex it", rootID)
391 reindexTree(rootID, i, size, luteEngine)
392 continue
393 }
394
395 if util.IsExiting.Load() {
396 break
397 }
398 }
399
400 var rootIDs []string
401 for rootID := range dbRootUpdatedMap {
402 if _, ok := rootUpdatedMap[rootID]; !ok {
403 rootIDs = append(rootIDs, rootID)
404 }
405
406 if util.IsExiting.Load() {
407 break
408 }
409 }
410 rootIDs = gulu.Str.RemoveDuplicatedElem(rootIDs)
411 roots := map[string]*sql.Block{}
412 blocks := sql.GetBlocks(rootIDs)
413 for _, block := range blocks {
414 roots[block.RootID] = block
415 }
416 var toRemoveRootIDs []string
417 for id, root := range roots {
418 if nil == root {
419 continue
420 }
421
422 toRemoveRootIDs = append(toRemoveRootIDs, id)
423 if util.IsExiting.Load() {
424 break
425 }
426 }
427 toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs)
428 //logging.LogWarnf("tree [%s] is not in block tree, remove it from [%s]", id, root.Box)
429 sql.BatchRemoveTreeQueue(toRemoveRootIDs)
430}
431
432func reindexTreeByPath(box, p string, i, size int, luteEngine *lute.Lute) {
433 tree, err := filesys.LoadTree(box, p, luteEngine)
434 if err != nil {
435 return
436 }
437
438 reindexTree0(tree, i, size)
439}
440
441func reindexTree(rootID string, i, size int, luteEngine *lute.Lute) {
442 root := treenode.GetBlockTree(rootID)
443 if nil == root {
444 logging.LogWarnf("root block [%s] not found", rootID)
445 return
446 }
447
448 tree, err := filesys.LoadTree(root.BoxID, root.Path, luteEngine)
449 if err != nil {
450 if os.IsNotExist(err) {
451 // 文件系统上没有找到该 .sy 文件,则订正块树
452 treenode.RemoveBlockTreesByRootID(rootID)
453 }
454 return
455 }
456
457 reindexTree0(tree, i, size)
458}
459
460func reindexTree0(tree *parse.Tree, i, size int) {
461 updated := tree.Root.IALAttr("updated")
462 if "" == updated {
463 updated = util.TimeFromID(tree.Root.ID)
464 tree.Root.SetIALAttr("updated", updated)
465 indexWriteTreeUpsertQueue(tree)
466 } else {
467 treenode.UpsertBlockTree(tree)
468 sql.IndexTreeQueue(tree)
469 }
470
471 if 0 == i%64 {
472 util.PushStatusBar(fmt.Sprintf(Conf.Language(183), i, size, html.EscapeString(path.Base(tree.HPath))))
473 }
474}