A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
at lambda-fork/main 474 lines 13 kB view raw
1// SiYuan - Refactor your thinking 2// Copyright (c) 2020-present, b3log.org 3// 4// This program is free software: you can redistribute it and/or modify 5// it under the terms of the GNU Affero General Public License as published by 6// the Free Software Foundation, either version 3 of the License, or 7// (at your option) any later version. 8// 9// This program is distributed in the hope that it will be useful, 10// but WITHOUT ANY WARRANTY; without even the implied warranty of 11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12// GNU Affero General Public License for more details. 13// 14// You should have received a copy of the GNU Affero General Public License 15// along with this program. If not, see <https://www.gnu.org/licenses/>. 16 17package model 18 19import ( 20 "fmt" 21 "io/fs" 22 "os" 23 "path" 24 "path/filepath" 25 "runtime/debug" 26 "strings" 27 "sync" 28 "time" 29 30 "github.com/88250/gulu" 31 "github.com/88250/lute" 32 "github.com/88250/lute/ast" 33 "github.com/88250/lute/html" 34 "github.com/88250/lute/parse" 35 "github.com/siyuan-note/filelock" 36 "github.com/siyuan-note/logging" 37 "github.com/siyuan-note/siyuan/kernel/filesys" 38 "github.com/siyuan-note/siyuan/kernel/sql" 39 "github.com/siyuan-note/siyuan/kernel/task" 40 "github.com/siyuan-note/siyuan/kernel/treenode" 41 "github.com/siyuan-note/siyuan/kernel/util" 42) 43 44var ( 45 checkIndexOnce = sync.Once{} 46) 47 48// checkIndex 自动校验数据库索引,仅在数据同步执行完成后执行一次。 49func checkIndex() { 50 checkIndexOnce.Do(func() { 51 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container { 52 // 移动端不执行校验 https://ld246.com/article/1734939896061 53 return 54 } 55 56 logging.LogInfof("start checking index...") 57 58 removeDuplicateDatabaseIndex() 59 sql.FlushQueue() 60 61 resetDuplicateBlocksOnFileSys() 62 sql.FlushQueue() 63 64 fixBlockTreeByFileSys() 65 sql.FlushQueue() 66 67 fixDatabaseIndexByBlockTree() 68 sql.FlushQueue() 69 
70 removeDuplicateDatabaseRefs() 71 72 // 后面要加任务的话记得修改推送任务栏的进度 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5)) 73 74 debug.FreeOSMemory() 75 util.PushStatusBar(Conf.Language(185)) 76 logging.LogInfof("finish checking index") 77 }) 78} 79 80// removeDuplicateDatabaseRefs 删除重复的数据库引用关系。 81func removeDuplicateDatabaseRefs() { 82 defer logging.Recover() 83 84 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 5, 5)) 85 duplicatedRootIDs := sql.GetRefDuplicatedDefRootIDs() 86 for _, rootID := range duplicatedRootIDs { 87 refreshRefsByDefID(rootID) 88 } 89 90 for _, rootID := range duplicatedRootIDs { 91 logging.LogWarnf("exist more than one ref duplicated [%s], reindex it", rootID) 92 } 93} 94 95// removeDuplicateDatabaseIndex 删除重复的数据库索引。 96func removeDuplicateDatabaseIndex() { 97 defer logging.Recover() 98 99 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 1, 5)) 100 duplicatedRootIDs := sql.GetDuplicatedRootIDs("blocks") 101 if 1 > len(duplicatedRootIDs) { 102 duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts") 103 if 1 > len(duplicatedRootIDs) && !Conf.Search.CaseSensitive { 104 duplicatedRootIDs = sql.GetDuplicatedRootIDs("blocks_fts_case_insensitive") 105 } 106 } 107 108 roots := sql.GetBlocks(duplicatedRootIDs) 109 rootMap := map[string]*sql.Block{} 110 for _, root := range roots { 111 if nil == root { 112 continue 113 } 114 rootMap[root.ID] = root 115 } 116 117 var toRemoveRootIDs []string 118 var deletes int 119 for _, rootID := range duplicatedRootIDs { 120 root := rootMap[rootID] 121 if nil == root { 122 continue 123 } 124 deletes++ 125 toRemoveRootIDs = append(toRemoveRootIDs, rootID) 126 if util.IsExiting.Load() { 127 break 128 } 129 } 130 toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs) 131 sql.BatchRemoveTreeQueue(toRemoveRootIDs) 132 133 if 0 < deletes { 134 logging.LogWarnf("exist more than one tree duplicated [%d], reindex it", deletes) 135 } 136} 137 138// resetDuplicateBlocksOnFileSys 重置重复 ID 的块。 
https://github.com/siyuan-note/siyuan/issues/7357 139func resetDuplicateBlocksOnFileSys() { 140 defer logging.Recover() 141 142 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 2, 5)) 143 boxes := Conf.GetBoxes() 144 luteEngine := lute.New() 145 blockIDs := map[string]bool{} 146 needRefreshUI := false 147 for _, box := range boxes { 148 // 校验索引阶段自动删除历史遗留的笔记本 history 文件夹 149 legacyHistory := filepath.Join(util.DataDir, box.ID, ".siyuan", "history") 150 if gulu.File.IsDir(legacyHistory) { 151 if removeErr := os.RemoveAll(legacyHistory); nil != removeErr { 152 logging.LogErrorf("remove legacy history failed: %s", removeErr) 153 } else { 154 logging.LogInfof("removed legacy history [%s]", legacyHistory) 155 } 156 } 157 158 boxPath := filepath.Join(util.DataDir, box.ID) 159 var duplicatedTrees []*parse.Tree 160 filelock.Walk(boxPath, func(path string, d fs.DirEntry, err error) error { 161 if err != nil || nil == d { 162 return nil 163 } 164 165 if d.IsDir() { 166 if boxPath == path { 167 // 跳过笔记本文件夹 168 return nil 169 } 170 171 if strings.HasPrefix(d.Name(), ".") { 172 return filepath.SkipDir 173 } 174 175 if !ast.IsNodeIDPattern(d.Name()) { 176 return nil 177 } 178 return nil 179 } 180 181 if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") { 182 return nil 183 } 184 185 if !ast.IsNodeIDPattern(strings.TrimSuffix(d.Name(), ".sy")) { 186 logging.LogWarnf("invalid .sy file name [%s]", path) 187 box.moveCorruptedData(path) 188 return nil 189 } 190 191 p := path[len(boxPath):] 192 p = filepath.ToSlash(p) 193 tree, loadErr := filesys.LoadTree(box.ID, p, luteEngine) 194 if nil != loadErr { 195 logging.LogErrorf("load tree [%s] failed: %s", p, loadErr) 196 return nil 197 } 198 199 needOverwrite := false 200 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 201 if !entering || !n.IsBlock() { 202 return ast.WalkContinue 203 } 204 205 if "" == n.ID { 206 needOverwrite = true 207 treenode.ResetNodeID(n) 208 return 
ast.WalkContinue 209 } 210 211 if !blockIDs[n.ID] { 212 blockIDs[n.ID] = true 213 return ast.WalkContinue 214 } 215 216 // 存在重复的块 ID 217 218 if ast.NodeDocument == n.Type { 219 // 如果是文档根节点,则重置这颗树 220 // 这里不能在迭代中重置,因为如果这个文档存在子文档的话,重置时会重命名子文档文件夹,后续迭代可能会导致子文档 ID 重复 221 duplicatedTrees = append(duplicatedTrees, tree) 222 return ast.WalkStop 223 } 224 225 // 其他情况,重置节点 ID 226 needOverwrite = true 227 treenode.ResetNodeID(n) 228 needRefreshUI = true 229 return ast.WalkContinue 230 }) 231 232 if needOverwrite { 233 logging.LogWarnf("exist more than one node with the same id in tree [%s], reset it", box.ID+p) 234 if _, writeErr := filesys.WriteTree(tree); nil != writeErr { 235 logging.LogErrorf("write tree [%s] failed: %s", p, writeErr) 236 } 237 } 238 return nil 239 }) 240 241 for _, tree := range duplicatedTrees { 242 absPath := filepath.Join(boxPath, tree.Path) 243 logging.LogWarnf("exist more than one tree with the same id [%s], reset it", absPath) 244 recreateTree(tree, absPath) 245 needRefreshUI = true 246 } 247 } 248 249 if needRefreshUI { 250 util.ReloadUI() 251 task.AppendAsyncTaskWithDelay(task.PushMsg, 3*time.Second, util.PushMsg, Conf.Language(190), 5000) 252 } 253} 254 255func recreateTree(tree *parse.Tree, absPath string) { 256 // 删除关于该树的所有块树数据,后面会调用 fixBlockTreeByFileSys() 进行订正补全 257 treenode.RemoveBlockTreesByPathPrefix(strings.TrimSuffix(tree.Path, ".sy")) 258 treenode.RemoveBlockTreesByRootID(tree.ID) 259 260 resetTree(tree, "", true) 261 if _, err := filesys.WriteTree(tree); err != nil { 262 logging.LogWarnf("write tree [%s] failed: %s", tree.Path, err) 263 return 264 } 265 266 if gulu.File.IsDir(strings.TrimSuffix(absPath, ".sy")) { 267 // 重命名子文档文件夹 268 from := strings.TrimSuffix(absPath, ".sy") 269 to := filepath.Join(filepath.Dir(absPath), tree.ID) 270 if renameErr := os.Rename(from, to); nil != renameErr { 271 logging.LogWarnf("rename [%s] failed: %s", from, renameErr) 272 return 273 } 274 } 275 276 if err := filelock.Remove(absPath); err != nil { 277 
logging.LogWarnf("remove [%s] failed: %s", absPath, err) 278 return 279 } 280} 281 282// fixBlockTreeByFileSys 通过文件系统订正块树。 283func fixBlockTreeByFileSys() { 284 defer logging.Recover() 285 286 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 3, 5)) 287 boxes := Conf.GetOpenedBoxes() 288 luteEngine := lute.New() 289 for _, box := range boxes { 290 boxPath := filepath.Join(util.DataDir, box.ID) 291 var paths []string 292 filelock.Walk(boxPath, func(path string, d fs.DirEntry, err error) error { 293 if nil != err || nil == d { 294 return nil 295 } 296 297 if boxPath == path { 298 // 跳过根路径(笔记本文件夹) 299 return nil 300 } 301 302 if d.IsDir() { 303 if strings.HasPrefix(d.Name(), ".") { 304 return filepath.SkipDir 305 } 306 return nil 307 } 308 309 if filepath.Ext(path) != ".sy" || strings.Contains(filepath.ToSlash(path), "/assets/") { 310 return nil 311 } 312 313 p := path[len(boxPath):] 314 p = filepath.ToSlash(p) 315 paths = append(paths, p) 316 return nil 317 }) 318 319 size := len(paths) 320 321 // 清理块树中的冗余数据 322 treenode.ClearRedundantBlockTrees(box.ID, paths) 323 324 // 重新索引缺失的块树 325 missingPaths := treenode.GetNotExistPaths(box.ID, paths) 326 for i, p := range missingPaths { 327 id := path.Base(p) 328 id = strings.TrimSuffix(id, ".sy") 329 if !ast.IsNodeIDPattern(id) { 330 continue 331 } 332 333 reindexTreeByPath(box.ID, p, i, size, luteEngine) 334 if util.IsExiting.Load() { 335 break 336 } 337 } 338 339 if util.IsExiting.Load() { 340 break 341 } 342 } 343 344 // 清理已关闭的笔记本块树 345 boxes = Conf.GetClosedBoxes() 346 for _, box := range boxes { 347 treenode.RemoveBlockTreesByBoxID(box.ID) 348 } 349} 350 351// fixDatabaseIndexByBlockTree 通过块树订正数据库索引。 352func fixDatabaseIndexByBlockTree() { 353 defer logging.Recover() 354 355 util.PushStatusBar(fmt.Sprintf(Conf.Language(58), 4, 5)) 356 rootUpdatedMap := treenode.GetRootUpdated() 357 dbRootUpdatedMap, err := sql.GetRootUpdated() 358 if err == nil { 359 reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap) 360 } 361} 362 
363func reindexTreeByUpdated(rootUpdatedMap, dbRootUpdatedMap map[string]string) { 364 i := -1 365 size := len(rootUpdatedMap) 366 luteEngine := util.NewLute() 367 for rootID, updated := range rootUpdatedMap { 368 i++ 369 370 if util.IsExiting.Load() { 371 break 372 } 373 374 rootUpdated := dbRootUpdatedMap[rootID] 375 if "" == rootUpdated { 376 //logging.LogWarnf("not found tree [%s] in database, reindex it", rootID) 377 reindexTree(rootID, i, size, luteEngine) 378 continue 379 } 380 381 if "" == updated { 382 // BlockTree 迁移,v2.6.3 之前没有 updated 字段 383 reindexTree(rootID, i, size, luteEngine) 384 continue 385 } 386 387 btUpdated, _ := time.Parse("20060102150405", updated) 388 dbUpdated, _ := time.Parse("20060102150405", rootUpdated) 389 if dbUpdated.Before(btUpdated.Add(-10 * time.Minute)) { 390 logging.LogWarnf("tree [%s] is not up to date, reindex it", rootID) 391 reindexTree(rootID, i, size, luteEngine) 392 continue 393 } 394 395 if util.IsExiting.Load() { 396 break 397 } 398 } 399 400 var rootIDs []string 401 for rootID := range dbRootUpdatedMap { 402 if _, ok := rootUpdatedMap[rootID]; !ok { 403 rootIDs = append(rootIDs, rootID) 404 } 405 406 if util.IsExiting.Load() { 407 break 408 } 409 } 410 rootIDs = gulu.Str.RemoveDuplicatedElem(rootIDs) 411 roots := map[string]*sql.Block{} 412 blocks := sql.GetBlocks(rootIDs) 413 for _, block := range blocks { 414 roots[block.RootID] = block 415 } 416 var toRemoveRootIDs []string 417 for id, root := range roots { 418 if nil == root { 419 continue 420 } 421 422 toRemoveRootIDs = append(toRemoveRootIDs, id) 423 if util.IsExiting.Load() { 424 break 425 } 426 } 427 toRemoveRootIDs = gulu.Str.RemoveDuplicatedElem(toRemoveRootIDs) 428 //logging.LogWarnf("tree [%s] is not in block tree, remove it from [%s]", id, root.Box) 429 sql.BatchRemoveTreeQueue(toRemoveRootIDs) 430} 431 432func reindexTreeByPath(box, p string, i, size int, luteEngine *lute.Lute) { 433 tree, err := filesys.LoadTree(box, p, luteEngine) 434 if err != nil { 
435 return 436 } 437 438 reindexTree0(tree, i, size) 439} 440 441func reindexTree(rootID string, i, size int, luteEngine *lute.Lute) { 442 root := treenode.GetBlockTree(rootID) 443 if nil == root { 444 logging.LogWarnf("root block [%s] not found", rootID) 445 return 446 } 447 448 tree, err := filesys.LoadTree(root.BoxID, root.Path, luteEngine) 449 if err != nil { 450 if os.IsNotExist(err) { 451 // 文件系统上没有找到该 .sy 文件,则订正块树 452 treenode.RemoveBlockTreesByRootID(rootID) 453 } 454 return 455 } 456 457 reindexTree0(tree, i, size) 458} 459 460func reindexTree0(tree *parse.Tree, i, size int) { 461 updated := tree.Root.IALAttr("updated") 462 if "" == updated { 463 updated = util.TimeFromID(tree.Root.ID) 464 tree.Root.SetIALAttr("updated", updated) 465 indexWriteTreeUpsertQueue(tree) 466 } else { 467 treenode.UpsertBlockTree(tree) 468 sql.IndexTreeQueue(tree) 469 } 470 471 if 0 == i%64 { 472 util.PushStatusBar(fmt.Sprintf(Conf.Language(183), i, size, html.EscapeString(path.Base(tree.HPath)))) 473 } 474}