A privacy-first, self-hosted, fully open-source personal knowledge management system, written in TypeScript and Go. (PERSONAL FORK)
at lambda-fork/main 447 lines 13 kB view raw
1// SiYuan - Refactor your thinking 2// Copyright (c) 2020-present, b3log.org 3// 4// This program is free software: you can redistribute it and/or modify 5// it under the terms of the GNU Affero General Public License as published by 6// the Free Software Foundation, either version 3 of the License, or 7// (at your option) any later version. 8// 9// This program is distributed in the hope that it will be useful, 10// but WITHOUT ANY WARRANTY; without even the implied warranty of 11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12// GNU Affero General Public License for more details. 13// 14// You should have received a copy of the GNU Affero General Public License 15// along with this program. If not, see <https://www.gnu.org/licenses/>. 16 17package model 18 19import ( 20 "bytes" 21 "fmt" 22 "io/fs" 23 "path/filepath" 24 "runtime" 25 "runtime/debug" 26 "strings" 27 "sync" 28 "time" 29 30 "github.com/88250/go-humanize" 31 "github.com/88250/gulu" 32 "github.com/88250/lute/ast" 33 "github.com/88250/lute/editor" 34 "github.com/88250/lute/html" 35 "github.com/88250/lute/parse" 36 "github.com/panjf2000/ants/v2" 37 "github.com/siyuan-note/eventbus" 38 "github.com/siyuan-note/filelock" 39 "github.com/siyuan-note/logging" 40 "github.com/siyuan-note/siyuan/kernel/av" 41 "github.com/siyuan-note/siyuan/kernel/cache" 42 "github.com/siyuan-note/siyuan/kernel/filesys" 43 "github.com/siyuan-note/siyuan/kernel/sql" 44 "github.com/siyuan-note/siyuan/kernel/task" 45 "github.com/siyuan-note/siyuan/kernel/treenode" 46 "github.com/siyuan-note/siyuan/kernel/util" 47) 48 49func UpsertIndexes(paths []string) { 50 var syFiles []string 51 for _, p := range paths { 52 if strings.HasSuffix(p, "/") { 53 syFiles = append(syFiles, listSyFiles(p)...) 
54 continue 55 } 56 57 if strings.HasSuffix(p, ".sy") { 58 syFiles = append(syFiles, p) 59 } 60 } 61 62 syFiles = gulu.Str.RemoveDuplicatedElem(syFiles) 63 upsertIndexes(syFiles) 64} 65 66func RemoveIndexes(paths []string) { 67 var syFiles []string 68 for _, p := range paths { 69 if strings.HasSuffix(p, "/") { 70 syFiles = append(syFiles, listSyFiles(p)...) 71 continue 72 } 73 74 if strings.HasSuffix(p, ".sy") { 75 syFiles = append(syFiles, p) 76 } 77 } 78 79 syFiles = gulu.Str.RemoveDuplicatedElem(syFiles) 80 removeIndexes(syFiles) 81} 82 83func listSyFiles(dir string) (ret []string) { 84 dirPath := filepath.Join(util.DataDir, dir) 85 err := filelock.Walk(dirPath, func(path string, d fs.DirEntry, err error) error { 86 if err != nil { 87 logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err) 88 return err 89 } 90 91 if d.IsDir() { 92 return nil 93 } 94 95 if strings.HasSuffix(path, ".sy") { 96 p := filepath.ToSlash(strings.TrimPrefix(path, util.DataDir)) 97 ret = append(ret, p) 98 } 99 return nil 100 }) 101 if err != nil { 102 logging.LogWarnf("walk dir [%s] failed: %s", dirPath, err) 103 } 104 return 105} 106 107func (box *Box) Unindex() { 108 task.AppendTask(task.DatabaseIndex, unindex, box.ID) 109 go func() { 110 sql.FlushQueue() 111 ResetVirtualBlockRefCache() 112 }() 113} 114 115func unindex(boxID string) { 116 ids := treenode.RemoveBlockTreesByBoxID(boxID) 117 RemoveRecentDoc(ids) 118 sql.DeleteBoxQueue(boxID) 119} 120 121func (box *Box) Index() { 122 task.AppendTask(task.DatabaseIndexRef, removeBoxRefs, box.ID) 123 task.AppendTask(task.DatabaseIndex, indexBox, box.ID) 124 task.AppendTask(task.DatabaseIndexRef, IndexRefs) 125 go func() { 126 sql.FlushQueue() 127 ResetVirtualBlockRefCache() 128 }() 129} 130 131func removeBoxRefs(boxID string) { 132 sql.DeleteBoxRefsQueue(boxID) 133} 134 135func indexBox(boxID string) { 136 box := Conf.Box(boxID) 137 if nil == box { 138 return 139 } 140 141 util.SetBootDetails("Listing files...") 142 files := 
box.ListFiles("/") 143 boxLen := len(Conf.GetOpenedBoxes()) 144 if 1 > boxLen { 145 boxLen = 1 146 } 147 bootProgressPart := int32(30.0 / float64(boxLen) / float64(len(files))) 148 149 start := time.Now() 150 luteEngine := util.NewLute() 151 var treeCount int 152 var treeSize int64 153 lock := sync.Mutex{} 154 util.PushStatusBar(fmt.Sprintf("["+html.EscapeString(box.Name)+"] "+Conf.Language(64), len(files))) 155 156 poolSize := runtime.NumCPU() 157 if 4 < poolSize { 158 poolSize = 4 159 } 160 waitGroup := &sync.WaitGroup{} 161 var avNodes []*ast.Node 162 p, _ := ants.NewPoolWithFunc(poolSize, func(arg interface{}) { 163 defer waitGroup.Done() 164 165 file := arg.(*FileInfo) 166 lock.Lock() 167 treeSize += file.size 168 treeCount++ 169 i := treeCount 170 lock.Unlock() 171 tree, err := filesys.LoadTree(box.ID, file.path, luteEngine) 172 if err != nil { 173 logging.LogErrorf("read box [%s] tree [%s] failed: %s", box.ID, file.path, err) 174 return 175 } 176 177 docIAL := parse.IAL2MapUnEsc(tree.Root.KramdownIAL) 178 if "" == docIAL["updated"] { // 早期的数据可能没有 updated 属性,这里进行订正 179 updated := util.TimeFromID(tree.Root.ID) 180 tree.Root.SetIALAttr("updated", updated) 181 docIAL["updated"] = updated 182 if _, writeErr := filesys.WriteTree(tree); nil != writeErr { 183 logging.LogErrorf("write tree [%s] failed: %s", tree.Path, writeErr) 184 } 185 } 186 187 lock.Lock() 188 avNodes = append(avNodes, tree.Root.ChildrenByType(ast.NodeAttributeView)...) 
189 lock.Unlock() 190 191 cache.PutDocIAL(file.path, docIAL) 192 treenode.IndexBlockTree(tree) 193 sql.IndexTreeQueue(tree) 194 util.IncBootProgress(bootProgressPart, fmt.Sprintf(Conf.Language(92), util.ShortPathForBootingDisplay(tree.Path))) 195 if 1 < i && 0 == i%64 { 196 util.PushStatusBar(fmt.Sprintf(Conf.Language(88), i, (len(files))-i)) 197 } 198 }) 199 for _, file := range files { 200 if file.isdir || !strings.HasSuffix(file.name, ".sy") { 201 continue 202 } 203 204 if !ast.IsNodeIDPattern(strings.TrimSuffix(file.name, ".sy")) { 205 // 不以块 ID 命名的 .sy 文件不应该被加载到思源中 https://github.com/siyuan-note/siyuan/issues/16089 206 continue 207 } 208 209 waitGroup.Add(1) 210 invokeErr := p.Invoke(file) 211 if nil != invokeErr { 212 logging.LogErrorf("invoke [%s] failed: %s", file.path, invokeErr) 213 continue 214 } 215 } 216 waitGroup.Wait() 217 p.Release() 218 219 // 关联数据库和块 220 av.BatchUpsertBlockRel(avNodes) 221 222 box.UpdateHistoryGenerated() // 初始化历史生成时间为当前时间 223 end := time.Now() 224 elapsed := end.Sub(start).Seconds() 225 logging.LogInfof("rebuilt database for notebook [%s] in [%.2fs], tree [count=%d, size=%s]", box.ID, elapsed, treeCount, humanize.BytesCustomCeil(uint64(treeSize), 2)) 226 debug.FreeOSMemory() 227 return 228} 229 230func IndexRefs() { 231 start := time.Now() 232 util.SetBootDetails("Resolving refs...") 233 util.PushStatusBar(Conf.Language(54)) 234 util.SetBootDetails("Indexing refs...") 235 236 var defBlockIDs []string 237 luteEngine := util.NewLute() 238 boxes := Conf.GetOpenedBoxes() 239 for _, box := range boxes { 240 pages := pagedPaths(filepath.Join(util.DataDir, box.ID), 32) 241 for _, paths := range pages { 242 for _, treeAbsPath := range paths { 243 data, readErr := filelock.ReadFile(treeAbsPath) 244 if nil != readErr { 245 logging.LogWarnf("get data [path=%s] failed: %s", treeAbsPath, readErr) 246 continue 247 } 248 249 if !bytes.Contains(data, []byte("TextMarkBlockRefID")) && !bytes.Contains(data, []byte("TextMarkFileAnnotationRefID")) { 
250 continue 251 } 252 253 p := filepath.ToSlash(strings.TrimPrefix(treeAbsPath, filepath.Join(util.DataDir, box.ID))) 254 tree, parseErr := filesys.LoadTreeByData(data, box.ID, p, luteEngine) 255 if nil != parseErr { 256 logging.LogWarnf("parse json to tree [%s] failed: %s", treeAbsPath, parseErr) 257 continue 258 } 259 260 ast.Walk(tree.Root, func(n *ast.Node, entering bool) ast.WalkStatus { 261 if !entering { 262 return ast.WalkContinue 263 } 264 265 if treenode.IsBlockRef(n) || treenode.IsFileAnnotationRef(n) { 266 defBlockIDs = append(defBlockIDs, tree.Root.ID) 267 } 268 return ast.WalkContinue 269 }) 270 } 271 } 272 } 273 274 defBlockIDs = gulu.Str.RemoveDuplicatedElem(defBlockIDs) 275 276 i := 0 277 size := len(defBlockIDs) 278 if 0 < size { 279 bootProgressPart := int32(10.0 / float64(size)) 280 281 for _, defBlockID := range defBlockIDs { 282 defTree, loadErr := LoadTreeByBlockID(defBlockID) 283 if nil != loadErr { 284 continue 285 } 286 287 util.IncBootProgress(bootProgressPart, "Indexing ref "+defTree.ID) 288 sql.UpdateRefsTreeQueue(defTree) 289 if 1 < i && 0 == i%64 { 290 util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i)) 291 } 292 i++ 293 } 294 } 295 logging.LogInfof("resolved refs [%d] in [%dms]", size, time.Now().Sub(start).Milliseconds()) 296 util.PushStatusBar(fmt.Sprintf(Conf.Language(55), i)) 297} 298 299var indexEmbedBlockLock = sync.Mutex{} 300 301// IndexEmbedBlockJob 嵌入块支持搜索 https://github.com/siyuan-note/siyuan/issues/7112 302func IndexEmbedBlockJob() { 303 task.AppendTaskWithTimeout(task.DatabaseIndexEmbedBlock, 30*time.Second, autoIndexEmbedBlock) 304} 305 306func autoIndexEmbedBlock() { 307 indexEmbedBlockLock.Lock() 308 defer indexEmbedBlockLock.Unlock() 309 310 embedBlocks := sql.QueryEmptyContentEmbedBlocks() 311 for i, embedBlock := range embedBlocks { 312 markdown := strings.TrimSpace(embedBlock.Markdown) 313 markdown = strings.TrimPrefix(markdown, "{{") 314 stmt := strings.TrimSuffix(markdown, "}}") 315 316 // 嵌入块的 Markdown 
内容需要反转义 317 stmt = html.UnescapeString(stmt) 318 stmt = strings.ReplaceAll(stmt, editor.IALValEscNewLine, "\n") 319 320 // 需要移除首尾的空白字符以判断是否具有 //!js 标记 321 stmt = strings.TrimSpace(stmt) 322 if strings.HasPrefix(stmt, "//!js") { 323 // https://github.com/siyuan-note/siyuan/issues/9648 324 // js 嵌入块不支持自动索引,由前端主动调用 /api/search/updateEmbedBlock 接口更新内容 https://github.com/siyuan-note/siyuan/issues/9736 325 continue 326 } 327 328 if !strings.Contains(strings.ToLower(stmt), "select") { 329 continue 330 } 331 332 queryResultBlocks := sql.SelectBlocksRawStmtNoParse(stmt, 102400) 333 for _, block := range queryResultBlocks { 334 embedBlock.Content += block.Content 335 } 336 if "" == embedBlock.Content { 337 embedBlock.Content = "no query result" 338 } 339 sql.UpdateBlockContentQueue(embedBlock) 340 341 if 63 <= i { // 一次任务中最多处理 64 个嵌入块,防止卡顿 342 break 343 } 344 } 345} 346 347func updateEmbedBlockContent(embedBlockID string, queryResultBlocks []*EmbedBlock) { 348 embedBlock := sql.GetBlock(embedBlockID) 349 if nil == embedBlock { 350 return 351 } 352 353 embedBlock.Content = "" // 嵌入块每查询一次多一个结果 https://github.com/siyuan-note/siyuan/issues/7196 354 for _, block := range queryResultBlocks { 355 embedBlock.Content += block.Block.Markdown 356 } 357 if "" == embedBlock.Content { 358 embedBlock.Content = "no query result" 359 } 360 sql.UpdateBlockContentQueue(embedBlock) 361} 362 363func init() { 364 subscribeSQLEvents() 365} 366 367var ( 368 pushSQLInsertBlocksFTSMsg bool 369 pushSQLDeleteBlocksMsg bool 370) 371 372func subscribeSQLEvents() { 373 // 使用下面的 EvtSQLInsertBlocksFTS 就可以了 374 //eventbus.Subscribe(eventbus.EvtSQLInsertBlocks, func(context map[string]interface{}, current, total, blockCount int, hash string) { 375 // 376 // msg := fmt.Sprintf(Conf.Language(89), current, total, blockCount, hash) 377 // util.SetBootDetails(msg) 378 // util.ContextPushMsg(context, msg) 379 //}) 380 eventbus.Subscribe(eventbus.EvtSQLInsertBlocksFTS, func(context map[string]interface{}, blockCount 
int, hash string) { 381 if !pushSQLInsertBlocksFTSMsg { 382 return 383 } 384 385 current := context["current"].(int) 386 total := context["total"] 387 msg := fmt.Sprintf(Conf.Language(90), current, total, blockCount, hash) 388 util.SetBootDetails(msg) 389 util.ContextPushMsg(context, msg) 390 }) 391 eventbus.Subscribe(eventbus.EvtSQLDeleteBlocks, func(context map[string]interface{}, rootID string) { 392 if !pushSQLDeleteBlocksMsg { 393 return 394 } 395 396 current := context["current"].(int) 397 total := context["total"] 398 msg := fmt.Sprintf(Conf.Language(93), current, total, rootID) 399 util.SetBootDetails(msg) 400 util.ContextPushMsg(context, msg) 401 }) 402 eventbus.Subscribe(eventbus.EvtSQLUpdateBlocksHPaths, func(context map[string]interface{}, blockCount int, hash string) { 403 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container { 404 return 405 } 406 407 current := context["current"].(int) 408 total := context["total"] 409 msg := fmt.Sprintf(Conf.Language(234), current, total, blockCount, hash) 410 util.SetBootDetails(msg) 411 util.ContextPushMsg(context, msg) 412 }) 413 414 eventbus.Subscribe(eventbus.EvtSQLInsertHistory, func(context map[string]interface{}) { 415 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container { 416 return 417 } 418 419 current := context["current"].(int) 420 total := context["total"] 421 msg := fmt.Sprintf(Conf.Language(191), current, total) 422 util.SetBootDetails(msg) 423 util.ContextPushMsg(context, msg) 424 }) 425 426 eventbus.Subscribe(eventbus.EvtSQLInsertAssetContent, func(context map[string]interface{}) { 427 if util.ContainerAndroid == util.Container || util.ContainerIOS == util.Container || util.ContainerHarmony == util.Container { 428 return 429 } 430 431 current := context["current"].(int) 432 total := context["total"] 433 msg := fmt.Sprintf(Conf.Language(217), current, total) 
434 util.SetBootDetails(msg) 435 util.ContextPushMsg(context, msg) 436 }) 437 438 eventbus.Subscribe(eventbus.EvtSQLIndexChanged, func() { 439 Conf.DataIndexState = 1 440 Conf.Save() 441 }) 442 443 eventbus.Subscribe(eventbus.EvtSQLIndexFlushed, func() { 444 Conf.DataIndexState = 0 445 Conf.Save() 446 }) 447}