online Minecraft written book viewer

feat(slurp,serve): save to and read from BookContainer

kokirigla.de d16ea67e 4164ccff

verified
+290 -52
+1
.gitignore
··· 3 3 # my local input data 4 4 /worlds 5 5 realm.nbt 6 + container.json.gz
+1
Cargo.lock
··· 1060 1060 "crab_nbt", 1061 1061 "flate2", 1062 1062 "serde", 1063 + "serde_json", 1063 1064 "thiserror", 1064 1065 ] 1065 1066
+28 -6
nara_core/src/book.rs
··· 1 - use serde::{Deserialize, Serialize}; 1 + use serde::{Deserialize, Deserializer, Serialize, Serializer}; 2 2 use sha1::Digest as _; 3 3 use uuid::Uuid; 4 4 5 - use crate::component::Component; 5 + use crate::component::{Component, ComponentObject}; 6 6 7 7 pub type BookHash = [u8; 20]; 8 8 ··· 36 36 PlayerData { 37 37 uuid: Uuid, 38 38 inventory: PlayerInventoryKind, 39 - slot: u8, 39 + slot: i8, 40 40 }, 41 41 /// From a placed block entity, such as a chest. 42 42 BlockEntity { ··· 45 45 x: i32, 46 46 y: i32, 47 47 z: i32, 48 - slot: u8, 48 + slot: i8, 49 49 }, 50 50 Entity { 51 51 dimension: String, ··· 60 60 x: f64, 61 61 y: f64, 62 62 z: f64, 63 - slot: u8, 63 + slot: i8, 64 64 }, 65 65 /// Inside an item at `slot` within another container. 66 66 ItemBlockEntity { 67 67 id: String, 68 - slot: u8, 68 + slot: i8, 69 69 within: Box<BookSource>, 70 70 }, 71 71 } ··· 81 81 #[serde(rename_all = "snake_case")] 82 82 pub struct BookContent { 83 83 pub author: String, 84 + #[serde( 85 + serialize_with = "serialize_pages", 86 + deserialize_with = "deserialize_pages" 87 + )] 84 88 pub pages: Vec<Component>, 85 89 pub title: String, 86 90 #[serde(default)] 87 91 pub generation: BookGeneration, 88 92 #[serde(default)] 89 93 pub resolved: bool, 94 + } 95 + 96 + fn serialize_pages<S: Serializer>( 97 + pages: &[Component], 98 + serializer: S, 99 + ) -> Result<S::Ok, S::Error> { 100 + pages 101 + .iter() 102 + .map(|c| c.clone().into_object()) 103 + .collect::<Vec<ComponentObject>>() 104 + .serialize(serializer) 105 + } 106 + 107 + fn deserialize_pages<'de, D: Deserializer<'de>>( 108 + deserializer: D, 109 + ) -> Result<Vec<Component>, D::Error> { 110 + Vec::<ComponentObject>::deserialize(deserializer) 111 + .map(|objs| objs.into_iter().map(|o| Component::Object(Box::new(o))).collect()) 90 112 } 91 113 92 114 impl BookContent {
+41 -4
nara_core/src/component.rs
··· 133 133 } 134 134 135 135 impl Component { 136 + /// Recursively converts any `Component` into its `ComponentObject` form, 137 + /// collapsing `String` variants into `{text: "..."}` objects and flattening 138 + /// `Array` variants so the first element becomes the root and the rest 139 + /// become children. 140 + pub fn into_object(self) -> ComponentObject { 141 + match self { 142 + Component::String(s) => raw_text_object(s, None), 143 + Component::Object(mut obj) => { 144 + obj.children = obj 145 + .children 146 + .into_iter() 147 + .map(|c| Component::Object(Box::new(c.into_object()))) 148 + .collect(); 149 + *obj 150 + } 151 + Component::Array(mut items) => { 152 + if items.is_empty() { 153 + return raw_text_object(String::new(), None); 154 + } 155 + let mut obj = items.remove(0).into_object(); 156 + obj.children.extend( 157 + items 158 + .into_iter() 159 + .map(|c| Component::Object(Box::new(c.into_object()))), 160 + ); 161 + obj 162 + } 163 + } 164 + } 165 + 136 166 pub fn to_plain_text(&self) -> String { 137 167 match self { 138 168 Component::String(s) => s.clone(), ··· 330 360 } 331 361 } 332 362 333 - fn make_text_object( 363 + fn raw_text_object( 334 364 text: String, 335 365 color: Option<crate::color::Color>, 336 - ) -> Component { 337 - Component::Object(Box::new(ComponentObject { 366 + ) -> ComponentObject { 367 + ComponentObject { 338 368 text: Some(TextComponent { text }), 339 369 translation: None, 340 370 score: None, ··· 352 382 obfuscated: None, 353 383 }, 354 384 children: Vec::new(), 355 - })) 385 + } 386 + } 387 + 388 + fn make_text_object( 389 + text: String, 390 + color: Option<crate::color::Color>, 391 + ) -> Component { 392 + Component::Object(Box::new(raw_text_object(text, color))) 356 393 } 357 394 358 395 fn parse_legacy_section_text(s: &str) -> Component {
+41 -4
nara_core/src/lib.rs
··· 20 20 } 21 21 22 22 impl BookContainer { 23 + pub fn new() -> Self { 24 + Self { 25 + books: HashMap::new(), 26 + } 27 + } 28 + 29 + /// Opens an existing container at `path`, or returns an empty one if the 30 + /// file does not exist yet. 31 + pub fn open_or_new<P>(path: P) -> nara_io::Result<BookContainer> 32 + where 33 + P: AsRef<Path>, 34 + { 35 + if path.as_ref().exists() { 36 + let buffer = std::fs::read(path)?; 37 + nara_io::read_json_gz(&buffer) 38 + } else { 39 + Ok(Self::new()) 40 + } 41 + } 42 + 23 43 pub fn read<P>(path: P) -> nara_io::Result<BookContainer> 24 44 where 25 45 P: AsRef<Path>, 26 46 { 27 47 let buffer = std::fs::read(path)?; 28 - nara_io::read_nbt(&buffer) 48 + nara_io::read_json_gz(&buffer) 29 49 } 30 50 31 - pub fn add(&mut self, book: Book) { 51 + /// Adds a book. Returns `true` if the book was new, `false` if it was 52 + /// already present (duplicate hash). 53 + pub fn add(&mut self, book: Book) -> bool { 32 54 let hash = book.hash(); 33 - if !self.books.contains_key(&hash) { 34 - self.books.insert(hash, book); 55 + if self.books.contains_key(&hash) { 56 + return false; 35 57 } 58 + self.books.insert(hash, book); 59 + true 36 60 } 37 61 38 62 pub fn remove(&mut self, book: Book) { 39 63 self.books.remove(&book.hash()); 64 + } 65 + 66 + pub fn books(&self) -> impl Iterator<Item = &Book> { 67 + self.books.values() 68 + } 69 + 70 + pub fn save<P>(&self, path: P) -> nara_io::Result<()> 71 + where 72 + P: AsRef<Path>, 73 + { 74 + let bytes = nara_io::write_json_gz(self)?; 75 + std::fs::write(path, bytes)?; 76 + Ok(()) 40 77 } 41 78 } 42 79
+1
nara_io/Cargo.toml
··· 7 7 crab_nbt.workspace = true 8 8 flate2.workspace = true 9 9 serde.workspace = true 10 + serde_json.workspace = true 10 11 thiserror.workspace = true
+37 -3
nara_io/src/lib.rs
··· 1 - use flate2::read::{GzDecoder, ZlibDecoder}; 2 - use serde::Deserialize; 3 - use std::io::{Cursor, Read}; 1 + use flate2::{ 2 + Compression, 3 + read::{GzDecoder, ZlibDecoder}, 4 + write::GzEncoder, 5 + }; 6 + use serde::{Deserialize, Serialize}; 7 + use std::io::{Cursor, Read, Write}; 4 8 5 9 #[derive(Debug, thiserror::Error)] 6 10 pub enum Error { ··· 8 12 Io(#[from] std::io::Error), 9 13 #[error(transparent)] 10 14 Nbt(#[from] crab_nbt::error::Error), 15 + #[error(transparent)] 16 + Json(#[from] serde_json::Error), 11 17 } 12 18 13 19 pub type Result<T> = std::result::Result<T, Error>; ··· 20 26 let mut cur = Cursor::new(decomp.as_slice()); 21 27 22 28 Ok(crab_nbt::serde::de::from_cursor::<T>(&mut cur)?) 29 + } 30 + 31 + pub fn write_nbt<T>(value: &T) -> Result<Vec<u8>> 32 + where 33 + T: Serialize, 34 + { 35 + let nbt_bytes = crab_nbt::serde::ser::to_bytes_unnamed(value)?; 36 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 37 + encoder.write_all(&nbt_bytes)?; 38 + Ok(encoder.finish()?) 39 + } 40 + 41 + pub fn write_json_gz<T>(value: &T) -> Result<Vec<u8>> 42 + where 43 + T: Serialize, 44 + { 45 + let json = serde_json::to_vec(value)?; 46 + let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); 47 + encoder.write_all(&json)?; 48 + Ok(encoder.finish()?) 49 + } 50 + 51 + pub fn read_json_gz<T>(src: &[u8]) -> Result<T> 52 + where 53 + T: for<'de> Deserialize<'de>, 54 + { 55 + let decompressed = maybe_decompress(src)?; 56 + Ok(serde_json::from_slice(&decompressed)?) 23 57 } 24 58 25 59 pub fn maybe_decompress(src: &[u8]) -> Result<Vec<u8>> {
+3 -3
nara_slurper_1_12_core/src/lib.rs
··· 29 29 inventory: &[InventoryItemStack], 30 30 nested_block_entity_id: Option<String>, 31 31 nested_source: Option<BookSource>, 32 - make_root: &impl Fn(u8) -> BookSource, 32 + make_root: &impl Fn(i8) -> BookSource, 33 33 ) -> Vec<Book> { 34 34 let mut books = Vec::new(); 35 35 for item in inventory { 36 36 let item_source = match &nested_source { 37 37 Some(outer) => BookSource::ItemBlockEntity { 38 38 id: nested_block_entity_id.clone().unwrap(), 39 - slot: item.slot as u8, 39 + slot: item.slot, 40 40 within: Box::new(outer.clone()), 41 41 }, 42 - None => make_root(item.slot as u8), 42 + None => make_root(item.slot), 43 43 }; 44 44 match &item.item { 45 45 ItemStack::WrittenBook(written_book) => {
+5 -5
nara_slurper_1_12_world/src/lib.rs
··· 112 112 let playerdata = read_nbt::<PlayerData>(&buffer)?; 113 113 let books = playerdata.slurp(); 114 114 if !books.is_empty() { 115 - tracing::info!( 115 + tracing::debug!( 116 116 "Slurped {} book(s) from {}", 117 117 books.len(), 118 118 playerdata.uuid() ··· 122 122 } 123 123 124 124 fn slurp_region_file(path: &Path, dimension: &str) -> Result<Vec<Book>> { 125 - tracing::info!("Slurping {path:?}..."); 125 + tracing::debug!("Slurping {path:?}..."); 126 126 let data = std::fs::read(path)?; 127 127 let region = McRegion::<Chunk>::from_bytes(&data)?; 128 128 ··· 149 149 }, 150 150 ); 151 151 if !slurped.is_empty() { 152 - tracing::info!( 152 + tracing::debug!( 153 153 "Slurped a book from a {} at {:?} in chunk ({}, {}) of {path:?}", 154 154 item_holder_entity.base.id, 155 155 item_holder_entity.base.position, ··· 174 174 }, 175 175 ); 176 176 if !slurped.is_empty() { 177 - tracing::info!( 177 + tracing::debug!( 178 178 "Slurped {} book(s) from a {} at {:?} in chunk ({}, {}) of {path:?}", 179 179 slurped.len(), 180 180 inventoried_entity.base.id, ··· 201 201 } 202 202 }); 203 203 if !slurped.is_empty() { 204 - tracing::info!( 204 + tracing::debug!( 205 205 "Slurped {} book(s) from a {} at ({},{},{}) in chunk ({},{}) of {path:?}", 206 206 slurped.len(), 207 207 te.id,
+24
src/cli.rs
··· 35 35 /// The path to the `realm.nbt` file created by Infinity Item Editor. 36 36 #[arg(short = 'r', long = "realm", default_value = "realm.nbt")] 37 37 pub realm_path: PathBuf, 38 + 39 + /// Path to the book container to create or update. 40 + #[arg( 41 + short = 'c', 42 + long = "container", 43 + default_value = "container.json.gz" 44 + )] 45 + pub container_path: PathBuf, 38 46 } 39 47 40 48 #[derive(Args, Debug)] ··· 50 58 /// Defaults to the number of logical CPUs. 51 59 #[arg(short = 'j', long = "workers")] 52 60 pub workers: Option<usize>, 61 + 62 + /// Path to the book container to create or update. 63 + #[arg( 64 + short = 'c', 65 + long = "container", 66 + default_value = "container.json.gz" 67 + )] 68 + pub container_path: PathBuf, 53 69 } 54 70 55 71 #[derive(Args, Debug)] ··· 73 89 /// The score threshold for fuzzy finding book authors. 74 90 #[arg(short = 'A', long, default_value_t = 0.82)] 75 91 pub author_threshold: f64, 92 + 93 + /// Path to the book container to load. 94 + #[arg( 95 + short = 'c', 96 + long = "container", 97 + default_value = "container.json.gz" 98 + )] 99 + pub container_path: PathBuf, 76 100 77 101 #[arg(short = 's', long = "dont-start-webserver", action = ArgAction::SetFalse)] 78 102 pub start_webserver: bool,
+51 -1
src/library.rs
··· 20 20 by_category: AHashMap<SmolStr, Vec<BookId>>, 21 21 by_author_lc: AHashMap<SmolStr, Vec<BookId>>, 22 22 23 + // per-book data, parallel to `books` 24 + category_by_id: Vec<SmolStr>, 25 + 23 26 // normalized blobs for scoring (same index as `books`) 24 27 norm_title: Vec<String>, 25 28 norm_author: Vec<String>, ··· 52 55 limit: usize, 53 56 } 54 57 58 + /// Assigns a category to a book. Currently always returns "Uncategorized". 59 + fn categorize(_book: &Book) -> SmolStr { 60 + SmolStr::new_static("Uncategorized") 61 + } 62 + 55 63 impl Library { 64 + pub fn new( 65 + content_threshold: f64, 66 + title_threshold: f64, 67 + author_threshold: f64, 68 + ) -> Self { 69 + Self { 70 + books: Vec::new(), 71 + by_hash: AHashMap::new(), 72 + source_by_hash: AHashMap::new(), 73 + by_category: AHashMap::new(), 74 + by_author_lc: AHashMap::new(), 75 + category_by_id: Vec::new(), 76 + norm_title: Vec::new(), 77 + norm_author: Vec::new(), 78 + norm_contents: Vec::new(), 79 + tri_title: AHashMap::new(), 80 + tri_author: AHashMap::new(), 81 + tri_contents: AHashMap::new(), 82 + content_threshold, 83 + title_threshold, 84 + author_threshold, 85 + cache_books_by_author: RefCell::new(new_lru(CACHE_BY_AUTHOR_CAP)), 86 + cache_books_in_category: RefCell::new(new_lru(CACHE_BY_CATEGORY_CAP)), 87 + cache_fuzzy_title: RefCell::new(new_lru(CACHE_FUZZY_CAP)), 88 + cache_fuzzy_author: RefCell::new(new_lru(CACHE_FUZZY_CAP)), 89 + cache_fuzzy_contents: RefCell::new(new_lru(CACHE_FUZZY_CAP)), 90 + cache_fuzzy_all: RefCell::new(new_lru(CACHE_FUZZY_CAP)), 91 + duplicate_books_filtered: 0, 92 + empty_books_filtered: 0, 93 + } 94 + } 95 + 56 96 /// Inserts a book 57 97 pub fn add_book( 58 98 &mut self, ··· 96 136 let source = book.metadata.source.clone(); 97 137 self.books.push(book); 98 138 99 - let category: SmolStr = "todo".into(); // TODO(kokiriglade): classify from contents 139 + let category = categorize(&self.books[id]); 100 140 101 141 // indices... 102 142 self.by_hash.insert(h, id); ··· 105 145 .or_default() 106 146 .push(id); 107 147 self.source_by_hash.insert(h, source); 148 + self.category_by_id.push(category); 108 149 109 150 let author_lc = SmolStr::new(normalize(&self.books[id].content.author)); 110 151 if !author_lc.is_empty() { ··· 399 440 #[inline] 400 441 pub fn source_for_hash(&self, hash: &BookHash) -> Option<&BookSource> { 401 442 self.source_by_hash.get(hash) 443 + } 444 + 445 + /// Returns the category for a book hash, or an empty string if not found. 446 + #[inline] 447 + pub fn category_for_hash(&self, hash: &BookHash) -> &str { 448 + self.by_hash 449 + .get(hash) 450 + .map(|&id| self.category_by_id[id].as_str()) 451 + .unwrap_or("") 402 452 } 403 453 } 404 454
+52 -22
src/main.rs
··· 1 + use std::time::Instant; 2 + 1 3 use anyhow::Context; 2 4 use clap::Parser as _; 5 + use nara_core::BookContainer; 3 6 use nara_slurper_1_12_infinity::Realm; 4 7 5 8 use crate::{ ··· 37 40 .map(|n| n.get()) 38 41 .unwrap_or(4) 39 42 }); 43 + let mut container = 44 + BookContainer::open_or_new(&args.container_path) 45 + .context("Opening container")?; 46 + let start = Instant::now(); 40 47 let books = slurp_world(&args.world_path, num_workers) 41 48 .await 42 49 .context("Slurping world")?; 43 - books.iter().for_each(|book| { 44 - println!( 45 - "{} [{:?}]: {:#?}", 46 - book.content.title, 47 - book.hash(), 48 - book.metadata.source 49 - ) 50 - }); 51 - println!("found {} books", books.len()); 50 + let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0; 51 + let total = books.len(); 52 + let dupes = books 53 + .into_iter() 54 + .filter(|b| !container.add(b.clone())) 55 + .count(); 56 + container 57 + .save(&args.container_path) 58 + .context("Saving container")?; 59 + tracing::info!( 60 + "Found {total} books, of which {dupes} were duplicates in {elapsed_ms:.3} ms" 61 + ); 52 62 } 53 63 }, 54 64 Command::SlurpRealm(args) => match args.version { 55 65 cli::MinecraftVersion::V1_12 => { 56 - let realm = Realm::read(args.realm_path)?; 57 - let books = realm.slurp(); 58 - 59 - books.iter().for_each(|book| { 60 - println!( 61 - "{} [{:?}]: {:#?}", 62 - book.content.title, 63 - book.hash(), 64 - book.metadata.source 65 - ) 66 - }); 67 - println!("found {} books", books.len()); 66 + let mut container = 67 + BookContainer::open_or_new(&args.container_path) 68 + .context("Opening container")?; 69 + let start = Instant::now(); 70 + let books = Realm::read(args.realm_path)?.slurp(); 71 + let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0; 72 + let total = books.len(); 73 + let dupes = books 74 + .into_iter() 75 + .filter(|b| !container.add(b.clone())) 76 + .count(); 77 + container 78 + .save(&args.container_path) 79 + .context("Saving container")?; 80 + tracing::info!( 81 + "Found {total} books, of which {dupes} were duplicates in {elapsed_ms:.3} ms" 82 + ); 68 83 } 69 84 }, 70 85 } ··· 73 88 } 74 89 75 90 fn build_library(args: &ServeArgs) -> anyhow::Result<Library> { 76 - todo!() 91 + let container = BookContainer::open_or_new(&args.container_path) 92 + .context("Opening container")?; 93 + let mut library = Library::new( 94 + args.content_threshold, 95 + args.title_threshold, 96 + args.author_threshold, 97 + ); 98 + for book in container.books() { 99 + library.add_book( 100 + book.clone(), 101 + args.warn_duplicates, 102 + args.warn_empty, 103 + args.filter_empty_books, 104 + ); 105 + } 106 + Ok(library) 77 107 }
+5 -4
src/web/pages.rs
··· 554 554 } 555 555 556 556 fn book_meta(book: &Book, library: &Library) -> BookMeta { 557 - // let hash = book.hash(); 558 - let category: String = todo!(); 557 + let category = library.category_for_hash(&book.hash()).to_string(); 559 558 560 559 let category_href = 561 560 format!("/?category={0}", encode_query_component(&category)); ··· 600 599 title: book.content.title.clone(), 601 600 author: book.content.author.clone(), 602 601 author_href: meta.author_href, 603 - source: todo!(), 602 + source: book.metadata.source.clone(), 604 603 category: meta.category, 605 604 category_href: meta.category_href, 606 605 pages, ··· 711 710 let Some(category_norm) = category_norm else { 712 711 return true; 713 712 }; 714 - todo!() 713 + library 714 + .category_for_hash(&book.hash()) 715 + .eq_ignore_ascii_case(category_norm) 715 716 } 716 717 717 718 fn parse_hash(hex_str: &str) -> Option<[u8; 20]> {