tangled
alpha
login
or
join now
kokirigla.de
/
nara
0
fork
atom
online Minecraft written book viewer
0
fork
atom
overview
issues
pulls
pipelines
feat(slurp,serve): save to and read from BookContainer
kokirigla.de
2 weeks ago
d16ea67e
4164ccff
verified
This commit was signed with the committer's known signature.
kokirigla.de
SSH Key Fingerprint:
SHA256:BlSEtD3ZoKT3iKveofI8gba+lZ9CEolKRM1Pzy3pAwg=
+290
-52
13 changed files
expand all
collapse all
unified
split
.gitignore
Cargo.lock
nara_core
src
book.rs
component.rs
lib.rs
nara_io
Cargo.toml
src
lib.rs
nara_slurper_1_12_core
src
lib.rs
nara_slurper_1_12_world
src
lib.rs
src
cli.rs
library.rs
main.rs
web
pages.rs
+1
.gitignore
···
3
3
# my local input data
4
4
/worlds
5
5
realm.nbt
6
6
+
container.json.gz
+1
Cargo.lock
···
1060
1060
"crab_nbt",
1061
1061
"flate2",
1062
1062
"serde",
1063
1063
+
"serde_json",
1063
1064
"thiserror",
1064
1065
]
1065
1066
+28
-6
nara_core/src/book.rs
···
1
1
-
use serde::{Deserialize, Serialize};
1
1
+
use serde::{Deserialize, Deserializer, Serialize, Serializer};
2
2
use sha1::Digest as _;
3
3
use uuid::Uuid;
4
4
5
5
-
use crate::component::Component;
5
5
+
use crate::component::{Component, ComponentObject};
6
6
7
7
pub type BookHash = [u8; 20];
8
8
···
36
36
PlayerData {
37
37
uuid: Uuid,
38
38
inventory: PlayerInventoryKind,
39
39
-
slot: u8,
39
39
+
slot: i8,
40
40
},
41
41
/// From a placed block entity, such as a chest.
42
42
BlockEntity {
···
45
45
x: i32,
46
46
y: i32,
47
47
z: i32,
48
48
-
slot: u8,
48
48
+
slot: i8,
49
49
},
50
50
Entity {
51
51
dimension: String,
···
60
60
x: f64,
61
61
y: f64,
62
62
z: f64,
63
63
-
slot: u8,
63
63
+
slot: i8,
64
64
},
65
65
/// Inside an item at `slot` within another container.
66
66
ItemBlockEntity {
67
67
id: String,
68
68
-
slot: u8,
68
68
+
slot: i8,
69
69
within: Box<BookSource>,
70
70
},
71
71
}
···
81
81
#[serde(rename_all = "snake_case")]
82
82
pub struct BookContent {
83
83
pub author: String,
84
84
+
#[serde(
85
85
+
serialize_with = "serialize_pages",
86
86
+
deserialize_with = "deserialize_pages"
87
87
+
)]
84
88
pub pages: Vec<Component>,
85
89
pub title: String,
86
90
#[serde(default)]
87
91
pub generation: BookGeneration,
88
92
#[serde(default)]
89
93
pub resolved: bool,
94
94
+
}
95
95
+
96
96
+
fn serialize_pages<S: Serializer>(
97
97
+
pages: &[Component],
98
98
+
serializer: S,
99
99
+
) -> Result<S::Ok, S::Error> {
100
100
+
pages
101
101
+
.iter()
102
102
+
.map(|c| c.clone().into_object())
103
103
+
.collect::<Vec<ComponentObject>>()
104
104
+
.serialize(serializer)
105
105
+
}
106
106
+
107
107
+
fn deserialize_pages<'de, D: Deserializer<'de>>(
108
108
+
deserializer: D,
109
109
+
) -> Result<Vec<Component>, D::Error> {
110
110
+
Vec::<ComponentObject>::deserialize(deserializer)
111
111
+
.map(|objs| objs.into_iter().map(|o| Component::Object(Box::new(o))).collect())
90
112
}
91
113
92
114
impl BookContent {
+41
-4
nara_core/src/component.rs
···
133
133
}
134
134
135
135
impl Component {
136
136
+
/// Recursively converts any `Component` into its `ComponentObject` form,
137
137
+
/// collapsing `String` variants into `{text: "..."}` objects and flattening
138
138
+
/// `Array` variants so the first element becomes the root and the rest
139
139
+
/// become children.
140
140
+
pub fn into_object(self) -> ComponentObject {
141
141
+
match self {
142
142
+
Component::String(s) => raw_text_object(s, None),
143
143
+
Component::Object(mut obj) => {
144
144
+
obj.children = obj
145
145
+
.children
146
146
+
.into_iter()
147
147
+
.map(|c| Component::Object(Box::new(c.into_object())))
148
148
+
.collect();
149
149
+
*obj
150
150
+
}
151
151
+
Component::Array(mut items) => {
152
152
+
if items.is_empty() {
153
153
+
return raw_text_object(String::new(), None);
154
154
+
}
155
155
+
let mut obj = items.remove(0).into_object();
156
156
+
obj.children.extend(
157
157
+
items
158
158
+
.into_iter()
159
159
+
.map(|c| Component::Object(Box::new(c.into_object()))),
160
160
+
);
161
161
+
obj
162
162
+
}
163
163
+
}
164
164
+
}
165
165
+
136
166
pub fn to_plain_text(&self) -> String {
137
167
match self {
138
168
Component::String(s) => s.clone(),
···
330
360
}
331
361
}
332
362
333
333
-
fn make_text_object(
363
363
+
fn raw_text_object(
334
364
text: String,
335
365
color: Option<crate::color::Color>,
336
336
-
) -> Component {
337
337
-
Component::Object(Box::new(ComponentObject {
366
366
+
) -> ComponentObject {
367
367
+
ComponentObject {
338
368
text: Some(TextComponent { text }),
339
369
translation: None,
340
370
score: None,
···
352
382
obfuscated: None,
353
383
},
354
384
children: Vec::new(),
355
355
-
}))
385
385
+
}
386
386
+
}
387
387
+
388
388
+
fn make_text_object(
389
389
+
text: String,
390
390
+
color: Option<crate::color::Color>,
391
391
+
) -> Component {
392
392
+
Component::Object(Box::new(raw_text_object(text, color)))
356
393
}
357
394
358
395
fn parse_legacy_section_text(s: &str) -> Component {
+41
-4
nara_core/src/lib.rs
···
20
20
}
21
21
22
22
impl BookContainer {
23
23
+
pub fn new() -> Self {
24
24
+
Self {
25
25
+
books: HashMap::new(),
26
26
+
}
27
27
+
}
28
28
+
29
29
+
/// Opens an existing container at `path`, or returns an empty one if the
30
30
+
/// file does not exist yet.
31
31
+
pub fn open_or_new<P>(path: P) -> nara_io::Result<BookContainer>
32
32
+
where
33
33
+
P: AsRef<Path>,
34
34
+
{
35
35
+
if path.as_ref().exists() {
36
36
+
let buffer = std::fs::read(path)?;
37
37
+
nara_io::read_json_gz(&buffer)
38
38
+
} else {
39
39
+
Ok(Self::new())
40
40
+
}
41
41
+
}
42
42
+
23
43
pub fn read<P>(path: P) -> nara_io::Result<BookContainer>
24
44
where
25
45
P: AsRef<Path>,
26
46
{
27
47
let buffer = std::fs::read(path)?;
28
28
-
nara_io::read_nbt(&buffer)
48
48
+
nara_io::read_json_gz(&buffer)
29
49
}
30
50
31
31
-
pub fn add(&mut self, book: Book) {
51
51
+
/// Adds a book. Returns `true` if the book was new, `false` if it was
52
52
+
/// already present (duplicate hash).
53
53
+
pub fn add(&mut self, book: Book) -> bool {
32
54
let hash = book.hash();
33
33
-
if !self.books.contains_key(&hash) {
34
34
-
self.books.insert(hash, book);
55
55
+
if self.books.contains_key(&hash) {
56
56
+
return false;
35
57
}
58
58
+
self.books.insert(hash, book);
59
59
+
true
36
60
}
37
61
38
62
pub fn remove(&mut self, book: Book) {
39
63
self.books.remove(&book.hash());
64
64
+
}
65
65
+
66
66
+
pub fn books(&self) -> impl Iterator<Item = &Book> {
67
67
+
self.books.values()
68
68
+
}
69
69
+
70
70
+
pub fn save<P>(&self, path: P) -> nara_io::Result<()>
71
71
+
where
72
72
+
P: AsRef<Path>,
73
73
+
{
74
74
+
let bytes = nara_io::write_json_gz(self)?;
75
75
+
std::fs::write(path, bytes)?;
76
76
+
Ok(())
40
77
}
41
78
}
42
79
+1
nara_io/Cargo.toml
···
7
7
crab_nbt.workspace = true
8
8
flate2.workspace = true
9
9
serde.workspace = true
10
10
+
serde_json.workspace = true
10
11
thiserror.workspace = true
+37
-3
nara_io/src/lib.rs
···
1
1
-
use flate2::read::{GzDecoder, ZlibDecoder};
2
2
-
use serde::Deserialize;
3
3
-
use std::io::{Cursor, Read};
1
1
+
use flate2::{
2
2
+
Compression,
3
3
+
read::{GzDecoder, ZlibDecoder},
4
4
+
write::GzEncoder,
5
5
+
};
6
6
+
use serde::{Deserialize, Serialize};
7
7
+
use std::io::{Cursor, Read, Write};
4
8
5
9
#[derive(Debug, thiserror::Error)]
6
10
pub enum Error {
···
8
12
Io(#[from] std::io::Error),
9
13
#[error(transparent)]
10
14
Nbt(#[from] crab_nbt::error::Error),
15
15
+
#[error(transparent)]
16
16
+
Json(#[from] serde_json::Error),
11
17
}
12
18
13
19
pub type Result<T> = std::result::Result<T, Error>;
···
20
26
let mut cur = Cursor::new(decomp.as_slice());
21
27
22
28
Ok(crab_nbt::serde::de::from_cursor::<T>(&mut cur)?)
29
29
+
}
30
30
+
31
31
+
pub fn write_nbt<T>(value: &T) -> Result<Vec<u8>>
32
32
+
where
33
33
+
T: Serialize,
34
34
+
{
35
35
+
let nbt_bytes = crab_nbt::serde::ser::to_bytes_unnamed(value)?;
36
36
+
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
37
37
+
encoder.write_all(&nbt_bytes)?;
38
38
+
Ok(encoder.finish()?)
39
39
+
}
40
40
+
41
41
+
pub fn write_json_gz<T>(value: &T) -> Result<Vec<u8>>
42
42
+
where
43
43
+
T: Serialize,
44
44
+
{
45
45
+
let json = serde_json::to_vec(value)?;
46
46
+
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
47
47
+
encoder.write_all(&json)?;
48
48
+
Ok(encoder.finish()?)
49
49
+
}
50
50
+
51
51
+
pub fn read_json_gz<T>(src: &[u8]) -> Result<T>
52
52
+
where
53
53
+
T: for<'de> Deserialize<'de>,
54
54
+
{
55
55
+
let decompressed = maybe_decompress(src)?;
56
56
+
Ok(serde_json::from_slice(&decompressed)?)
23
57
}
24
58
25
59
pub fn maybe_decompress(src: &[u8]) -> Result<Vec<u8>> {
+3
-3
nara_slurper_1_12_core/src/lib.rs
···
29
29
inventory: &[InventoryItemStack],
30
30
nested_block_entity_id: Option<String>,
31
31
nested_source: Option<BookSource>,
32
32
-
make_root: &impl Fn(u8) -> BookSource,
32
32
+
make_root: &impl Fn(i8) -> BookSource,
33
33
) -> Vec<Book> {
34
34
let mut books = Vec::new();
35
35
for item in inventory {
36
36
let item_source = match &nested_source {
37
37
Some(outer) => BookSource::ItemBlockEntity {
38
38
id: nested_block_entity_id.clone().unwrap(),
39
39
-
slot: item.slot as u8,
39
39
+
slot: item.slot,
40
40
within: Box::new(outer.clone()),
41
41
},
42
42
-
None => make_root(item.slot as u8),
42
42
+
None => make_root(item.slot),
43
43
};
44
44
match &item.item {
45
45
ItemStack::WrittenBook(written_book) => {
+5
-5
nara_slurper_1_12_world/src/lib.rs
···
112
112
let playerdata = read_nbt::<PlayerData>(&buffer)?;
113
113
let books = playerdata.slurp();
114
114
if !books.is_empty() {
115
115
-
tracing::info!(
115
115
+
tracing::debug!(
116
116
"Slurped {} book(s) from {}",
117
117
books.len(),
118
118
playerdata.uuid()
···
122
122
}
123
123
124
124
fn slurp_region_file(path: &Path, dimension: &str) -> Result<Vec<Book>> {
125
125
-
tracing::info!("Slurping {path:?}...");
125
125
+
tracing::debug!("Slurping {path:?}...");
126
126
let data = std::fs::read(path)?;
127
127
let region = McRegion::<Chunk>::from_bytes(&data)?;
128
128
···
149
149
},
150
150
);
151
151
if !slurped.is_empty() {
152
152
-
tracing::info!(
152
152
+
tracing::debug!(
153
153
"Slurped a book from a {} at {:?} in chunk ({}, {}) of {path:?}",
154
154
item_holder_entity.base.id,
155
155
item_holder_entity.base.position,
···
174
174
},
175
175
);
176
176
if !slurped.is_empty() {
177
177
-
tracing::info!(
177
177
+
tracing::debug!(
178
178
"Slurped {} book(s) from a {} at {:?} in chunk ({}, {}) of {path:?}",
179
179
slurped.len(),
180
180
inventoried_entity.base.id,
···
201
201
}
202
202
});
203
203
if !slurped.is_empty() {
204
204
-
tracing::info!(
204
204
+
tracing::debug!(
205
205
"Slurped {} book(s) from a {} at ({},{},{}) in chunk ({},{}) of {path:?}",
206
206
slurped.len(),
207
207
te.id,
+24
src/cli.rs
···
35
35
/// The path to the `realm.nbt` file created by Infinity Item Editor.
36
36
#[arg(short = 'r', long = "realm", default_value = "realm.nbt")]
37
37
pub realm_path: PathBuf,
38
38
+
39
39
+
/// Path to the book container to create or update.
40
40
+
#[arg(
41
41
+
short = 'c',
42
42
+
long = "container",
43
43
+
default_value = "container.json.gz"
44
44
+
)]
45
45
+
pub container_path: PathBuf,
38
46
}
39
47
40
48
#[derive(Args, Debug)]
···
50
58
/// Defaults to the number of logical CPUs.
51
59
#[arg(short = 'j', long = "workers")]
52
60
pub workers: Option<usize>,
61
61
+
62
62
+
/// Path to the book container to create or update.
63
63
+
#[arg(
64
64
+
short = 'c',
65
65
+
long = "container",
66
66
+
default_value = "container.json.gz"
67
67
+
)]
68
68
+
pub container_path: PathBuf,
53
69
}
54
70
55
71
#[derive(Args, Debug)]
···
73
89
/// The score threshold for fuzzy finding book authors.
74
90
#[arg(short = 'A', long, default_value_t = 0.82)]
75
91
pub author_threshold: f64,
92
92
+
93
93
+
/// Path to the book container to load.
94
94
+
#[arg(
95
95
+
short = 'c',
96
96
+
long = "container",
97
97
+
default_value = "container.json.gz"
98
98
+
)]
99
99
+
pub container_path: PathBuf,
76
100
77
101
#[arg(short = 's', long = "dont-start-webserver", action = ArgAction::SetFalse)]
78
102
pub start_webserver: bool,
+51
-1
src/library.rs
···
20
20
by_category: AHashMap<SmolStr, Vec<BookId>>,
21
21
by_author_lc: AHashMap<SmolStr, Vec<BookId>>,
22
22
23
23
+
// per-book data, parallel to `books`
24
24
+
category_by_id: Vec<SmolStr>,
25
25
+
23
26
// normalized blobs for scoring (same index as `books`)
24
27
norm_title: Vec<String>,
25
28
norm_author: Vec<String>,
···
52
55
limit: usize,
53
56
}
54
57
58
58
+
/// Assigns a category to a book. Currently always returns "Uncategorized".
59
59
+
fn categorize(_book: &Book) -> SmolStr {
60
60
+
SmolStr::new_static("Uncategorized")
61
61
+
}
62
62
+
55
63
impl Library {
64
64
+
pub fn new(
65
65
+
content_threshold: f64,
66
66
+
title_threshold: f64,
67
67
+
author_threshold: f64,
68
68
+
) -> Self {
69
69
+
Self {
70
70
+
books: Vec::new(),
71
71
+
by_hash: AHashMap::new(),
72
72
+
source_by_hash: AHashMap::new(),
73
73
+
by_category: AHashMap::new(),
74
74
+
by_author_lc: AHashMap::new(),
75
75
+
category_by_id: Vec::new(),
76
76
+
norm_title: Vec::new(),
77
77
+
norm_author: Vec::new(),
78
78
+
norm_contents: Vec::new(),
79
79
+
tri_title: AHashMap::new(),
80
80
+
tri_author: AHashMap::new(),
81
81
+
tri_contents: AHashMap::new(),
82
82
+
content_threshold,
83
83
+
title_threshold,
84
84
+
author_threshold,
85
85
+
cache_books_by_author: RefCell::new(new_lru(CACHE_BY_AUTHOR_CAP)),
86
86
+
cache_books_in_category: RefCell::new(new_lru(CACHE_BY_CATEGORY_CAP)),
87
87
+
cache_fuzzy_title: RefCell::new(new_lru(CACHE_FUZZY_CAP)),
88
88
+
cache_fuzzy_author: RefCell::new(new_lru(CACHE_FUZZY_CAP)),
89
89
+
cache_fuzzy_contents: RefCell::new(new_lru(CACHE_FUZZY_CAP)),
90
90
+
cache_fuzzy_all: RefCell::new(new_lru(CACHE_FUZZY_CAP)),
91
91
+
duplicate_books_filtered: 0,
92
92
+
empty_books_filtered: 0,
93
93
+
}
94
94
+
}
95
95
+
56
96
/// Inserts a book
57
97
pub fn add_book(
58
98
&mut self,
···
96
136
let source = book.metadata.source.clone();
97
137
self.books.push(book);
98
138
99
99
-
let category: SmolStr = "todo".into(); // TODO(kokiriglade): classify from contents
139
139
+
let category = categorize(&self.books[id]);
100
140
101
141
// indices...
102
142
self.by_hash.insert(h, id);
···
105
145
.or_default()
106
146
.push(id);
107
147
self.source_by_hash.insert(h, source);
148
148
+
self.category_by_id.push(category);
108
149
109
150
let author_lc = SmolStr::new(normalize(&self.books[id].content.author));
110
151
if !author_lc.is_empty() {
···
399
440
#[inline]
400
441
pub fn source_for_hash(&self, hash: &BookHash) -> Option<&BookSource> {
401
442
self.source_by_hash.get(hash)
443
443
+
}
444
444
+
445
445
+
/// Returns the category for a book hash, or an empty string if not found.
446
446
+
#[inline]
447
447
+
pub fn category_for_hash(&self, hash: &BookHash) -> &str {
448
448
+
self.by_hash
449
449
+
.get(hash)
450
450
+
.map(|&id| self.category_by_id[id].as_str())
451
451
+
.unwrap_or("")
402
452
}
403
453
}
404
454
+52
-22
src/main.rs
···
1
1
+
use std::time::Instant;
2
2
+
1
3
use anyhow::Context;
2
4
use clap::Parser as _;
5
5
+
use nara_core::BookContainer;
3
6
use nara_slurper_1_12_infinity::Realm;
4
7
5
8
use crate::{
···
37
40
.map(|n| n.get())
38
41
.unwrap_or(4)
39
42
});
43
43
+
let mut container =
44
44
+
BookContainer::open_or_new(&args.container_path)
45
45
+
.context("Opening container")?;
46
46
+
let start = Instant::now();
40
47
let books = slurp_world(&args.world_path, num_workers)
41
48
.await
42
49
.context("Slurping world")?;
43
43
-
books.iter().for_each(|book| {
44
44
-
println!(
45
45
-
"{} [{:?}]: {:#?}",
46
46
-
book.content.title,
47
47
-
book.hash(),
48
48
-
book.metadata.source
49
49
-
)
50
50
-
});
51
51
-
println!("found {} books", books.len());
50
50
+
let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
51
51
+
let total = books.len();
52
52
+
let dupes = books
53
53
+
.into_iter()
54
54
+
.filter(|b| !container.add(b.clone()))
55
55
+
.count();
56
56
+
container
57
57
+
.save(&args.container_path)
58
58
+
.context("Saving container")?;
59
59
+
tracing::info!(
60
60
+
"Found {total} books, of which {dupes} were duplicates in {elapsed_ms:.3} ms"
61
61
+
);
52
62
}
53
63
},
54
64
Command::SlurpRealm(args) => match args.version {
55
65
cli::MinecraftVersion::V1_12 => {
56
56
-
let realm = Realm::read(args.realm_path)?;
57
57
-
let books = realm.slurp();
58
58
-
59
59
-
books.iter().for_each(|book| {
60
60
-
println!(
61
61
-
"{} [{:?}]: {:#?}",
62
62
-
book.content.title,
63
63
-
book.hash(),
64
64
-
book.metadata.source
65
65
-
)
66
66
-
});
67
67
-
println!("found {} books", books.len());
66
66
+
let mut container =
67
67
+
BookContainer::open_or_new(&args.container_path)
68
68
+
.context("Opening container")?;
69
69
+
let start = Instant::now();
70
70
+
let books = Realm::read(args.realm_path)?.slurp();
71
71
+
let elapsed_ms = start.elapsed().as_secs_f64() * 1000.0;
72
72
+
let total = books.len();
73
73
+
let dupes = books
74
74
+
.into_iter()
75
75
+
.filter(|b| !container.add(b.clone()))
76
76
+
.count();
77
77
+
container
78
78
+
.save(&args.container_path)
79
79
+
.context("Saving container")?;
80
80
+
tracing::info!(
81
81
+
"Found {total} books, of which {dupes} were duplicates in {elapsed_ms:.3} ms"
82
82
+
);
68
83
}
69
84
},
70
85
}
···
73
88
}
74
89
75
90
fn build_library(args: &ServeArgs) -> anyhow::Result<Library> {
76
76
-
todo!()
91
91
+
let container = BookContainer::open_or_new(&args.container_path)
92
92
+
.context("Opening container")?;
93
93
+
let mut library = Library::new(
94
94
+
args.content_threshold,
95
95
+
args.title_threshold,
96
96
+
args.author_threshold,
97
97
+
);
98
98
+
for book in container.books() {
99
99
+
library.add_book(
100
100
+
book.clone(),
101
101
+
args.warn_duplicates,
102
102
+
args.warn_empty,
103
103
+
args.filter_empty_books,
104
104
+
);
105
105
+
}
106
106
+
Ok(library)
77
107
}
+5
-4
src/web/pages.rs
···
554
554
}
555
555
556
556
fn book_meta(book: &Book, library: &Library) -> BookMeta {
557
557
-
// let hash = book.hash();
558
558
-
let category: String = todo!();
557
557
+
let category = library.category_for_hash(&book.hash()).to_string();
559
558
560
559
let category_href =
561
560
format!("/?category={0}", encode_query_component(&category));
···
600
599
title: book.content.title.clone(),
601
600
author: book.content.author.clone(),
602
601
author_href: meta.author_href,
603
603
-
source: todo!(),
602
602
+
source: book.metadata.source.clone(),
604
603
category: meta.category,
605
604
category_href: meta.category_href,
606
605
pages,
···
711
710
let Some(category_norm) = category_norm else {
712
711
return true;
713
712
};
714
714
-
todo!()
713
713
+
library
714
714
+
.category_for_hash(&book.hash())
715
715
+
.eq_ignore_ascii_case(category_norm)
715
716
}
716
717
717
718
fn parse_hash(hex_str: &str) -> Option<[u8; 20]> {