Lightweight tagged data library.

feat: Initial commit.

Signed-off-by: Andrew Lilley Brinker <alilleybrinker@gmail.com>

+2295
+9
.gitignore
··· 1 + /target 2 + 3 + 4 + # Added by cargo 5 + # 6 + # already existing elements were commented out 7 + 8 + #/target 9 + /Cargo.lock
+49
Cargo.toml
··· 1 + [package] 2 + name = "tagbuddy" 3 + version = "0.1.0" 4 + edition = "2021" 5 + publish = false 6 + 7 + 8 + #============================================================================ 9 + # Dependencies 10 + 11 + #---------------------------------------------------------------------------- 12 + # Normal Dependencies 13 + 14 + [dependencies.convert_case] 15 + version = "0.6.0" 16 + optional = true 17 + 18 + [dependencies.either] 19 + version = "1.8.1" 20 + optional = true 21 + 22 + [dependencies.itertools] 23 + version = "0.11.0" 24 + 25 + [dependencies.regex] 26 + version = "1.8.4" 27 + optional = true 28 + 29 + [dependencies.string-interner] 30 + version = "0.14.0" 31 + default-features = false 32 + # Omit the `serde` dependency to improve compilation speed. 33 + features = ["std", "inline-more", "backends"] 34 + 35 + [dependencies.typed-builder] 36 + version = "0.14.0" 37 + 38 + #---------------------------------------------------------------------------- 39 + # Development Dependencies 40 + 41 + [dev-dependencies] 42 + anyhow = "1.0.71" 43 + 44 + 45 + #============================================================================ 46 + # Features 47 + 48 + [features] 49 + default = ["convert_case", "either", "regex"]
+160
src/error.rs
//! Errors for producing and consuming tags.

#[cfg(doc)]
use crate::parse::Or;
#[cfg(doc)]
use crate::storage::Storage;
#[cfg(doc)]
use crate::tag::MultipartTag;
#[cfg(doc)]
use crate::tag::Tag;
#[cfg(doc)]
use crate::TagManager;
use std::error::Error as StdError;
use std::fmt::Display;
use std::fmt::Formatter;
use std::fmt::Result as FmtResult;
#[cfg(doc)]
use std::sync::Mutex;

/// Error arising during parsing of new [`Tag`]s.
#[derive(Debug)]
#[non_exhaustive]
pub enum ParseError {
    /// Can't create an empty tag.
    EmptyTag,

    /// Key-value tag is missing a key.
    MissingKey,

    /// Key-value tag is missing a value.
    MissingValue,

    /// Key-value tag is ambiguous; key-value tags must have one separator.
    AmbiguousKeyValueTag,

    /// Tag didn't match a regular expression.
    TagDidntMatchRegex,

    /// Tag is more characters long than allowed.
    TagTooManyChars,

    /// Tag is more bytes long than allowed.
    TagTooManyBytes,

    /// Could not lock the parser prior to parsing.
    CouldNotLock,

    /// Tried to parse a single-part [`MultipartTag`].
    SinglePartMultipart,

    /// Failed an [`Or`] match; holds the errors of the first and second parser, in that order.
    FailedOr(Box<ParseError>, Box<ParseError>),

    /// An underlying storage error arose.
    StorageError(StorageError),
}

impl From<StorageError> for ParseError {
    fn from(e: StorageError) -> Self {
        ParseError::StorageError(e)
    }
}

impl Display for ParseError {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        // Variants carrying data format themselves and return early; every other
        // variant maps to a fixed message written once at the end.
        let message = match self {
            ParseError::EmptyTag => "can't create an empty tag",
            ParseError::MissingKey => "missing key in a key-value tag",
            ParseError::MissingValue => "missing value in a key-value tag",
            ParseError::AmbiguousKeyValueTag => {
                "ambiguous key-value tag; should have just one separator"
            }
            ParseError::TagDidntMatchRegex => "tag didn't match the regular expression",
            ParseError::TagTooManyChars => "tag is too many characters long",
            ParseError::TagTooManyBytes => "tag is too many bytes long",
            ParseError::CouldNotLock => "could not lock parser",
            ParseError::SinglePartMultipart => "can't accept a single-part multipart tag",
            ParseError::FailedOr(e1, e2) => {
                return write!(f, "failed two parsers with errors '{e1}' and '{e2}'");
            }
            ParseError::StorageError(e) => return write!(f, "{e}"),
        };

        f.write_str(message)
    }
}

impl StdError for ParseError {
    /// Only a wrapped [`StorageError`] is reported as a source.
    ///
    /// [`ParseError::FailedOr`] carries two errors, which cannot both be
    /// returned through this single-source API, so it reports none.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        match self {
            ParseError::StorageError(e) => Some(e),
            ParseError::EmptyTag
            | ParseError::MissingKey
            | ParseError::MissingValue
            | ParseError::AmbiguousKeyValueTag
            | ParseError::TagDidntMatchRegex
            | ParseError::TagTooManyChars
            | ParseError::TagTooManyBytes
            | ParseError::CouldNotLock
            | ParseError::SinglePartMultipart
            | ParseError::FailedOr(..) => None,
        }
    }
}

/// Errors arising when resolving [`Tag`]s.
#[derive(Debug)]
#[non_exhaustive]
pub enum ResolveError {
    /// Tag wasn't found in the [`TagManager`].
    TagNotFound,

    /// Key wasn't found in the [`TagManager`].
    KeyNotFound,

    /// Value wasn't found in the [`TagManager`].
    ValueNotFound,

    /// Part wasn't found in the [`TagManager`].
    PartNotFound,

    /// An underlying [`Storage`] error occurred.
    StorageError(StorageError),
}

impl From<StorageError> for ResolveError {
    fn from(e: StorageError) -> Self {
        ResolveError::StorageError(e)
    }
}

impl Display for ResolveError {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        let message = match self {
            ResolveError::TagNotFound => "tag wasn't found in tag manager",
            ResolveError::KeyNotFound => "key wasn't found in tag manager",
            ResolveError::ValueNotFound => "value wasn't found in tag manager",
            ResolveError::PartNotFound => "part wasn't found in tag manager",
            ResolveError::StorageError(e) => return write!(f, "{e}"),
        };

        f.write_str(message)
    }
}

impl StdError for ResolveError {
    /// Only a wrapped [`StorageError`] is reported as a source.
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        match self {
            ResolveError::StorageError(e) => Some(e),
            ResolveError::TagNotFound
            | ResolveError::KeyNotFound
            | ResolveError::ValueNotFound
            | ResolveError::PartNotFound => None,
        }
    }
}

/// Errors arising when interacting with [`Storage`]s.
#[derive(Debug)]
#[non_exhaustive]
pub enum StorageError {
    /// Failed to lock the storage, likely because the [`Mutex`] is poisoned.
    CouldNotLock,
}

impl Display for StorageError {
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        match self {
            StorageError::CouldNotLock => f.write_str("could not lock storage"),
        }
    }
}

impl StdError for StorageError {}
+59
src/label.rs
··· 1 + //! Enables compile-time separation of tags from different managers. 2 + 3 + #[cfg(doc)] 4 + use crate::generate_label; 5 + #[cfg(doc)] 6 + use crate::tag::Tag; 7 + #[cfg(doc)] 8 + use crate::TagManager; 9 + 10 + /// A type for compile-time separation of [`Tag`]s from different [`TagManager`]s. 11 + /// 12 + /// This enables the compile-time guarantee that a [`Tag`] generated by one 13 + /// [`TagManager`] is never resolved through another [`TagManager`] (which might 14 + /// succeed but produce incorrect or nonsensical results at runtime if permitted). 15 + /// 16 + /// # Safety 17 + /// 18 + /// Note that the usage of [`Label`] types in this crate never actually instantiates 19 + /// any value of that type, and in general we recommend using a zero-sized type 20 + /// (this is what's done by the [`generate_label`] macro). 21 + /// 22 + /// There aren't actual safety concerns around its use, and no choice to implement 23 + /// or not implement this marker trait can actually break safety guarantees of the 24 + /// crate, but it's marked `unsafe` to hint toward the special guarantees around 25 + /// the marker trait and encourage its construction through [`generate_label`]. 26 + pub unsafe trait Label: Copy {} 27 + 28 + /// Generate a new type implementing [`Label`]. 29 + /// 30 + /// # Example 31 + /// 32 + /// ``` 33 + /// # use tagbuddy::generate_label; 34 + /// 35 + /// generate_label! { 36 + /// /// Tags used for blog posts in this blog implementation. 37 + /// pub BlogPostTags {} 38 + /// } 39 + /// ``` 40 + #[macro_export] 41 + macro_rules! generate_label { 42 + ( $( $( #[$($attrss:meta)*] )* $visibility:vis $struct_name:ident {} )* ) => { 43 + $( 44 + $crate::generate_label! 
{ @single $( #[$($attrss)*] )* $visibility $struct_name {} } 45 + )* 46 + }; 47 + 48 + ( @single $( #[$($attrss:meta)*] )* $visibility:vis $struct_name:ident {} ) => { 49 + $( #[$($attrss)*] )* 50 + #[derive(Debug, Copy, Clone)] 51 + $visibility struct $struct_name; 52 + unsafe impl $crate::label::Label for $struct_name {} 53 + }; 54 + } 55 + 56 + generate_label! { 57 + /// The default label applied when no other label is provided. 58 + pub DefaultLabel {} 59 + }
+30
src/lib.rs
··· 1 + //! A Rust crate for creating and managing tags and their relationships. 2 + //! 3 + //! "Tags" are string data which may or may not contain some structure, like 4 + //! key-value pairs or multipart segments, and which are attached as metadata 5 + //! to annotate data for organization. 6 + //! 7 + //! This crate defines a set of mechanisms for generically parsing, storing, 8 + //! comparing, and querying sets of tags according to configured policies. 9 + 10 + pub mod error; 11 + pub mod label; 12 + mod manager; 13 + pub mod parse; 14 + pub mod query; 15 + pub mod storage; 16 + pub mod tag; 17 + #[cfg(test)] 18 + mod test; 19 + 20 + pub use crate::manager::TagManager; 21 + 22 + pub mod builder { 23 + //! Contains a builder type for the [`TagManager`]. 24 + 25 + #[cfg(doc)] 26 + use crate::TagManager; 27 + 28 + #[doc(inline)] 29 + pub use crate::manager::TagManagerBuilder; 30 + }
+258
src/manager.rs
··· 1 + //! Produce and resolve tags. 2 + 3 + use crate::error::ResolveError; 4 + use crate::label::DefaultLabel; 5 + use crate::label::Label; 6 + use crate::parse::*; 7 + use crate::storage::Storage; 8 + use crate::tag::KeyValueSep; 9 + #[cfg(doc)] 10 + use crate::tag::KeyValueTag; 11 + #[cfg(doc)] 12 + use crate::tag::MultipartTag; 13 + use crate::tag::PathSep; 14 + use crate::tag::PlainTag; 15 + use crate::tag::Tag; 16 + use crate::{error::ParseError, tag::TagKind}; 17 + #[cfg(doc)] 18 + use std::sync::Mutex; 19 + use std::{convert::identity, hash::BuildHasher}; 20 + use string_interner::backend::Backend as InternerBackend; 21 + use string_interner::DefaultBackend; 22 + use string_interner::DefaultHashBuilder; 23 + use string_interner::DefaultSymbol; 24 + #[cfg(doc)] 25 + use string_interner::StringInterner; 26 + use string_interner::Symbol; 27 + use typed_builder::TypedBuilder; 28 + 29 + /// Constructs [`Tag`]s according to the configured parser and interner. 30 + /// 31 + /// A single [`TagManager`] is responsible for parsing and resolving tags that 32 + /// match the rules of a single configured parser, with storage handled by 33 + /// an underlying [`StringInterner`]. The [`StringInterner`] may be shared 34 + /// with other [`TagManager`]s. 35 + /// 36 + /// [`TagManager`] is designed to be generic over: 37 + /// 38 + /// - The parser used to produce tags. 39 + /// - The interner used to store tag data. 40 + /// 41 + /// The trait bounds on [`TagManager`] ensure that the parser and interner 42 + /// agree on the [`Symbol`] used as handles for the stored string data. 43 + /// This is required because the parser produces [`Tag`]s which store 44 + /// [`Symbol`]s so they can later be resolved into [`String`]s to recover 45 + /// the full originally-input tag data. 46 + /// 47 + /// The manner in which tag data is stored depends on the `T` parameter. 48 + /// [`PlainTag`] stores the full string data in the interner. 
[`KeyValueTag`] 49 + /// stores the key and value data separately, on the expectation that keys 50 + /// especially will be repeated, and thus a lot of space saving is achieved by 51 + /// deduplicating them through separate interning. [`MultipartTag`] stores 52 + /// each part separately, again on the expectation that individual parts will 53 + /// be frequently repeated across tags, resulting in space savings from interning. 54 + #[derive(TypedBuilder)] 55 + pub struct TagManager< 56 + L = DefaultLabel, 57 + S = DefaultSymbol, 58 + T = PlainTag<L, S>, 59 + P = Plain<L, S>, 60 + B = DefaultBackend<S>, 61 + H = DefaultHashBuilder, 62 + > where 63 + L: Label, 64 + S: Symbol, 65 + T: Tag<Label = L, Symbol = S>, 66 + P: Parser<Tag = T> + Send + Sync, 67 + B: InternerBackend<Symbol = S>, 68 + H: BuildHasher, 69 + { 70 + /// Defines how key-value tags are parsed, if key-value tags are permitted. 71 + pub(crate) parser: P, 72 + 73 + /// The separator used for separating key and values in key-value tags. 74 + #[builder(default)] 75 + pub(crate) key_value_separator: KeyValueSep, 76 + 77 + /// The separator used for separating parts in multipart tags. 78 + #[builder(default)] 79 + pub(crate) path_separator: PathSep, 80 + 81 + /// Interns and stores string data for tags, to reduce memory usage. 82 + pub(crate) storage: Storage<L, B, H>, 83 + } 84 + 85 + // These `Send` and `Sync` impls are safe _because_: 86 + // 87 + // 1. `key_value_separator` and `path_separator` are just read-only string slices, so they are 88 + // trivially `Send` and `Sync`. 89 + // 2. `parser` is constrained to be `Send` and `Sync`, either trivially-so, or by being wrapped 90 + // in an `Arc<Mutex<_>>` (in which case it takes advantage of an auto-impl for `Parser` 91 + // that tries to lock the parser before parsing can proceed). 92 + // 3. `storage` is _always_ wrapped in an `Arc<Mutex<_>>`, so it is always `Send` and `Sync`. 
93 + // 94 + // Given the above, `TagManager` is _always_ safe to send and sync, and can implement these traits. 95 + 96 + unsafe impl<L, S, T, P, B, H> Send for TagManager<L, S, T, P, B, H> 97 + where 98 + L: Label, 99 + S: Symbol, 100 + T: Tag<Label = L, Symbol = S>, 101 + P: Parser<Tag = T> + Send + Sync, 102 + B: InternerBackend<Symbol = S>, 103 + H: BuildHasher, 104 + { 105 + } 106 + 107 + unsafe impl<L, S, T, P, B, H> Sync for TagManager<L, S, T, P, B, H> 108 + where 109 + L: Label, 110 + S: Symbol, 111 + T: Tag<Label = L, Symbol = S>, 112 + P: Parser<Tag = T> + Send + Sync, 113 + B: InternerBackend<Symbol = S>, 114 + H: BuildHasher, 115 + { 116 + } 117 + 118 + impl< 119 + L: Label, 120 + S: Symbol, 121 + T: Tag<Label = L, Symbol = S>, 122 + P: Parser<Tag = T> + Send + Sync, 123 + B: InternerBackend<Symbol = S>, 124 + H: BuildHasher, 125 + > TagManager<L, S, T, P, B, H> 126 + { 127 + /// Attempt to parse a structured tag from the provided "raw" tag. 128 + /// 129 + /// This may fail if the tag is empty, or if it violates the configured [`Parser`]'s rules. 130 + pub fn parse_tag(&self, raw: &str) -> Result<P::Tag, ParseError> { 131 + self.parser.parse( 132 + &mut self.storage.lock()?, 133 + self.key_value_separator, 134 + self.path_separator, 135 + raw, 136 + ) 137 + } 138 + 139 + /// Parse tags into a collection of your choosing. 140 + /// 141 + /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 142 + /// storage before starting to parse _any_ tags, and holds it for the duration. 143 + pub fn parse_tags_into<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C 144 + where 145 + C: FromIterator<Result<P::Tag, ParseError>>, 146 + { 147 + self.parse_tags_into_with(src, identity) 148 + } 149 + 150 + /// Parse tags into a collection of your choosing. 
151 + /// 152 + /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 153 + /// storage before starting to parse _any_ tags, and holds it for the duration. 154 + pub fn parse_tags_into_with_kind<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C 155 + where 156 + C: FromIterator<Result<(P::Tag, TagKind), ParseError>>, 157 + { 158 + self.parse_tags_into_with(src, |t| { 159 + let kind = t.kind(); 160 + (t, kind) 161 + }) 162 + } 163 + 164 + /// Parse tags into a collection of your choosing. 165 + /// 166 + /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 167 + /// storage before starting to parse _any_ tags, and holds it for the duration. 168 + pub fn parse_tags_into_with<'raw, O, C>( 169 + &self, 170 + src: impl IntoIterator<Item = &'raw str>, 171 + f: impl FnOnce(P::Tag) -> O + Copy, 172 + ) -> C 173 + where 174 + C: FromIterator<Result<O, ParseError>>, 175 + { 176 + src.into_iter() 177 + .map(move |raw| { 178 + self.parser 179 + .parse( 180 + &mut self.storage.lock()?, 181 + self.key_value_separator, 182 + self.path_separator, 183 + raw, 184 + ) 185 + .map(f) 186 + }) 187 + .collect() 188 + } 189 + 190 + /// Get a string representation of a [`Tag`]. 191 + /// 192 + /// Note that this may fail to resolve a tag if the tag wasn't interned 193 + /// in the current [`TagManager`]. It may alternatively resolve an incorrect tag. 194 + pub fn resolve_tag(&self, tag: &P::Tag) -> Result<String, ResolveError> { 195 + tag.resolve( 196 + &self.storage.lock()?, 197 + self.key_value_separator, 198 + self.path_separator, 199 + ) 200 + } 201 + 202 + /// Get the string representation of a set of [`Tag`]s. 203 + /// 204 + /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock 205 + /// before beginning iteration, and holds it for the duration. 
206 + pub fn resolve_tags_into<'tag, C>(&self, src: impl IntoIterator<Item = &'tag P::Tag>) -> C 207 + where 208 + P::Tag: 'tag, 209 + C: FromIterator<Result<String, ResolveError>>, 210 + { 211 + self.resolve_tags_into_with(src, identity) 212 + } 213 + 214 + /// Get the string representation of a set of [`Tag`]s. 215 + /// 216 + /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock 217 + /// before beginning iteration, and holds it for the duration. 218 + pub fn resolve_tags_into_with<'tag, O, C>( 219 + &self, 220 + src: impl IntoIterator<Item = &'tag P::Tag>, 221 + f: impl FnOnce(String) -> O + Copy, 222 + ) -> C 223 + where 224 + P::Tag: 'tag, 225 + C: FromIterator<Result<O, ResolveError>>, 226 + { 227 + src.into_iter() 228 + .map(move |tag| { 229 + tag.resolve( 230 + &self.storage.lock()?, 231 + self.key_value_separator, 232 + self.path_separator, 233 + ) 234 + .map(f) 235 + }) 236 + .collect() 237 + } 238 + 239 + /// Get the inner [`Storage`] of the [`TagManager`]. 240 + pub fn storage(&self) -> &Storage<L, B, H> { 241 + &self.storage 242 + } 243 + 244 + /// Get the [`Parser`] applied by the [`TagManager`]. 245 + pub fn parser(&self) -> &P { 246 + &self.parser 247 + } 248 + 249 + /// Get the key-value separator (default `":"`) used by the [`TagManager`] for [`KeyValueTag`]s. 250 + pub fn key_value_separator(&self) -> KeyValueSep { 251 + self.key_value_separator 252 + } 253 + 254 + /// Get the path separator (default `"/"`) used by the [`TagManager`] for [`MultipartTag`]s. 255 + pub fn path_separator(&self) -> PathSep { 256 + self.path_separator 257 + } 258 + }
+288
src/parse/adapters.rs
··· 1 + //! Types which augment or modify the behavior of an underlying parser. 2 + 3 + use crate::error::ParseError; 4 + #[cfg(feature = "either")] 5 + use crate::label::Label; 6 + use crate::parse::Parser; 7 + use crate::storage::StorageLock; 8 + use crate::tag::KeyValueSep; 9 + use crate::tag::PathSep; 10 + #[cfg(feature = "either")] 11 + use crate::tag::Tag; 12 + #[cfg(feature = "convert_case")] 13 + pub use convert_case::Case; 14 + #[cfg(feature = "convert_case")] 15 + use convert_case::Casing as _; 16 + #[cfg(feature = "either")] 17 + use either::Either; 18 + #[cfg(feature = "regex")] 19 + use regex::Regex; 20 + #[cfg(feature = "regex")] 21 + use regex::Replacer; 22 + use std::hash::BuildHasher; 23 + use string_interner::backend::Backend as InternerBackend; 24 + #[cfg(feature = "either")] 25 + use string_interner::Symbol; 26 + 27 + // Helper macro to generate parser adapters. 28 + macro_rules! adapters { 29 + ( 30 + $( 31 + $( #[$($attrss:meta)*] )* 32 + $struct:ident $(< $($type_var:ident: $type_bound:ident),* >)? $(($($field_ty:ty),*))? => { $adapter:expr } 33 + )* 34 + ) => { 35 + $( 36 + adapters! { 37 + @single 38 + $( #[$($attrss)*] )* 39 + $struct $(< $($type_var: $type_bound),* >)* $(($($field_ty),*))* => { $adapter } 40 + } 41 + )* 42 + }; 43 + 44 + ( 45 + @single 46 + $(#[$($attrss:meta)*] )* 47 + $struct:ident $(< $($type_var:ident: $type_bound:ident),* >)? $(($($field_ty:ty),* ))? => { $adapter:expr } 48 + ) => { 49 + $( #[$($attrss)*] )* 50 + #[derive(Debug, Clone)] 51 + pub struct $struct<$($($type_var: $type_bound),*,)* P: Parser>($($(pub $field_ty),*,)* pub P); 52 + 53 + impl<$($($type_var: $type_bound),*,)* P: Parser> $struct<$($($type_var),*,)* P> { 54 + /// Parse a token with the given `interner` and `separator`. 
55 + #[allow(clippy::redundant_closure_call)] 56 + fn parse<B, H>( 57 + &self, 58 + storage: &mut StorageLock<'_, <P::Tag as Tag>::Label, B, H>, 59 + key_value_separator: KeyValueSep, 60 + path_separator: PathSep, 61 + raw: &str, 62 + ) -> Result<P::Tag, ParseError> 63 + where 64 + B: InternerBackend<Symbol = <P::Tag as Tag>::Symbol>, 65 + H: BuildHasher 66 + { 67 + ($adapter)(self, storage, key_value_separator, path_separator, raw) 68 + } 69 + 70 + } 71 + 72 + impl<$($($type_var: $type_bound),*,)* P: Parser> Parser for $struct<$($($type_var),*,)* P> { 73 + type Tag = P::Tag; 74 + 75 + fn parse<B, H>( 76 + &self, 77 + storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 78 + key_value_separator: KeyValueSep, 79 + path_separator: PathSep, 80 + raw: &str, 81 + ) -> Result<Self::Tag, ParseError> 82 + where 83 + B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 84 + H: BuildHasher 85 + { 86 + self.parse(storage, key_value_separator, path_separator, raw) 87 + } 88 + } 89 + }; 90 + } 91 + 92 + adapters! { 93 + /// Trim whitespace from one or both sides of the tag. 94 + Trim(TrimBounds) => { 95 + |this: &Trim<P>, interner, kv_sep, path_sep, raw: &str| { 96 + let Trim::<P>(bounds, sub_parser) = this; 97 + let raw = match bounds { 98 + TrimBounds::Both => raw.trim(), 99 + TrimBounds::Start => raw.trim_start(), 100 + TrimBounds::End => raw.trim_end(), 101 + }; 102 + sub_parser.parse(interner, kv_sep, path_sep, raw) 103 + } 104 + } 105 + 106 + /// Filter out tags longer than a maximum number of characters. 107 + MaxChar(usize) => { 108 + |this: &MaxChar<P>, interner, kv_sep, path_sep, raw: &str| { 109 + let MaxChar::<P>(limit, sub_parser) = this; 110 + 111 + if raw.chars().count() > *limit { 112 + return Err(ParseError::TagTooManyChars); 113 + } 114 + 115 + sub_parser.parse(interner, kv_sep, path_sep, raw) 116 + } 117 + } 118 + 119 + /// Filter out tags longer than a maximum number of bytes. 
120 + MaxBytes(usize) => { 121 + |this: &MaxBytes<P>, interner, kv_sep, path_sep, raw: &str| { 122 + let MaxBytes::<P>(limit, sub_parser) = this; 123 + 124 + if raw.len() > *limit { 125 + return Err(ParseError::TagTooManyBytes); 126 + } 127 + 128 + sub_parser.parse(interner, kv_sep, path_sep, raw) 129 + } 130 + } 131 + } 132 + 133 + #[cfg(feature = "convert_case")] 134 + adapters! { 135 + /// Change the case of the tag. 136 + ChangeCase(Case) => { 137 + |this: &ChangeCase<P>, interner, kv_sep, path_sep, raw: &str| { 138 + let ChangeCase::<P>(case, sub_parser) = this; 139 + let raw = raw.to_case(*case); 140 + sub_parser.parse(interner, kv_sep, path_sep, &raw) 141 + } 142 + } 143 + } 144 + 145 + #[cfg(feature = "regex")] 146 + adapters! { 147 + /// Filter tags by matching against a regex. 148 + Match(Regex) => { 149 + |this: &Match<P>, interner, kv_sep, path_sep, raw: &str| { 150 + let Match::<P>(regex, sub_parser) = this; 151 + 152 + let raw = regex 153 + .is_match(raw) 154 + .then_some(raw) 155 + .ok_or(ParseError::TagDidntMatchRegex)?; 156 + 157 + sub_parser.parse(interner, kv_sep, path_sep, raw) 158 + } 159 + } 160 + 161 + /// Replace the content of a tag according to a regex. 162 + Replace<R: CloneableReplacer>(Regex, R, ReplaceCount) => { 163 + |this: &Replace<R, P>, interner, kv_sep, path_sep, raw: &str| { 164 + let Replace::<R, P>(regex, replacer, count, sub_parser) = this; 165 + 166 + let raw = match count { 167 + ReplaceCount::First => regex.replace(raw, replacer.clone()), 168 + ReplaceCount::N(count) => regex.replacen(raw, *count, replacer.clone()), 169 + ReplaceCount::All => regex.replace_all(raw, replacer.clone()), 170 + }; 171 + 172 + sub_parser.parse(interner, kv_sep, path_sep, &raw) 173 + } 174 + } 175 + } 176 + 177 + /// A `regex::Replacer` that can be [`Clone`]d. 178 + /// 179 + /// This is automatically implemented for any type that implements both 180 + /// `regex::Replacer` and [`Clone`]. 
181 + pub trait CloneableReplacer: Replacer + Clone {} 182 + 183 + impl<T: Replacer + Clone> CloneableReplacer for T {} 184 + 185 + // The `Or` adapter is implemented by hand, because making the adapter-generating 186 + // macro learn how to handle all of these bounds and everything isn't worth it. 187 + 188 + /// Apply one parser, and if it fails, apply the other one. 189 + /// 190 + /// Note that the tokens produced by the two parsers have to support the same underlying 191 + /// symbol type, as they're both being backed by the same interner for storage. 192 + #[cfg(feature = "either")] 193 + #[derive(Debug)] 194 + pub struct Or<L, S, T1, T2, P1, P2>(pub P1, pub P2) 195 + where 196 + L: Label, 197 + S: Symbol, 198 + T1: Tag<Label = L, Symbol = S>, 199 + T2: Tag<Label = L, Symbol = S>, 200 + P1: Parser<Tag = T1>, 201 + P2: Parser<Tag = T2>; 202 + 203 + #[cfg(feature = "either")] 204 + impl<L, S, T1, T2, P1, P2> Or<L, S, T1, T2, P1, P2> 205 + where 206 + L: Label, 207 + S: Symbol, 208 + T1: Tag<Label = L, Symbol = S>, 209 + T2: Tag<Label = L, Symbol = S>, 210 + P1: Parser<Tag = T1>, 211 + P2: Parser<Tag = T2>, 212 + { 213 + /// Parse a token with the given `interner` and `separator`. 
214 + fn parse<B, H>( 215 + &self, 216 + storage: &mut StorageLock<'_, L, B, H>, 217 + key_value_separator: KeyValueSep, 218 + path_separator: PathSep, 219 + raw: &str, 220 + ) -> Result<Either<T1, T2>, ParseError> 221 + where 222 + B: InternerBackend<Symbol = S>, 223 + H: BuildHasher, 224 + { 225 + self.0 226 + .parse(storage, key_value_separator, path_separator, raw) 227 + .map(Either::Left) 228 + .or_else(|err1| { 229 + self.1 230 + .parse(storage, key_value_separator, path_separator, raw) 231 + .map(Either::Right) 232 + .map_err(|err2| ParseError::FailedOr(Box::new(err1), Box::new(err2))) 233 + }) 234 + } 235 + } 236 + 237 + #[cfg(feature = "either")] 238 + impl<L, S, T1, T2, P1, P2> Parser for Or<L, S, T1, T2, P1, P2> 239 + where 240 + L: Label, 241 + S: Symbol, 242 + T1: Tag<Label = L, Symbol = S>, 243 + T2: Tag<Label = L, Symbol = S>, 244 + P1: Parser<Tag = T1>, 245 + P2: Parser<Tag = T2>, 246 + { 247 + type Tag = Either<T1, T2>; 248 + 249 + fn parse<B, H>( 250 + &self, 251 + storage: &mut StorageLock<'_, L, B, H>, 252 + key_value_separator: KeyValueSep, 253 + path_separator: PathSep, 254 + raw: &str, 255 + ) -> Result<Self::Tag, ParseError> 256 + where 257 + B: InternerBackend<Symbol = S>, 258 + H: BuildHasher, 259 + { 260 + self.parse(storage, key_value_separator, path_separator, raw) 261 + } 262 + } 263 + 264 + /// Sets which side(s) of the raw tag should be trimmed of whitespace. 265 + #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 266 + pub enum TrimBounds { 267 + /// Both sides should be trimmed. 268 + Both, 269 + 270 + /// Just the starting side should be trimmed. 271 + Start, 272 + 273 + /// Just the ending side should be trimmed. 274 + End, 275 + } 276 + 277 + /// Sets how many replacements should be done when using the [`Replace`] adapter. 278 + #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 279 + pub enum ReplaceCount { 280 + /// Replace just the first instance of the regex match. 
281 + First, 282 + 283 + /// Replace the first `N` instances of the regex match. 284 + N(usize), 285 + 286 + /// Replace all instances of the regex match. 287 + All, 288 + }
+275
src/parse/mod.rs
··· 1 + //! Different tag parsers and their strategies. 2 + 3 + mod adapters; 4 + 5 + use crate::error::ParseError; 6 + use crate::label::DefaultLabel; 7 + use crate::label::Label; 8 + pub use crate::parse::adapters::*; 9 + #[cfg(doc)] 10 + use crate::storage::Storage; 11 + use crate::storage::StorageLock; 12 + use crate::tag::*; 13 + #[cfg(doc)] 14 + use crate::TagManager; 15 + use std::hash::BuildHasher; 16 + use std::marker::PhantomData; 17 + use std::ops::Not as _; 18 + use std::sync::Arc; 19 + use std::sync::Mutex; 20 + use string_interner::backend::Backend as InternerBackend; 21 + use string_interner::DefaultSymbol; 22 + use string_interner::Symbol; 23 + 24 + /// Types that provide a strategy for parsing tags. 25 + /// 26 + /// `Parser`s are required to be [`Send`] and [`Sync`] as we want [`TagManager`] 27 + /// to be [`Send`] and [`Sync`]. For basic parsers that don't maintain 28 + /// any internal state, this is trivial, but more complex parsers may 29 + /// need to establish internal synchronization of their state in the case 30 + /// that they are performing concurrent parses. 31 + pub trait Parser { 32 + /// The type of [`Tag`] produced by the [`Parser`]. 33 + type Tag: Tag; 34 + 35 + /// Parse a given string to produce a new [`Tag`]. 36 + fn parse<B, H>( 37 + &self, 38 + storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 39 + key_value_separator: KeyValueSep, 40 + path_separator: PathSep, 41 + raw: &str, 42 + ) -> Result<Self::Tag, ParseError> 43 + where 44 + B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 45 + H: BuildHasher; 46 + } 47 + 48 + // Implement Parser for any Parser wrapped in `Arc<Mutex<_>>`, to enable 49 + // passing externally-synchronized parsers in addition to trivially-synchronized ones, 50 + // in cases where the parsers maintain internal state. 
51 + impl<P> Parser for Arc<Mutex<P>> 52 + where 53 + P: Parser, 54 + { 55 + type Tag = P::Tag; 56 + 57 + fn parse<B, H>( 58 + &self, 59 + storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 60 + key_value_separator: KeyValueSep, 61 + path_separator: PathSep, 62 + raw: &str, 63 + ) -> Result<Self::Tag, ParseError> 64 + where 65 + B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 66 + H: BuildHasher, 67 + { 68 + let internal_parser = self.lock().map_err(|_| ParseError::CouldNotLock)?; 69 + internal_parser.parse(storage, key_value_separator, path_separator, raw) 70 + } 71 + } 72 + 73 + /// The policy to use for splitting on separators in a [`KeyValue`]. 74 + #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 75 + pub enum KvPolicy { 76 + /// Don't allow ambiguous separators. Only one separator is permitted. 77 + NoAmbiguousSep, 78 + 79 + /// Split keys and values on the first occurence of the separator. 80 + SplitOnFirstSep, 81 + 82 + /// Split keys and values on the last occurence of the separator. 83 + SplitOnLastSep, 84 + } 85 + 86 + /// The policy to use for permitting "single-part" [`MultipartTag`]s. 87 + #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 88 + pub enum MultipartPolicy { 89 + /// Permit single-part tags. 90 + PermitOnePart, 91 + 92 + /// Do not permit single-part tags. 93 + RequireMultipart, 94 + } 95 + 96 + /// Helper macro to construct tag parsers. 97 + /// 98 + /// This macro: 99 + /// 100 + /// 1. Defines each parser as either an empty struct or tuple struct with only public fields. 101 + /// 2. Implements a `parse` inherent method, which calls `check_empty` and then whatever closure 102 + /// is provided by the macro to implement the actual parsing behavior. 103 + /// 3. Implements the `Parser` trait, with `Parser::parse` just delegating to the `parse` 104 + /// inherent method. 
105 + /// 106 + /// The syntax of each parser-defining pattern is: 107 + /// 108 + /// ```text 109 + /// <doc_comment> 110 + /// <struct_name>(<field_types>)? => <tag_type> { 111 + /// <parser_closure> 112 + /// } 113 + /// ``` 114 + macro_rules! parsers { 115 + ( 116 + $( 117 + $( #[$($attrss:meta)*] )* 118 + $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident { 119 + $parser:expr 120 + } 121 + )* 122 + ) => { 123 + $( 124 + parsers! { 125 + @single 126 + $( #[$($attrss)*] )* 127 + $struct { $($field_name: $field_ty),* } => $tag { 128 + $parser 129 + } 130 + } 131 + )* 132 + }; 133 + 134 + ( 135 + @single 136 + $(#[$($attrss:meta)*] )* 137 + $struct:ident { $($field_name:ident: $field_ty:tt),* } => $tag:ident { 138 + $parser:expr 139 + } 140 + ) => { 141 + $( #[$($attrss)*] )* 142 + #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)] 143 + pub struct $struct<L: Label = DefaultLabel, S: Symbol = DefaultSymbol> { 144 + _label: PhantomData<L>, 145 + _symbol: PhantomData<S>, 146 + $( $field_name: $field_ty ),* 147 + } 148 + 149 + impl<L, S> $struct<L, S> where L: Label, S: Symbol { 150 + /// Construct a new parser. 151 + #[allow(clippy::new_without_default)] 152 + pub fn new($( $field_name: $field_ty ),*) -> Self { 153 + Self { 154 + _label: PhantomData, 155 + _symbol: PhantomData, 156 + $($field_name),* 157 + } 158 + } 159 + 160 + /// Parse a token with the given `interner` and `separator`. 
161 + #[allow(clippy::redundant_closure_call)] 162 + pub fn parse<B, H>( 163 + &self, 164 + storage: &mut StorageLock<'_, L, B, H>, 165 + key_value_separator: KeyValueSep, 166 + path_separator: PathSep, 167 + raw: &str 168 + ) -> Result<$tag<L, S>, ParseError> 169 + where 170 + S: Symbol, 171 + B: InternerBackend<Symbol = S>, 172 + H: BuildHasher 173 + { 174 + check_empty(raw)?; 175 + ($parser)(self, storage, key_value_separator, path_separator, raw) 176 + } 177 + } 178 + 179 + impl<L: Label, S: Symbol> Parser for $struct<L, S> { 180 + type Tag = $tag<L, S>; 181 + 182 + fn parse<B, H>( 183 + &self, 184 + storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>, 185 + key_value_separator: KeyValueSep, 186 + path_separator: PathSep, 187 + raw: &str 188 + ) -> Result<Self::Tag, ParseError> 189 + where 190 + B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>, 191 + H: BuildHasher 192 + { 193 + self.parse(storage, key_value_separator, path_separator, raw) 194 + } 195 + } 196 + }; 197 + } 198 + 199 + /// Validate that the raw tag isn't empty, error out if it is. 200 + fn check_empty(raw: &str) -> Result<(), ParseError> { 201 + raw.is_empty() 202 + .not() 203 + .then_some(()) 204 + .ok_or(ParseError::EmptyTag) 205 + } 206 + 207 + parsers! { 208 + /// No internal structure, `':'` default separator. 209 + Plain {} => PlainTag { 210 + |_this, interner, _key_value_separator, _path_separator, raw| Ok(PlainTag::new(interner, raw)) 211 + } 212 + 213 + /// Key-value parser, `':'` default separator. 
214 + KeyValue { policy: KvPolicy } => KeyValueTag { 215 + |this: &KeyValue<L, S>, interner, key_value_separator: KeyValueSep, _path_separator, raw: &str| { 216 + match this.policy { 217 + KvPolicy::NoAmbiguousSep => { 218 + let mut parts_iter = raw.split(key_value_separator.0); 219 + let key = parts_iter.next().ok_or(ParseError::MissingKey)?; 220 + let value = parts_iter.next().ok_or(ParseError::MissingValue)?; 221 + match parts_iter.next() { 222 + Some(_) => Err(ParseError::AmbiguousKeyValueTag), 223 + None => Ok(KeyValueTag::new(interner, key, value)) 224 + } 225 + } 226 + KvPolicy::SplitOnFirstSep => { 227 + match raw.split_once(key_value_separator.0) { 228 + None => Err(ParseError::MissingValue), 229 + Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)), 230 + } 231 + } 232 + KvPolicy::SplitOnLastSep => { 233 + match raw.rsplit_once(key_value_separator.0) { 234 + None => Err(ParseError::MissingValue), 235 + Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)), 236 + } 237 + } 238 + } 239 + } 240 + } 241 + 242 + /// Multipart parser, splits parts on separator, `':'` default separator. 243 + Multipart { policy: MultipartPolicy } => MultipartTag { 244 + |this: &Multipart<L, S>, interner, _key_value_separator, path_separator: PathSep, raw: &str| { 245 + match this.policy { 246 + MultipartPolicy::PermitOnePart => Ok(MultipartTag::new(interner, raw.split(path_separator.0))), 247 + MultipartPolicy::RequireMultipart => { 248 + let parts = raw.split(path_separator.0); 249 + 250 + if parts.clone().count() < 2 { 251 + return Err(ParseError::SinglePartMultipart); 252 + } 253 + 254 + Ok(MultipartTag::new(interner, parts)) 255 + }, 256 + } 257 + } 258 + } 259 + } 260 + 261 + /* # SAFETY 262 + * 263 + * There's no data to sync for any of these; the only fields involved 264 + * are read-only once the type is created (they just set configuration). 265 + * Since there's nothing to sync, there's no worry about deriving this. 
266 + */ 267 + 268 + unsafe impl<L: Label, S: Symbol> Send for Plain<L, S> {} 269 + unsafe impl<L: Label, S: Symbol> Sync for Plain<L, S> {} 270 + 271 + unsafe impl<L: Label, S: Symbol> Send for KeyValue<L, S> {} 272 + unsafe impl<L: Label, S: Symbol> Sync for KeyValue<L, S> {} 273 + 274 + unsafe impl<L: Label, S: Symbol> Send for Multipart<L, S> {} 275 + unsafe impl<L: Label, S: Symbol> Sync for Multipart<L, S> {}
+105
src/query.rs
//! Work-in-progress query support over tagged items.
//!
//! Only the index data structures are sketched out so far; see the design notes at
//! the bottom of this file.

use std::{hash::BuildHasher, collections::{BTreeMap, HashSet}, marker::PhantomData};
use string_interner::backend::Backend as InternerBackend;
use string_interner::Symbol;
use crate::{TagManager, label::Label, tag::Tag, parse::Parser};


/// In-progress builder for a query tied to a single [`TagManager`].
///
/// Holds a borrow of the manager together with the indices built for the query.
// NOTE(review): nothing in this file constructs or uses this type yet.
struct QueryBuilder<
    'm,
    L,
    S,
    T,
    P,
    B,
    H,
>
where
    L: Label,
    S: Symbol,
    T: Tag<Label = L, Symbol = S>,
    P: Parser<Tag = T> + Send + Sync,
    B: InternerBackend<Symbol = S>,
    H: BuildHasher,
{
    /// The manager the query is relative to.
    manager: &'m TagManager<L, S, T, P, B, H>,
    /// Per-tag-kind indices over the symbols being queried.
    indices: QueryIndices<S>,
}

/// One index per built-in tag kind (plain, key-value, multipart).
struct QueryIndices<S> where S: Symbol {
    plain: PlainIndex<S>,
    key_value: KeyValueIndex<S>,
    multipart: MultipartIndex<S>,
}

/// Index over plain tags: a vector of symbols (intended to be kept sorted, per the
/// design notes below).
struct PlainIndex<S>(Vec<S>) where S: Symbol;

/// Index over key-value tags: maps each key symbol to a vector of value symbols.
struct KeyValueIndex<S>(BTreeMap<S, Vec<S>>) where S: Symbol;

/// Index over multipart tags: a collection of tries (a forest, per the design notes
/// below).
struct MultipartIndex<S>(Vec<Trie<S>>) where S: Symbol;

/// Placeholder trie type; it currently stores no data.
struct Trie<S>(PhantomData<S>) where S: Symbol;


/*
    DESIGN NOTES

    The basic design of the query system is:

        manager
            .select_from(&container_of_queryable_things)
            .where(Contains(And("this-tag", Or("that_tag", "someothertag"))))
            .run()


    This isn't the exact API, because it needs to have a way to resolve
    the query tags such that identity-based matching can happen.

    When it's doing the "select_from" construction, it needs to go through the
    queryable things and construct indices of their tags.


    Individual queries probably need to be relative to a single tag manager,
    to be able to match up the parser and storage.

    But then those queries return iterators over tagged items, and the
    intersection of the returned items from multiple queries is the answer
    to all the queries.


    Earlier sketch of the types (superseded in part by the definitions above):

    struct QueryEngine {
        indices: QueryIndices,
    }

    struct QueryIndices {
        plain_index: PlainIndex,
        key_value_index: KeyValueIndex,
        multipart_index: MultipartIndex,
    }

    /*
        Queries might include:
            - Find all items with this tag and that tag but not that tag
            - Find all items with tags starting with this path
            - Find all items with this key for key-value
            - Find all items with this key and a value matching some constraint
                - Specific value
                - Set of values
                - Values match regex
                - Values are parseable into a particular type
                - Values parseable into a type meet some constraint on that type

    */

    // This will just be a sorted vector.
    struct PlainIndex {

    }

    // This one will be a hash map, with keys being the keys of all KV tags, and values being sorted vectors of values.
    // NOTE(review): the live `KeyValueIndex` above uses a `BTreeMap`, not a hash map;
    // confirm which is intended.
    struct KeyValueIndex {

    }

    // This one will be a forest, a set of trees with roots being all the first segments of multipart paths.
    struct MultipartIndex {

    }
*/
+223
src/storage.rs
··· 1 + //! Types defining how tag data is stored. 2 + 3 + use crate::error::StorageError; 4 + use crate::label::DefaultLabel; 5 + use crate::label::Label; 6 + use std::fmt::Debug; 7 + use std::hash::BuildHasher; 8 + use std::marker::PhantomData; 9 + use std::ops::Deref; 10 + use std::ops::DerefMut; 11 + use std::sync::Arc; 12 + use std::sync::Mutex; 13 + use std::sync::MutexGuard; 14 + pub use string_interner::backend::Backend as InternerBackend; 15 + pub use string_interner::DefaultBackend; 16 + pub use string_interner::DefaultHashBuilder; 17 + pub use string_interner::DefaultSymbol; 18 + pub use string_interner::StringInterner; 19 + use string_interner::Symbol; 20 + 21 + /// Default interner, using the default backend, symbols, and hashing. 22 + pub type DefaultInterner = StringInterner<DefaultBackend<DefaultSymbol>, DefaultHashBuilder>; 23 + 24 + /// Stores the actual tag data. 25 + /// 26 + /// A [`Storage`] is, essentially, a wrapper around a [`StringInterner`] that handles three 27 + /// things: 1) Ensuring the interner is always wrapped in an `Arc<Mutex<_>>`, 2) providing 28 + /// a convenient `lock` method and associated `StorageLockGuard` type to make the API for 29 + /// _using_ the interner more ergonomic, and 3) keying the storage on the "label" type associated 30 + /// with it, while enabling the underlying interners to be shared even if the labels are 31 + /// different. 32 + pub struct Storage<L = DefaultLabel, B = DefaultBackend<DefaultSymbol>, H = DefaultHashBuilder>( 33 + Arc<Mutex<StringInterner<B, H>>>, 34 + PhantomData<L>, 35 + ) 36 + where 37 + L: Label, 38 + B: InternerBackend, 39 + <B as InternerBackend>::Symbol: Symbol, 40 + H: BuildHasher; 41 + 42 + impl<L, B, H> Storage<L, B, H> 43 + where 44 + L: Label, 45 + B: InternerBackend, 46 + <B as InternerBackend>::Symbol: Symbol, 47 + H: BuildHasher + Default, 48 + { 49 + /// Make a [`Storage`] with a freshly-created [`StringInterner`]. 
50 + pub fn fresh() -> Self { 51 + Storage::unique(StringInterner::<B, H>::new()) 52 + } 53 + 54 + /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified capacity. 55 + pub fn fresh_with_capacity(cap: usize) -> Self { 56 + Storage::unique(StringInterner::<B, H>::with_capacity(cap)) 57 + } 58 + } 59 + 60 + impl<L, B, H> Storage<L, B, H> 61 + where 62 + L: Label, 63 + B: InternerBackend, 64 + <B as InternerBackend>::Symbol: Symbol, 65 + H: BuildHasher, 66 + { 67 + /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified hash builder. 68 + pub fn fresh_with_hasher(hash_builder: H) -> Self { 69 + Storage::unique(StringInterner::<B, H>::with_hasher(hash_builder)) 70 + } 71 + 72 + /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified capacity and hash builder. 73 + pub fn fresh_with_capacity_and_hasher(cap: usize, hash_builder: H) -> Self { 74 + Storage::unique(StringInterner::<B, H>::with_capacity_and_hasher( 75 + cap, 76 + hash_builder, 77 + )) 78 + } 79 + 80 + /// Take ownership of a singular interner to produce a [`Storage`]. 81 + pub fn unique(interner: StringInterner<B, H>) -> Self { 82 + Storage(Arc::new(Mutex::new(interner)), PhantomData) 83 + } 84 + 85 + /// Produce a [`Storage`] which may share its underlying interner. 86 + pub fn shared(interner: &Arc<Mutex<StringInterner<B, H>>>) -> Self { 87 + Storage(Arc::clone(interner), PhantomData) 88 + } 89 + 90 + /// Make a [`Storage`] by copying and sharing the underlying interner from the provided [`Storage`]. 91 + pub fn shallow_clone<L2>(&self) -> Storage<L2, B, H> 92 + where 93 + L2: Label, 94 + { 95 + Storage::shared(self) 96 + } 97 + 98 + /// Lock the [`Storage`]'s underlying [`StringInterner`]. 
99 + pub fn lock(&self) -> Result<StorageLock<'_, L, B, H>, StorageError> { 100 + self.0 101 + .lock() 102 + .map(|guard| StorageLock(guard, PhantomData)) 103 + .map_err(|_| StorageError::CouldNotLock) 104 + } 105 + } 106 + 107 + impl<L, B, H> Storage<L, B, H> 108 + where 109 + L: Label, 110 + B: InternerBackend + Clone, 111 + <B as InternerBackend>::Symbol: Symbol, 112 + H: BuildHasher + Clone, 113 + { 114 + /// Make a [`Storage`] by completely copying all data stored in the provided [`Storage`] into a fresh interner. 115 + pub fn deep_clone<L2>(&self) -> Result<Storage<L2, B, H>, StorageError> 116 + where 117 + L2: Label, 118 + { 119 + Ok(Storage::unique(self.lock()?.clone())) 120 + } 121 + } 122 + 123 + impl<L, B, H> Debug for Storage<L, B, H> 124 + where 125 + L: Label, 126 + B: InternerBackend + Debug, 127 + <B as InternerBackend>::Symbol: Symbol + Debug, 128 + H: BuildHasher, 129 + { 130 + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 131 + f.debug_struct("Storage").field("0", &self.0).finish() 132 + } 133 + } 134 + 135 + impl Default for Storage<DefaultLabel, DefaultBackend<DefaultSymbol>, DefaultHashBuilder> { 136 + fn default() -> Self { 137 + Storage::unique(DefaultInterner::new()) 138 + } 139 + } 140 + 141 + impl<L, B, H> Deref for Storage<L, B, H> 142 + where 143 + L: Label, 144 + B: InternerBackend, 145 + <B as InternerBackend>::Symbol: Symbol, 146 + H: BuildHasher, 147 + { 148 + type Target = Arc<Mutex<StringInterner<B, H>>>; 149 + 150 + fn deref(&self) -> &Self::Target { 151 + &self.0 152 + } 153 + } 154 + 155 + impl<L, B, H> DerefMut for Storage<L, B, H> 156 + where 157 + L: Label, 158 + B: InternerBackend, 159 + <B as InternerBackend>::Symbol: Symbol, 160 + H: BuildHasher, 161 + { 162 + fn deref_mut(&mut self) -> &mut Self::Target { 163 + &mut self.0 164 + } 165 + } 166 + 167 + impl<L, B, H> From<StringInterner<B, H>> for Storage<L, B, H> 168 + where 169 + L: Label, 170 + B: InternerBackend, 171 + <B as 
InternerBackend>::Symbol: Symbol, 172 + H: BuildHasher, 173 + { 174 + fn from(interner: StringInterner<B, H>) -> Self { 175 + Storage::unique(interner) 176 + } 177 + } 178 + 179 + impl<L, B, H> From<&Arc<Mutex<StringInterner<B, H>>>> for Storage<L, B, H> 180 + where 181 + L: Label, 182 + B: InternerBackend, 183 + <B as InternerBackend>::Symbol: Symbol, 184 + H: BuildHasher, 185 + { 186 + fn from(interner: &Arc<Mutex<StringInterner<B, H>>>) -> Self { 187 + Storage::shared(interner) 188 + } 189 + } 190 + 191 + /// A lock on the underlying [`StringInterner`] in a [`Storage`]. 192 + pub struct StorageLock<'lock, L, B, H>(MutexGuard<'lock, StringInterner<B, H>>, PhantomData<L>) 193 + where 194 + L: Label, 195 + B: InternerBackend, 196 + <B as InternerBackend>::Symbol: Symbol, 197 + H: BuildHasher; 198 + 199 + impl<'lock, L, B, H> Deref for StorageLock<'lock, L, B, H> 200 + where 201 + L: Label, 202 + B: InternerBackend, 203 + <B as InternerBackend>::Symbol: Symbol, 204 + H: BuildHasher, 205 + { 206 + type Target = MutexGuard<'lock, StringInterner<B, H>>; 207 + 208 + fn deref(&self) -> &Self::Target { 209 + &self.0 210 + } 211 + } 212 + 213 + impl<'lock, L, B, H> DerefMut for StorageLock<'lock, L, B, H> 214 + where 215 + L: Label, 216 + B: InternerBackend, 217 + <B as InternerBackend>::Symbol: Symbol, 218 + H: BuildHasher, 219 + { 220 + fn deref_mut(&mut self) -> &mut Self::Target { 221 + &mut self.0 222 + } 223 + }
+463
src/tag.rs
··· 1 + //! Different kinds of [`Tag`]s that can be parsed. 2 + 3 + use crate::error::ResolveError; 4 + use crate::label::DefaultLabel; 5 + use crate::label::Label; 6 + #[cfg(doc)] 7 + use crate::storage::Storage; 8 + use crate::storage::StorageLock; 9 + #[cfg(doc)] 10 + use crate::TagManager; 11 + #[cfg(feature = "either")] 12 + use either::Either; 13 + use itertools::intersperse_with; 14 + use std::fmt::Display; 15 + use std::fmt::Formatter; 16 + use std::fmt::Result as FmtResult; 17 + use std::hash::BuildHasher; 18 + use std::marker::PhantomData; 19 + use string_interner::backend::Backend as InternerBackend; 20 + use string_interner::DefaultSymbol; 21 + #[cfg(doc)] 22 + use string_interner::StringInterner; 23 + use string_interner::Symbol; 24 + 25 + /// A trait defining a [`Tag`] which contains interned data. 26 + /// 27 + /// The _only_ defining operation of a [`Tag`] is that it can be 28 + /// converted back into a [`String`] using the [`Storage`] that 29 + /// created it and the correct separator configured by the [`TagManager`] 30 + /// that built it. 31 + /// 32 + /// [`Tag`]s have an underlying [`Symbol`] used to define their storage. 33 + /// Internally, [`Tag`]s are just a set of [`Symbol`]s used to make 34 + /// storage and identity comparison cheap while enabling reconstruction 35 + /// of the original [`String`]. 36 + pub trait Tag { 37 + /// The label of the [`TagManager`] used to produce the [`Tag`]. 38 + type Label: Label; 39 + 40 + /// The [`Symbol`] used by the [`Storage`] as a handle to the stored string data. 41 + type Symbol: Symbol; 42 + 43 + /// Get the [`TagKind`] of the current tag. 44 + fn kind(&self) -> TagKind; 45 + 46 + /// Try to resolve a [`Tag`] back into a [`String`]. 
47 + fn resolve<B, H>( 48 + &self, 49 + storage: &StorageLock<'_, Self::Label, B, H>, 50 + key_value_separator: KeyValueSep, 51 + path_separator: PathSep, 52 + ) -> Result<String, ResolveError> 53 + where 54 + B: InternerBackend<Symbol = Self::Symbol>, 55 + H: BuildHasher; 56 + } 57 + 58 + #[cfg(feature = "either")] 59 + // Auto-impl for `Either` wrapping two `Tag`s. 60 + impl<L, S, T1, T2> Tag for Either<T1, T2> 61 + where 62 + L: Label, 63 + S: Symbol, 64 + T1: Tag<Label = L, Symbol = S>, 65 + T2: Tag<Label = L, Symbol = S>, 66 + { 67 + type Label = L; 68 + type Symbol = S; 69 + 70 + fn resolve<B, H>( 71 + &self, 72 + storage: &StorageLock<'_, Self::Label, B, H>, 73 + key_value_separator: KeyValueSep, 74 + path_separator: PathSep, 75 + ) -> Result<String, ResolveError> 76 + where 77 + B: InternerBackend<Symbol = Self::Symbol>, 78 + H: BuildHasher, 79 + { 80 + match self { 81 + Either::Left(t) => t.resolve(storage, key_value_separator, path_separator), 82 + Either::Right(t) => t.resolve(storage, key_value_separator, path_separator), 83 + } 84 + } 85 + 86 + fn kind(&self) -> TagKind { 87 + match self { 88 + Either::Left(t) => t.kind(), 89 + Either::Right(t) => t.kind(), 90 + } 91 + } 92 + } 93 + 94 + //--------------------------------------------------------------------------- 95 + 96 + /// A [`Tag`] without internal structure. 97 + /// 98 + /// [`PlainTag`] interns the full contents of a tag together. 99 + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 100 + pub struct PlainTag<L = DefaultLabel, S = DefaultSymbol>(S, PhantomData<L>) 101 + where 102 + L: Label, 103 + S: Symbol; 104 + 105 + impl<L: Label, S: Symbol> PlainTag<L, S> { 106 + /// Construct a new [`PlainTag`]. 107 + pub(crate) fn new<B, H>(storage: &mut StorageLock<'_, L, B, H>, raw: &str) -> Self 108 + where 109 + B: InternerBackend<Symbol = S>, 110 + H: BuildHasher, 111 + { 112 + PlainTag(storage.get_or_intern(raw), PhantomData) 113 + } 114 + 115 + /// Resolve the whole tag into a [`String`]. 
116 + pub fn resolve<B, H>(&self, storage: &StorageLock<'_, L, B, H>) -> Result<String, ResolveError> 117 + where 118 + B: InternerBackend<Symbol = S>, 119 + H: BuildHasher, 120 + { 121 + self.resolve_str(storage).map(ToString::to_string) 122 + } 123 + 124 + /// Resolve the whole tag into a string slice. 125 + /// 126 + /// Note that the returned string slice is a view into the underlying interner 127 + /// data, which means you're holding a borrow on the interner as long as the slice 128 + /// is held. If you want to let go of the borrow, copy the slice into a new owned 129 + /// string. 130 + pub fn resolve_str<'intern, B, H>( 131 + &self, 132 + storage: &'intern StorageLock<'_, L, B, H>, 133 + ) -> Result<&'intern str, ResolveError> 134 + where 135 + B: InternerBackend<Symbol = S>, 136 + H: BuildHasher, 137 + { 138 + storage.resolve(self.0).ok_or(ResolveError::TagNotFound) 139 + } 140 + } 141 + 142 + impl<L: Label, S: Symbol> Tag for PlainTag<L, S> { 143 + type Label = L; 144 + type Symbol = S; 145 + 146 + fn resolve<B, H>( 147 + &self, 148 + storage: &StorageLock<'_, Self::Label, B, H>, 149 + _key_value_separator: KeyValueSep, 150 + _path_separator: PathSep, 151 + ) -> Result<String, ResolveError> 152 + where 153 + B: InternerBackend<Symbol = Self::Symbol>, 154 + H: BuildHasher, 155 + { 156 + self.resolve(storage) 157 + } 158 + 159 + fn kind(&self) -> TagKind { 160 + TagKind::Plain 161 + } 162 + } 163 + 164 + //--------------------------------------------------------------------------- 165 + 166 + /// A [`Tag`] composed of a key and a value. 167 + /// 168 + /// [`KeyValueTag`] interns the key and value separately, on the expectation 169 + /// that keys especially will be frequently repeated across tags. 
170 + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 171 + pub struct KeyValueTag<L = DefaultLabel, S = DefaultSymbol>(S, S, PhantomData<L>) 172 + where 173 + L: Label, 174 + S: Symbol; 175 + 176 + impl<L: Label, S: Symbol> KeyValueTag<L, S> { 177 + /// Construct a new [`KeyValueTag`]. 178 + pub(crate) fn new<B, H>(storage: &mut StorageLock<'_, L, B, H>, key: &str, value: &str) -> Self 179 + where 180 + B: InternerBackend<Symbol = S>, 181 + H: BuildHasher, 182 + { 183 + KeyValueTag( 184 + storage.get_or_intern(key), 185 + storage.get_or_intern(value), 186 + PhantomData, 187 + ) 188 + } 189 + 190 + /// Resolve the whole tag into a [`String`]. 191 + pub fn resolve<B, H>( 192 + &self, 193 + storage: &StorageLock<'_, L, B, H>, 194 + key_value_separator: KeyValueSep, 195 + _path_separator: PathSep, 196 + ) -> Result<String, ResolveError> 197 + where 198 + B: InternerBackend<Symbol = S>, 199 + H: BuildHasher, 200 + { 201 + self.resolve_key_value(storage) 202 + .map(|(key, value)| format!("{key}{key_value_separator}{value}")) 203 + } 204 + 205 + /// Resolve the key and value parts of the tag separately. 206 + /// 207 + /// Note that the returned string slices are views into the underlying interner 208 + /// data, which means you're holding a borrow on the interner as long as the slices 209 + /// are held. If you want to let go of the borrow, copy the slices into new owned 210 + /// strings. 211 + pub fn resolve_key_value<'intern, B, H>( 212 + &self, 213 + storage: &'intern StorageLock<'_, L, B, H>, 214 + ) -> Result<(&'intern str, &'intern str), ResolveError> 215 + where 216 + B: InternerBackend<Symbol = S>, 217 + H: BuildHasher, 218 + { 219 + let (key, value) = self.try_resolve_key_value(storage); 220 + Ok((key?, value?)) 221 + } 222 + 223 + /// Try to resolve the key and value parts of the tag separately. 224 + /// 225 + /// This lets you resolve partial tags, if for some reason part of the tag 226 + /// resolves and the other doesn't. 
227 + /// 228 + /// Note that the returned string slices are views into the underlying interner 229 + /// data, which means you're holding a borrow on the interner as long as the slices 230 + /// are held. If you want to let go of the borrow, copy the slices into new owned 231 + /// strings. 232 + pub fn try_resolve_key_value<'intern, B, H>( 233 + &self, 234 + storage: &'intern StorageLock<'_, L, B, H>, 235 + ) -> ( 236 + Result<&'intern str, ResolveError>, 237 + Result<&'intern str, ResolveError>, 238 + ) 239 + where 240 + B: InternerBackend<Symbol = S>, 241 + H: BuildHasher, 242 + { 243 + ( 244 + storage.resolve(self.0).ok_or(ResolveError::KeyNotFound), 245 + storage.resolve(self.1).ok_or(ResolveError::ValueNotFound), 246 + ) 247 + } 248 + } 249 + 250 + impl<L: Label, S: Symbol> Tag for KeyValueTag<L, S> { 251 + type Label = L; 252 + type Symbol = S; 253 + 254 + fn resolve<B, H>( 255 + &self, 256 + storage: &StorageLock<'_, Self::Label, B, H>, 257 + key_value_separator: KeyValueSep, 258 + path_separator: PathSep, 259 + ) -> Result<String, ResolveError> 260 + where 261 + B: InternerBackend<Symbol = Self::Symbol>, 262 + H: BuildHasher, 263 + { 264 + self.resolve(storage, key_value_separator, path_separator) 265 + } 266 + 267 + fn kind(&self) -> TagKind { 268 + TagKind::KeyValue 269 + } 270 + } 271 + 272 + //--------------------------------------------------------------------------- 273 + 274 + /// A [`Tag`] composed of arbitrary parts. 275 + /// 276 + /// [`MultipartTag`] interns each part of the tag separately, on the 277 + /// expectation that individual parts will be frequently repeated. 278 + #[derive(Debug, Clone, PartialEq, Eq, Hash)] 279 + pub struct MultipartTag<L = DefaultLabel, S = DefaultSymbol>(Vec<S>, PhantomData<L>) 280 + where 281 + L: Label, 282 + S: Symbol; 283 + 284 + impl<L: Label, S: Symbol> MultipartTag<L, S> { 285 + /// Construct a new [`MultipartTag`]. 
286 + pub(crate) fn new<'part, I, B, H>(storage: &mut StorageLock<'_, L, B, H>, parts: I) -> Self 287 + where 288 + I: Iterator<Item = &'part str>, 289 + B: InternerBackend<Symbol = S>, 290 + H: BuildHasher, 291 + { 292 + MultipartTag( 293 + parts.map(|part| storage.get_or_intern(part)).collect(), 294 + PhantomData, 295 + ) 296 + } 297 + 298 + /// Resolve the whole tag into a [`String`]. 299 + pub fn resolve<B, H>( 300 + &self, 301 + storage: &StorageLock<'_, L, B, H>, 302 + _key_value_separator: KeyValueSep, 303 + path_separator: PathSep, 304 + ) -> Result<String, ResolveError> 305 + where 306 + B: InternerBackend<Symbol = S>, 307 + H: BuildHasher, 308 + { 309 + intersperse_with(self.try_resolve_parts(storage), || Ok(path_separator.0)).try_fold( 310 + String::new(), 311 + |mut acc, res| { 312 + res.map(|next| { 313 + acc.push_str(next); 314 + acc 315 + }) 316 + }, 317 + ) 318 + } 319 + 320 + /// Resolve each part of the tag. 321 + /// 322 + /// Note that the returned string slices are views into the underlying interner 323 + /// data, which means you're holding a borrow on the interner as long as the slices 324 + /// are held. If you want to let go of the borrow, copy the slices into new owned 325 + /// strings. 326 + pub fn resolve_parts<'intern, B, H, C>( 327 + &self, 328 + storage: &'intern StorageLock<'_, L, B, H>, 329 + ) -> Result<C, ResolveError> 330 + where 331 + B: InternerBackend<Symbol = S>, 332 + H: BuildHasher, 333 + C: FromIterator<&'intern str>, 334 + { 335 + self.try_resolve_parts(storage).collect() 336 + } 337 + 338 + /// Try to resolve each part of the tag. 339 + /// 340 + /// This lets you partially resolve the tag, if for some reason individual 341 + /// parts don't resolve. 342 + /// 343 + /// Note that the returned string slices are views into the underlying interner 344 + /// data, which means you're holding a borrow on the interner as long as the slices 345 + /// are held. 
If you want to let go of the borrow, copy the slices into new owned 346 + /// strings. 347 + pub fn try_resolve_parts<'s, 'intern: 's, B, H>( 348 + &'s self, 349 + storage: &'intern StorageLock<'_, L, B, H>, 350 + ) -> impl Iterator<Item = Result<&'intern str, ResolveError>> + 's 351 + where 352 + B: InternerBackend<Symbol = S>, 353 + H: BuildHasher, 354 + { 355 + self.0 356 + .iter() 357 + .copied() 358 + .map(|part| storage.resolve(part).ok_or(ResolveError::PartNotFound)) 359 + } 360 + } 361 + 362 + impl<L: Label, S: Symbol> Tag for MultipartTag<L, S> { 363 + type Label = L; 364 + type Symbol = S; 365 + 366 + fn resolve<B, H>( 367 + &self, 368 + storage: &StorageLock<'_, Self::Label, B, H>, 369 + key_value_separator: KeyValueSep, 370 + path_separator: PathSep, 371 + ) -> Result<String, ResolveError> 372 + where 373 + B: InternerBackend<Symbol = Self::Symbol>, 374 + H: BuildHasher, 375 + { 376 + self.resolve(storage, key_value_separator, path_separator) 377 + } 378 + 379 + fn kind(&self) -> TagKind { 380 + TagKind::Multipart 381 + } 382 + } 383 + 384 + /// The separator between keys and values in a key-value tag. 385 + /// 386 + /// The default separator is `":"`. 387 + #[derive(Debug, Copy, Clone)] 388 + pub struct KeyValueSep(pub &'static str); 389 + 390 + impl Display for KeyValueSep { 391 + fn fmt(&self, f: &mut Formatter) -> FmtResult { 392 + write!(f, "{}", self.0) 393 + } 394 + } 395 + 396 + impl Default for KeyValueSep { 397 + fn default() -> Self { 398 + KeyValueSep(":") 399 + } 400 + } 401 + 402 + /// The separator between path segments in a multipart tag. 403 + /// 404 + /// The default separator is `"/"`. 
405 + #[derive(Debug, Copy, Clone)] 406 + pub struct PathSep(pub &'static str); 407 + 408 + impl Display for PathSep { 409 + fn fmt(&self, f: &mut Formatter) -> FmtResult { 410 + write!(f, "{}", self.0) 411 + } 412 + } 413 + 414 + impl Default for PathSep { 415 + fn default() -> Self { 416 + PathSep("/") 417 + } 418 + } 419 + 420 + /// The kind of tag being worked with. 421 + #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 422 + pub enum TagKind { 423 + /// A [`PlainTag`]. 424 + Plain, 425 + 426 + /// A [`KeyValueTag`]. 427 + KeyValue, 428 + 429 + /// A [`MultipartTag`]. 430 + Multipart, 431 + 432 + /// A type of [`Tag`] not otherwise known. 433 + Other, 434 + } 435 + 436 + /// A trait to implement on types that _carry_ [`Tag`]s. 437 + /// 438 + /// This trait is generic over the tag type, to permit implementing 439 + /// it for multiple types of tags. 440 + pub trait Tagged<T: Tag> { 441 + /// The type of iterator used to provide the [`Tag`]s. 442 + /// 443 + /// The lifetime bounds indicate that the tagged type and the 444 + /// tags it produces need to outlive the references to those tags 445 + /// returned by the tag iterator. 446 + type TagIter<'item>: Iterator<Item = &'item T> 447 + where 448 + Self: 'item, 449 + T: 'item; 450 + 451 + /// Get if the tagged type has tags. 452 + /// 453 + /// This is included in the API because there's not a good way to get 454 + /// the number of elements out of an iterator without using `count()`, 455 + /// which consumes the iterator. 456 + /// 457 + /// `size_hint` unfortunately is `None` for the upper bound by default, 458 + /// so it is frequently not useful. 459 + fn has_tags(&self) -> bool; 460 + 461 + /// Get the tags of the tagged type. 462 + fn get_tags(&self) -> Self::TagIter<'_>; 463 + }
+185
src/test.rs
··· 1 + //! Tests for the crate's APIs. 2 + 3 + use crate::label::Label; 4 + use crate::parse::*; 5 + use crate::storage::Storage; 6 + use crate::tag::KeyValueSep; 7 + use crate::tag::PathSep; 8 + use crate::tag::Tag; 9 + use crate::tag::TagKind; 10 + use crate::TagManager; 11 + use anyhow::anyhow as err; 12 + use anyhow::Result; 13 + use string_interner::Symbol; 14 + 15 + // Helper function to test that a tag that's parsed and then resolved 16 + // back into a string results in the same string that was originally 17 + // put into the manager. 18 + fn test_roundtrip<L, S, T, P>(manager: &TagManager<L, S, T, P>, input: &str) -> Result<()> 19 + where 20 + L: Label, 21 + S: Symbol, 22 + T: Tag<Label = L, Symbol = S>, 23 + P: Parser<Tag = T> + Send + Sync, 24 + { 25 + let tag = manager.parse_tag(input)?; 26 + let output = manager.resolve_tag(&tag)?; 27 + assert_eq!(input, output); 28 + Ok(()) 29 + } 30 + 31 + #[test] 32 + fn roundtrip_plain_tag() -> Result<()> { 33 + let manager = TagManager::builder() 34 + .parser(Plain::new()) 35 + .storage(Storage::default()) 36 + .build(); 37 + 38 + test_roundtrip(&manager, "hello") 39 + } 40 + 41 + #[test] 42 + fn transform_tag() -> Result<()> { 43 + let parser = Trim( 44 + TrimBounds::Both, 45 + ChangeCase(Case::Snake, KeyValue::new(KvPolicy::NoAmbiguousSep)), 46 + ); 47 + 48 + let manager = TagManager::builder() 49 + .parser(parser) 50 + .storage(Storage::default()) 51 + .key_value_separator(KeyValueSep("/")) 52 + .build(); 53 + 54 + let tag = manager.parse_tag(" \t HELLO_WORLD/GOODBYE_WORLD ")?; 55 + let interner = manager.storage().lock()?; 56 + let (key, value) = tag.resolve_key_value(&interner)?; 57 + 58 + assert_eq!(key, "hello_world"); 59 + assert_eq!(value, "goodbye_world"); 60 + 61 + Ok(()) 62 + } 63 + 64 + #[test] 65 + fn roundtrip_key_value_tag_unambiguous() -> Result<()> { 66 + let manager = TagManager::builder() 67 + .parser(KeyValue::new(KvPolicy::NoAmbiguousSep)) 68 + .storage(Storage::default()) 69 + .build(); 70 
+ 71 + test_roundtrip(&manager, "hello:world") 72 + } 73 + 74 + #[test] 75 + fn key_part_key_value_tag_unambiguous() -> Result<()> { 76 + let manager = TagManager::builder() 77 + .parser(KeyValue::new(KvPolicy::NoAmbiguousSep)) 78 + .storage(Storage::default()) 79 + .build(); 80 + 81 + let input = "hello:world"; 82 + let tag = manager.parse_tag(input)?; 83 + let lock = manager.storage().lock()?; 84 + let (key, value) = tag.resolve_key_value(&lock)?; 85 + assert_eq!(key, "hello"); 86 + assert_eq!(value, "world"); 87 + 88 + Ok(()) 89 + } 90 + 91 + #[test] 92 + fn roundtrip_key_value_tag_split_first() -> Result<()> { 93 + let manager = TagManager::builder() 94 + .parser(KeyValue::new(KvPolicy::SplitOnFirstSep)) 95 + .storage(Storage::default()) 96 + .build(); 97 + 98 + test_roundtrip(&manager, "hello:world") 99 + } 100 + 101 + #[test] 102 + fn key_part_key_value_tag_split_first() -> Result<()> { 103 + let manager = TagManager::builder() 104 + .parser(KeyValue::new(KvPolicy::SplitOnFirstSep)) 105 + .storage(Storage::default()) 106 + .build(); 107 + 108 + let input = "hello:world:today"; 109 + let tag = manager.parse_tag(input)?; 110 + let lock = manager.storage().lock()?; 111 + let (key, value) = tag.resolve_key_value(&lock)?; 112 + assert_eq!(key, "hello"); 113 + assert_eq!(value, "world:today"); 114 + Ok(()) 115 + } 116 + 117 + #[test] 118 + fn roundtrip_key_value_tag_split_last() -> Result<()> { 119 + let manager = TagManager::builder() 120 + .parser(KeyValue::new(KvPolicy::SplitOnLastSep)) 121 + .storage(Storage::default()) 122 + .build(); 123 + 124 + test_roundtrip(&manager, "hello:world") 125 + } 126 + 127 + #[test] 128 + fn key_part_key_value_tag_split_last() -> Result<()> { 129 + let manager = TagManager::builder() 130 + .parser(KeyValue::new(KvPolicy::SplitOnLastSep)) 131 + .storage(Storage::default()) 132 + .build(); 133 + 134 + let input = "hello:world:today"; 135 + let tag = manager.parse_tag(input)?; 136 + let lock = manager.storage().lock()?; 137 + let 
(key, value) = tag.resolve_key_value(&lock)?; 138 + assert_eq!(key, "hello:world"); 139 + assert_eq!(value, "today"); 140 + Ok(()) 141 + } 142 + 143 + #[test] 144 + fn roundtrip_multipart_tag() -> Result<()> { 145 + let manager = TagManager::builder() 146 + .parser(Multipart::new(MultipartPolicy::RequireMultipart)) 147 + .storage(Storage::default()) 148 + .path_separator(PathSep(":")) 149 + .build(); 150 + 151 + test_roundtrip(&manager, "hello:world:today:its:me") 152 + } 153 + 154 + #[test] 155 + fn complex_parser() -> Result<()> { 156 + let manager = TagManager::builder() 157 + .parser(Trim( 158 + TrimBounds::Both, 159 + ChangeCase( 160 + Case::Snake, 161 + Or( 162 + Multipart::new(MultipartPolicy::RequireMultipart), 163 + Or(KeyValue::new(KvPolicy::NoAmbiguousSep), Plain::new()), 164 + ), 165 + ), 166 + )) 167 + .storage(Storage::default()) 168 + .build(); 169 + 170 + let tags: Vec<_> = 171 + manager.parse_tags_into_with_kind(["lotr/legolas/friends", "score:5", "rustlang"]); 172 + 173 + let mut iter = tags.into_iter(); 174 + let (t1, t2, t3) = ( 175 + iter.next().ok_or(err!("nothing"))??, 176 + iter.next().ok_or(err!("nothing"))??, 177 + iter.next().ok_or(err!("nothing"))??, 178 + ); 179 + 180 + assert_eq!(t1.1, TagKind::Multipart); 181 + assert_eq!(t2.1, TagKind::KeyValue); 182 + assert_eq!(t3.1, TagKind::Plain); 183 + 184 + Ok(()) 185 + }
+191
tests/blog.rs
··· 1 + 2 + pub mod blog { 3 + use anyhow::Result; 4 + use std::iter::once as iter_once; 5 + use std::iter::Once as OnceIter; 6 + use std::ops::Not as _; 7 + use std::result::Result as StdResult; 8 + use std::slice::Iter as SliceIter; 9 + use string_interner::DefaultSymbol; 10 + use tagbuddy::generate_label; 11 + use tagbuddy::parse::*; 12 + use tagbuddy::storage::Storage; 13 + use tagbuddy::tag::KeyValueTag; 14 + use tagbuddy::tag::PlainTag; 15 + use tagbuddy::tag::Tagged; 16 + use tagbuddy::TagManager; 17 + 18 + generate_label! { 19 + pub Tags {} 20 + pub Ratings {} 21 + } 22 + 23 + type PostTagsManager = TagManager<Tags, DefaultSymbol, PlainTag<Tags>, Plain<Tags>>; 24 + type PostRatingsManager = 25 + TagManager<Ratings, DefaultSymbol, KeyValueTag<Ratings>, KeyValue<Ratings>>; 26 + 27 + pub struct Blog { 28 + posts: Vec<BlogPost>, 29 + tag_manager: PostTagsManager, 30 + rating_manager: PostRatingsManager, 31 + } 32 + 33 + impl Blog { 34 + /// Initialize a new blog. 35 + pub fn new() -> Self { 36 + let tag_manager = TagManager::builder() 37 + .parser(Plain::new()) 38 + .storage(Storage::<Tags>::fresh()) 39 + .build(); 40 + 41 + let rating_manager = TagManager::builder() 42 + .parser(KeyValue::new(KvPolicy::NoAmbiguousSep)) 43 + .storage(tag_manager.storage().shallow_clone::<Ratings>()) 44 + .build(); 45 + 46 + Self { 47 + posts: Vec::new(), 48 + tag_manager, 49 + rating_manager, 50 + } 51 + } 52 + 53 + /// Add a new post to the blog. 
54 + pub fn add_post( 55 + &mut self, 56 + title: &str, 57 + content: &str, 58 + tags: &[&str], 59 + rating: &str, 60 + ) -> Result<&mut Self> { 61 + let title = title.to_owned(); 62 + let content = content.to_owned(); 63 + 64 + let tags = self 65 + .tag_manager 66 + .parse_tags_into::<StdResult<_, _>>(tags.into_iter().map(|t| *t))?; 67 + 68 + let rating = self.rating_manager.parse_tag(rating)?; 69 + 70 + self.posts.push(BlogPost { 71 + title, 72 + content, 73 + tags, 74 + rating, 75 + }); 76 + 77 + Ok(self) 78 + } 79 + 80 + /// Get the posts in the blog. 81 + pub fn posts(&self) -> impl Iterator<Item = &BlogPost> { 82 + self.posts.iter() 83 + } 84 + } 85 + 86 + /// A single post on the blog. 87 + pub struct BlogPost { 88 + /// The title of the post. 89 + #[allow(unused)] 90 + title: String, 91 + 92 + /// The content of the post. 93 + #[allow(unused)] 94 + content: String, 95 + 96 + /// The tags associated with the post. 97 + tags: Vec<PlainTag<Tags>>, 98 + 99 + /// The rating assigned to the post. 100 + rating: KeyValueTag<Ratings>, 101 + } 102 + 103 + impl BlogPost { 104 + /// Get the tags applied to a blog post. 105 + pub fn tags(&self, blog: &Blog) -> Vec<String> { 106 + // SAFETY: We know we're using the correct storage, so the tag data should always be valid. 107 + blog.tag_manager 108 + .resolve_tags_into::<StdResult<_, _>>(Tagged::<PlainTag<Tags>>::get_tags(self)) 109 + .expect("tags should always resolve successfully") 110 + } 111 + 112 + /// Get the rating of a blog post. 113 + pub fn rating(&self, blog: &Blog) -> String { 114 + // SAFETY: We know we're using the correct storage, so the rating data should always be valid. 115 + blog.rating_manager 116 + .resolve_tags_into::<StdResult<_, _>>(Tagged::<KeyValueTag<Ratings>>::get_tags( 117 + self, 118 + )) 119 + .expect("ratings should always resolve successfully") 120 + } 121 + } 122 + 123 + // Mark a blog post as being tagged with tags. 
124 + impl Tagged<PlainTag<Tags>> for BlogPost { 125 + type TagIter<'iter> = SliceIter<'iter, PlainTag<Tags>>; 126 + 127 + fn has_tags(&self) -> bool { 128 + self.tags.is_empty().not() 129 + } 130 + 131 + fn get_tags(&self) -> Self::TagIter<'_> { 132 + self.tags.iter() 133 + } 134 + } 135 + 136 + // Mark a blog post as being tagged with a rating. 137 + impl Tagged<KeyValueTag<Ratings>> for BlogPost { 138 + type TagIter<'iter> = OnceIter<&'iter KeyValueTag<Ratings>>; 139 + 140 + fn has_tags(&self) -> bool { 141 + true 142 + } 143 + 144 + fn get_tags(&self) -> Self::TagIter<'_> { 145 + iter_once(&self.rating) 146 + } 147 + } 148 + } 149 + 150 + use crate::blog::Blog; 151 + use anyhow::Result; 152 + 153 + #[test] 154 + fn blog_can_handle_tags_and_rating() -> Result<()> { 155 + let mut blog = Blog::new(); 156 + 157 + blog.add_post("one", "1", &["hello", "my", "friend"], "score:1")? 158 + .add_post("two", "2", &["goodbye", "your", "enemy"], "score:2")? 159 + .add_post( 160 + "three", 161 + "3", 162 + &["see you soon", "our", "acquaintance"], 163 + "score:3", 164 + )?; 165 + 166 + assert_eq!( 167 + blog.posts() 168 + .flat_map(|post| post.tags(&blog)) 169 + .collect::<Vec<_>>(), 170 + vec![ 171 + "hello", 172 + "my", 173 + "friend", 174 + "goodbye", 175 + "your", 176 + "enemy", 177 + "see you soon", 178 + "our", 179 + "acquaintance", 180 + ] 181 + ); 182 + 183 + assert_eq!( 184 + blog.posts() 185 + .map(|post| post.rating(&blog)) 186 + .collect::<Vec<_>>(), 187 + vec!["score:1", "score:2", "score:3"] 188 + ); 189 + 190 + Ok(()) 191 + }