···11+//! Errors for producing and consuming tags.
22+33+#[cfg(doc)]
44+use crate::parse::Or;
55+#[cfg(doc)]
66+use crate::storage::Storage;
77+#[cfg(doc)]
88+use crate::tag::MultipartTag;
99+#[cfg(doc)]
1010+use crate::tag::Tag;
1111+#[cfg(doc)]
1212+use crate::TagManager;
1313+use std::error::Error as StdError;
1414+use std::fmt::Display;
1515+use std::fmt::Formatter;
1616+use std::fmt::Result as FmtResult;
1717+#[cfg(doc)]
1818+use std::sync::Mutex;
1919+2020+/// Error arising during parsing of new [`Tag`]s.
2121+#[derive(Debug)]
2222+#[non_exhaustive]
2323+pub enum ParseError {
2424+ /// Can't create an empty tag.
2525+ EmptyTag,
2626+2727+ /// Key-value tag is missing a key.
2828+ MissingKey,
2929+3030+ /// Key-value tag is missing a value.
3131+ MissingValue,
3232+3333+ /// Key-value tag is ambiguous; key-value tags must have one separator.
3434+ AmbiguousKeyValueTag,
3535+3636+ /// Tag didn't match a regular expression.
3737+ TagDidntMatchRegex,
3838+3939+ /// Tag is more characters long than allowed.
4040+ TagTooManyChars,
4141+4242+ /// Tag is more bytes long than allowed.
4343+ TagTooManyBytes,
4444+4545+ /// Could not lock the parser prior to parsing.
4646+ CouldNotLock,
4747+4848+ /// Tried to parse a single-part [`MultipartTag`].
4949+ SinglePartMultipart,
5050+5151+ /// Failed an [`Or`] match.
5252+ FailedOr(Box<ParseError>, Box<ParseError>),
5353+5454+ /// An underlying storage error arose.
5555+ StorageError(StorageError),
5656+}
5757+5858+impl From<StorageError> for ParseError {
5959+ fn from(e: StorageError) -> Self {
6060+ ParseError::StorageError(e)
6161+ }
6262+}
6363+6464+impl Display for ParseError {
6565+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
6666+ match self {
6767+ ParseError::EmptyTag => write!(f, "can't create an empty tag"),
6868+ ParseError::MissingKey => write!(f, "missing key in a key-value tag"),
6969+ ParseError::MissingValue => write!(f, "missing value in a key-value tag"),
7070+ ParseError::AmbiguousKeyValueTag => {
7171+ write!(f, "ambiguous key-tag value; should have just one separator")
7272+ }
7373+ ParseError::TagDidntMatchRegex => write!(f, "tag didn't match the regular expression"),
7474+ ParseError::TagTooManyChars => write!(f, "tag is too many characters long"),
7575+ ParseError::TagTooManyBytes => write!(f, "tag is too many bytes long"),
7676+ ParseError::CouldNotLock => write!(f, "could not lock parser"),
7777+ ParseError::SinglePartMultipart => {
7878+ write!(f, "can't accept a single-part multipart tag")
7979+ }
8080+ ParseError::FailedOr(e1, e2) => {
8181+ write!(f, "failed two parsers with errors '{e1}' and '{e2}'")
8282+ }
8383+ ParseError::StorageError(e) => write!(f, "{e}"),
8484+ }
8585+ }
8686+}
8787+8888+impl StdError for ParseError {
8989+ fn source(&self) -> Option<&(dyn StdError + 'static)> {
9090+ match self {
9191+ ParseError::StorageError(e) => Some(e),
9292+ _ => None,
9393+ }
9494+ }
9595+}
9696+9797+/// Errors arising when resolving [`Tag`]s.
9898+#[derive(Debug)]
9999+#[non_exhaustive]
100100+pub enum ResolveError {
101101+ /// Tag wasn't found in the [`TagManager`].
102102+ TagNotFound,
103103+104104+ /// Key wasn't found in the [`TagManager`].
105105+ KeyNotFound,
106106+107107+ /// Value wasn't found in the [`TagManager`].
108108+ ValueNotFound,
109109+110110+ /// Part wasn't found in the [`TagManager`].
111111+ PartNotFound,
112112+113113+ /// An underlying [`Storage`] error occurred.
114114+ StorageError(StorageError),
115115+}
116116+117117+impl From<StorageError> for ResolveError {
118118+ fn from(e: StorageError) -> Self {
119119+ ResolveError::StorageError(e)
120120+ }
121121+}
122122+123123+impl Display for ResolveError {
124124+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
125125+ match self {
126126+ ResolveError::TagNotFound => write!(f, "tag wasn't found in tag manager"),
127127+ ResolveError::KeyNotFound => write!(f, "key wasn't found in tag manager"),
128128+ ResolveError::ValueNotFound => write!(f, "value wasn't found in tag manager"),
129129+ ResolveError::PartNotFound => write!(f, "part wasn't found in tag manager"),
130130+ ResolveError::StorageError(e) => write!(f, "{e}"),
131131+ }
132132+ }
133133+}
134134+135135+impl StdError for ResolveError {
136136+ fn source(&self) -> Option<&(dyn StdError + 'static)> {
137137+ match self {
138138+ ResolveError::StorageError(e) => Some(e),
139139+ _ => None,
140140+ }
141141+ }
142142+}
/// The ways in which interacting with a [`Storage`] can fail.
#[derive(Debug)]
#[non_exhaustive]
pub enum StorageError {
    /// The storage could not be locked, likely because the [`Mutex`] is poisoned.
    CouldNotLock,
}
151151+152152+impl Display for StorageError {
153153+ fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
154154+ match self {
155155+ StorageError::CouldNotLock => write!(f, "could not lock storage"),
156156+ }
157157+ }
158158+}
159159+160160+impl StdError for StorageError {}
+59
src/label.rs
···11+//! Enables compile-time separation of tags from different managers.
22+33+#[cfg(doc)]
44+use crate::generate_label;
55+#[cfg(doc)]
66+use crate::tag::Tag;
77+#[cfg(doc)]
88+use crate::TagManager;
/// A marker type that separates, at compile time, the [`Tag`]s of different [`TagManager`]s.
///
/// Because a [`Tag`] carries the label of the [`TagManager`] that generated it,
/// the compiler can guarantee that the tag is never resolved through a different
/// [`TagManager`] (which, if permitted, might succeed but produce incorrect or
/// nonsensical results at runtime).
///
/// # Safety
///
/// This crate's use of [`Label`] types never actually instantiates a value of
/// the type, and in general we recommend using a zero-sized type for it (which
/// is what the [`generate_label`] macro does).
///
/// There are no real safety concerns around its use: no choice to implement, or
/// not implement, this marker trait can break safety guarantees of the crate.
/// It's marked `unsafe` only to hint toward the special guarantees around the
/// marker trait, and to encourage constructing labels through [`generate_label`].
pub unsafe trait Label: Copy {}
/// Generate one or more new types implementing [`Label`].
///
/// Each entry is written as optional attributes (including doc comments), an
/// optional visibility, a type name, and an empty pair of braces. Every entry
/// expands to a unit struct deriving `Debug`, `Copy`, and `Clone`, together
/// with an `unsafe impl` of [`Label`] for it.
///
/// # Example
///
/// ```
/// # use tagbuddy::generate_label;
///
/// generate_label! {
///     /// Tags used for blog posts in this blog implementation.
///     pub BlogPostTags {}
/// }
/// ```
#[macro_export]
macro_rules! generate_label {
    // Internal rule: expand exactly one label definition.
    ( @single $( #[$($attr:meta)*] )* $vis:vis $name:ident {} ) => {
        $( #[$($attr)*] )*
        #[derive(Debug, Copy, Clone)]
        $vis struct $name;
        unsafe impl $crate::label::Label for $name {}
    };

    // Entry point: any number of label definitions, each forwarded to `@single`.
    ( $( $( #[$($attr:meta)*] )* $vis:vis $name:ident {} )* ) => {
        $(
            $crate::generate_label! { @single $( #[$($attr)*] )* $vis $name {} }
        )*
    };
}
5555+5656+generate_label! {
5757+ /// The default label applied when no other label is provided.
5858+ pub DefaultLabel {}
5959+}
+30
src/lib.rs
···11+//! A Rust crate for creating and managing tags and their relationships.
22+//!
33+//! "Tags" are string data which may or may not contain some structure, like
44+//! key-value pairs or multipart segments, and which are attached as metadata
55+//! to annotate data for organization.
66+//!
77+//! This crate defines a set of mechanisms for generically parsing, storing,
88+//! comparing, and querying sets of tags according to configured policies.
99+1010+pub mod error;
1111+pub mod label;
1212+mod manager;
1313+pub mod parse;
1414+pub mod query;
1515+pub mod storage;
1616+pub mod tag;
1717+#[cfg(test)]
1818+mod test;
1919+2020+pub use crate::manager::TagManager;
2121+2222+pub mod builder {
2323+ //! Contains a builder type for the [`TagManager`].
2424+2525+ #[cfg(doc)]
2626+ use crate::TagManager;
2727+2828+ #[doc(inline)]
2929+ pub use crate::manager::TagManagerBuilder;
3030+}
+258
src/manager.rs
···11+//! Produce and resolve tags.
22+33+use crate::error::ResolveError;
44+use crate::label::DefaultLabel;
55+use crate::label::Label;
66+use crate::parse::*;
77+use crate::storage::Storage;
88+use crate::tag::KeyValueSep;
99+#[cfg(doc)]
1010+use crate::tag::KeyValueTag;
1111+#[cfg(doc)]
1212+use crate::tag::MultipartTag;
1313+use crate::tag::PathSep;
1414+use crate::tag::PlainTag;
1515+use crate::tag::Tag;
1616+use crate::{error::ParseError, tag::TagKind};
1717+#[cfg(doc)]
1818+use std::sync::Mutex;
1919+use std::{convert::identity, hash::BuildHasher};
2020+use string_interner::backend::Backend as InternerBackend;
2121+use string_interner::DefaultBackend;
2222+use string_interner::DefaultHashBuilder;
2323+use string_interner::DefaultSymbol;
2424+#[cfg(doc)]
2525+use string_interner::StringInterner;
2626+use string_interner::Symbol;
2727+use typed_builder::TypedBuilder;
2828+2929+/// Constructs [`Tag`]s according to the configured parser and interner.
3030+///
3131+/// A single [`TagManager`] is responsible for parsing and resolving tags that
3232+/// match the rules of a single configured parser, with storage handled by
3333+/// an underlying [`StringInterner`]. The [`StringInterner`] may be shared
3434+/// with other [`TagManager`]s.
3535+///
3636+/// [`TagManager`] is designed to be generic over:
3737+///
3838+/// - The parser used to produce tags.
3939+/// - The interner used to store tag data.
4040+///
4141+/// The trait bounds on [`TagManager`] ensure that the parser and interner
4242+/// agree on the [`Symbol`] used as handles for the stored string data.
4343+/// This is required because the parser produces [`Tag`]s which store
4444+/// [`Symbol`]s so they can later be resolved into [`String`]s to recover
4545+/// the full originally-input tag data.
4646+///
4747+/// The manner in which tag data is stored depends on the `T` parameter.
4848+/// [`PlainTag`] stores the full string data in the interner. [`KeyValueTag`]
4949+/// stores the key and value data separately, on the expectation that keys
5050+/// especially will be repeated, and thus a lot of space saving is achieved by
5151+/// deduplicating them through separate interning. [`MultipartTag`] stores
5252+/// each part separately, again on the expectation that individual parts will
5353+/// be frequently repeated across tags, resulting in space savings from interning.
5454+#[derive(TypedBuilder)]
5555+pub struct TagManager<
5656+ L = DefaultLabel,
5757+ S = DefaultSymbol,
5858+ T = PlainTag<L, S>,
5959+ P = Plain<L, S>,
6060+ B = DefaultBackend<S>,
6161+ H = DefaultHashBuilder,
6262+> where
6363+ L: Label,
6464+ S: Symbol,
6565+ T: Tag<Label = L, Symbol = S>,
6666+ P: Parser<Tag = T> + Send + Sync,
6767+ B: InternerBackend<Symbol = S>,
6868+ H: BuildHasher,
6969+{
7070+ /// Defines how key-value tags are parsed, if key-value tags are permitted.
7171+ pub(crate) parser: P,
7272+7373+ /// The separator used for separating key and values in key-value tags.
7474+ #[builder(default)]
7575+ pub(crate) key_value_separator: KeyValueSep,
7676+7777+ /// The separator used for separating parts in multipart tags.
7878+ #[builder(default)]
7979+ pub(crate) path_separator: PathSep,
8080+8181+ /// Interns and stores string data for tags, to reduce memory usage.
8282+ pub(crate) storage: Storage<L, B, H>,
8383+}
8484+8585+// These `Send` and `Sync` impls are safe _because_:
8686+//
8787+// 1. `key_value_separator` and `path_separator` are just read-only string slices, so they are
8888+// trivially `Send` and `Sync`.
8989+// 2. `parser` is constrained to be `Send` and `Sync`, either trivially-so, or by being wrapped
9090+// in an `Arc<Mutex<_>>` (in which case it takes advantage of an auto-impl for `Parser`
9191+// that tries to lock the parser before parsing can proceed).
9292+// 3. `storage` is _always_ wrapped in an `Arc<Mutex<_>>`, so it is always `Send` and `Sync`.
9393+//
9494+// Given the above, `TagManager` is _always_ safe to send and sync, and can implement these traits.
9595+9696+unsafe impl<L, S, T, P, B, H> Send for TagManager<L, S, T, P, B, H>
9797+where
9898+ L: Label,
9999+ S: Symbol,
100100+ T: Tag<Label = L, Symbol = S>,
101101+ P: Parser<Tag = T> + Send + Sync,
102102+ B: InternerBackend<Symbol = S>,
103103+ H: BuildHasher,
104104+{
105105+}
106106+107107+unsafe impl<L, S, T, P, B, H> Sync for TagManager<L, S, T, P, B, H>
108108+where
109109+ L: Label,
110110+ S: Symbol,
111111+ T: Tag<Label = L, Symbol = S>,
112112+ P: Parser<Tag = T> + Send + Sync,
113113+ B: InternerBackend<Symbol = S>,
114114+ H: BuildHasher,
115115+{
116116+}
117117+118118+impl<
119119+ L: Label,
120120+ S: Symbol,
121121+ T: Tag<Label = L, Symbol = S>,
122122+ P: Parser<Tag = T> + Send + Sync,
123123+ B: InternerBackend<Symbol = S>,
124124+ H: BuildHasher,
125125+ > TagManager<L, S, T, P, B, H>
126126+{
127127+ /// Attempt to parse a structured tag from the provided "raw" tag.
128128+ ///
129129+ /// This may fail if the tag is empty, or if it violates the configured [`Parser`]'s rules.
130130+ pub fn parse_tag(&self, raw: &str) -> Result<P::Tag, ParseError> {
131131+ self.parser.parse(
132132+ &mut self.storage.lock()?,
133133+ self.key_value_separator,
134134+ self.path_separator,
135135+ raw,
136136+ )
137137+ }
138138+139139+ /// Parse tags into a collection of your choosing.
140140+ ///
141141+ /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
142142+ /// storage before starting to parse _any_ tags, and holds it for the duration.
143143+ pub fn parse_tags_into<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C
144144+ where
145145+ C: FromIterator<Result<P::Tag, ParseError>>,
146146+ {
147147+ self.parse_tags_into_with(src, identity)
148148+ }
149149+150150+ /// Parse tags into a collection of your choosing.
151151+ ///
152152+ /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
153153+ /// storage before starting to parse _any_ tags, and holds it for the duration.
154154+ pub fn parse_tags_into_with_kind<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C
155155+ where
156156+ C: FromIterator<Result<(P::Tag, TagKind), ParseError>>,
157157+ {
158158+ self.parse_tags_into_with(src, |t| {
159159+ let kind = t.kind();
160160+ (t, kind)
161161+ })
162162+ }
163163+164164+ /// Parse tags into a collection of your choosing.
165165+ ///
166166+ /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
167167+ /// storage before starting to parse _any_ tags, and holds it for the duration.
168168+ pub fn parse_tags_into_with<'raw, O, C>(
169169+ &self,
170170+ src: impl IntoIterator<Item = &'raw str>,
171171+ f: impl FnOnce(P::Tag) -> O + Copy,
172172+ ) -> C
173173+ where
174174+ C: FromIterator<Result<O, ParseError>>,
175175+ {
176176+ src.into_iter()
177177+ .map(move |raw| {
178178+ self.parser
179179+ .parse(
180180+ &mut self.storage.lock()?,
181181+ self.key_value_separator,
182182+ self.path_separator,
183183+ raw,
184184+ )
185185+ .map(f)
186186+ })
187187+ .collect()
188188+ }
189189+190190+ /// Get a string representation of a [`Tag`].
191191+ ///
192192+ /// Note that this may fail to resolve a tag if the tag wasn't interned
193193+ /// in the current [`TagManager`]. It may alternatively resolve an incorrect tag.
194194+ pub fn resolve_tag(&self, tag: &P::Tag) -> Result<String, ResolveError> {
195195+ tag.resolve(
196196+ &self.storage.lock()?,
197197+ self.key_value_separator,
198198+ self.path_separator,
199199+ )
200200+ }
201201+202202+ /// Get the string representation of a set of [`Tag`]s.
203203+ ///
204204+ /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock
205205+ /// before beginning iteration, and holds it for the duration.
206206+ pub fn resolve_tags_into<'tag, C>(&self, src: impl IntoIterator<Item = &'tag P::Tag>) -> C
207207+ where
208208+ P::Tag: 'tag,
209209+ C: FromIterator<Result<String, ResolveError>>,
210210+ {
211211+ self.resolve_tags_into_with(src, identity)
212212+ }
213213+214214+ /// Get the string representation of a set of [`Tag`]s.
215215+ ///
216216+ /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock
217217+ /// before beginning iteration, and holds it for the duration.
218218+ pub fn resolve_tags_into_with<'tag, O, C>(
219219+ &self,
220220+ src: impl IntoIterator<Item = &'tag P::Tag>,
221221+ f: impl FnOnce(String) -> O + Copy,
222222+ ) -> C
223223+ where
224224+ P::Tag: 'tag,
225225+ C: FromIterator<Result<O, ResolveError>>,
226226+ {
227227+ src.into_iter()
228228+ .map(move |tag| {
229229+ tag.resolve(
230230+ &self.storage.lock()?,
231231+ self.key_value_separator,
232232+ self.path_separator,
233233+ )
234234+ .map(f)
235235+ })
236236+ .collect()
237237+ }
238238+239239+ /// Get the inner [`Storage`] of the [`TagManager`].
240240+ pub fn storage(&self) -> &Storage<L, B, H> {
241241+ &self.storage
242242+ }
243243+244244+ /// Get the [`Parser`] applied by the [`TagManager`].
245245+ pub fn parser(&self) -> &P {
246246+ &self.parser
247247+ }
248248+249249+ /// Get the key-value separator (default `":"`) used by the [`TagManager`] for [`KeyValueTag`]s.
250250+ pub fn key_value_separator(&self) -> KeyValueSep {
251251+ self.key_value_separator
252252+ }
253253+254254+ /// Get the path separator (default `"/"`) used by the [`TagManager`] for [`MultipartTag`]s.
255255+ pub fn path_separator(&self) -> PathSep {
256256+ self.path_separator
257257+ }
258258+}
+288
src/parse/adapters.rs
···11+//! Types which augment or modify the behavior of an underlying parser.
22+33+use crate::error::ParseError;
44+#[cfg(feature = "either")]
55+use crate::label::Label;
66+use crate::parse::Parser;
77+use crate::storage::StorageLock;
88+use crate::tag::KeyValueSep;
99+use crate::tag::PathSep;
1010+#[cfg(feature = "either")]
1111+use crate::tag::Tag;
1212+#[cfg(feature = "convert_case")]
1313+pub use convert_case::Case;
1414+#[cfg(feature = "convert_case")]
1515+use convert_case::Casing as _;
1616+#[cfg(feature = "either")]
1717+use either::Either;
1818+#[cfg(feature = "regex")]
1919+use regex::Regex;
2020+#[cfg(feature = "regex")]
2121+use regex::Replacer;
2222+use std::hash::BuildHasher;
2323+use string_interner::backend::Backend as InternerBackend;
2424+#[cfg(feature = "either")]
2525+use string_interner::Symbol;
// Helper macro to generate parser adapters.
//
// Each entry is written as:
//
//     <doc_comment>
//     <StructName> (< <TypeVar>: <Bound>, ... >)? (( <FieldType>, ... ))? => { <adapter_closure> }
//
// and expands to:
//
// 1. A public tuple struct holding the listed fields, followed by the wrapped parser `P`.
// 2. A private inherent `parse` method, which calls the adapter closure with the adapter
//    itself, the storage, both separators, and the raw tag.
// 3. A `Parser` impl producing the same tag type as `P`, which delegates to that method.
//
// The `Tag` trait is named through `$crate` rather than through an import, because the
// `use` of `Tag` in this module only exists when the `either` feature is enabled, while
// the adapters must compile without it.
macro_rules! adapters {
    (
        $(
            $( #[$($attrss:meta)*] )*
            $struct:ident $(< $($type_var:ident: $type_bound:ident),* >)? $(($($field_ty:ty),*))? => { $adapter:expr }
        )*
    ) => {
        $(
            adapters! {
                @single
                $( #[$($attrss)*] )*
                $struct $(< $($type_var: $type_bound),* >)* $(($($field_ty),*))* => { $adapter }
            }
        )*
    };

    (
        @single
        $(#[$($attrss:meta)*] )*
        $struct:ident $(< $($type_var:ident: $type_bound:ident),* >)? $(($($field_ty:ty),* ))? => { $adapter:expr }
    ) => {
        $( #[$($attrss)*] )*
        #[derive(Debug, Clone)]
        pub struct $struct<$($($type_var: $type_bound),*,)* P: Parser>($($(pub $field_ty),*,)* pub P);

        impl<$($($type_var: $type_bound),*,)* P: Parser> $struct<$($($type_var),*,)* P> {
            /// Parse a raw tag by applying this adapter around the wrapped parser.
            // The adapter closure is called right where it's written; that's intentional.
            #[allow(clippy::redundant_closure_call)]
            fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, <P::Tag as $crate::tag::Tag>::Label, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str,
            ) -> Result<P::Tag, ParseError>
            where
                B: InternerBackend<Symbol = <P::Tag as $crate::tag::Tag>::Symbol>,
                H: BuildHasher
            {
                ($adapter)(self, storage, key_value_separator, path_separator, raw)
            }

        }

        impl<$($($type_var: $type_bound),*,)* P: Parser> Parser for $struct<$($($type_var),*,)* P> {
            type Tag = P::Tag;

            fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, <Self::Tag as $crate::tag::Tag>::Label, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str,
            ) -> Result<Self::Tag, ParseError>
            where
                B: InternerBackend<Symbol = <Self::Tag as $crate::tag::Tag>::Symbol>,
                H: BuildHasher
            {
                // Resolves to the inherent `parse` defined above.
                self.parse(storage, key_value_separator, path_separator, raw)
            }
        }
    };
}
9191+9292+adapters! {
9393+ /// Trim whitespace from one or both sides of the tag.
9494+ Trim(TrimBounds) => {
9595+ |this: &Trim<P>, interner, kv_sep, path_sep, raw: &str| {
9696+ let Trim::<P>(bounds, sub_parser) = this;
9797+ let raw = match bounds {
9898+ TrimBounds::Both => raw.trim(),
9999+ TrimBounds::Start => raw.trim_start(),
100100+ TrimBounds::End => raw.trim_end(),
101101+ };
102102+ sub_parser.parse(interner, kv_sep, path_sep, raw)
103103+ }
104104+ }
105105+106106+ /// Filter out tags longer than a maximum number of characters.
107107+ MaxChar(usize) => {
108108+ |this: &MaxChar<P>, interner, kv_sep, path_sep, raw: &str| {
109109+ let MaxChar::<P>(limit, sub_parser) = this;
110110+111111+ if raw.chars().count() > *limit {
112112+ return Err(ParseError::TagTooManyChars);
113113+ }
114114+115115+ sub_parser.parse(interner, kv_sep, path_sep, raw)
116116+ }
117117+ }
118118+119119+ /// Filter out tags longer than a maximum number of bytes.
120120+ MaxBytes(usize) => {
121121+ |this: &MaxBytes<P>, interner, kv_sep, path_sep, raw: &str| {
122122+ let MaxBytes::<P>(limit, sub_parser) = this;
123123+124124+ if raw.len() > *limit {
125125+ return Err(ParseError::TagTooManyBytes);
126126+ }
127127+128128+ sub_parser.parse(interner, kv_sep, path_sep, raw)
129129+ }
130130+ }
131131+}
#[cfg(feature = "convert_case")]
adapters! {
    /// Change the case of the tag.
    ChangeCase(Case) => {
        |this: &ChangeCase<P>, interner, kv_sep, path_sep, raw: &str| {
            let ChangeCase::<P>(target_case, inner) = this;

            // Converting the case produces a new, owned string for the wrapped parser.
            let converted = raw.to_case(*target_case);

            inner.parse(interner, kv_sep, path_sep, &converted)
        }
    }
}
#[cfg(feature = "regex")]
adapters! {
    /// Filter tags by matching against a regex.
    Match(Regex) => {
        |this: &Match<P>, interner, kv_sep, path_sep, raw: &str| {
            let Match::<P>(pattern, inner) = this;

            if !pattern.is_match(raw) {
                return Err(ParseError::TagDidntMatchRegex);
            }

            inner.parse(interner, kv_sep, path_sep, raw)
        }
    }

    /// Replace the content of a tag according to a regex.
    Replace<R: CloneableReplacer>(Regex, R, ReplaceCount) => {
        |this: &Replace<R, P>, interner, kv_sep, path_sep, raw: &str| {
            let Replace::<R, P>(pattern, replacement, how_many, inner) = this;

            // The replacer is handed over by value, so each parse works on its own clone.
            let replaced = match *how_many {
                ReplaceCount::All => pattern.replace_all(raw, replacement.clone()),
                ReplaceCount::N(limit) => pattern.replacen(raw, limit, replacement.clone()),
                ReplaceCount::First => pattern.replace(raw, replacement.clone()),
            };

            inner.parse(interner, kv_sep, path_sep, &replaced)
        }
    }
}
/// A `regex::Replacer` that can be [`Clone`]d.
///
/// This is automatically implemented for any type that implements both
/// `regex::Replacer` and [`Clone`].
///
/// Only available with the `regex` feature, which is what brings `Replacer` into scope.
#[cfg(feature = "regex")]
pub trait CloneableReplacer: Replacer + Clone {}

#[cfg(feature = "regex")]
impl<T: Replacer + Clone> CloneableReplacer for T {}
// `Or` is written out by hand: teaching the adapter-generating macro about all of the
// bounds it needs isn't worth the complexity.

/// Apply the first parser and, if it fails, apply the second one.
///
/// The tags produced by the two parsers must share the same label and the same
/// underlying symbol type, as both are backed by the same storage.
#[cfg(feature = "either")]
#[derive(Debug)]
pub struct Or<L, S, T1, T2, P1, P2>(pub P1, pub P2)
where
    L: Label,
    S: Symbol,
    T1: Tag<Label = L, Symbol = S>,
    T2: Tag<Label = L, Symbol = S>,
    P1: Parser<Tag = T1>,
    P2: Parser<Tag = T2>;
#[cfg(feature = "either")]
impl<L, S, T1, T2, P1, P2> Or<L, S, T1, T2, P1, P2>
where
    L: Label,
    S: Symbol,
    T1: Tag<Label = L, Symbol = S>,
    T2: Tag<Label = L, Symbol = S>,
    P1: Parser<Tag = T1>,
    P2: Parser<Tag = T2>,
{
    /// Parse a raw tag with the first parser, falling back to the second.
    ///
    /// Returns the first parser's tag as `Either::Left` if it succeeds, and otherwise the
    /// second parser's tag as `Either::Right`. If both fail, the two errors are returned
    /// together as `ParseError::FailedOr`, the first parser's error first.
    fn parse<B, H>(
        &self,
        storage: &mut StorageLock<'_, L, B, H>,
        key_value_separator: KeyValueSep,
        path_separator: PathSep,
        raw: &str,
    ) -> Result<Either<T1, T2>, ParseError>
    where
        B: InternerBackend<Symbol = S>,
        H: BuildHasher,
    {
        let Or(first, second) = self;

        let first_err = match first.parse(storage, key_value_separator, path_separator, raw) {
            Ok(tag) => return Ok(Either::Left(tag)),
            Err(err) => err,
        };

        match second.parse(storage, key_value_separator, path_separator, raw) {
            Ok(tag) => Ok(Either::Right(tag)),
            Err(second_err) => Err(ParseError::FailedOr(
                Box::new(first_err),
                Box::new(second_err),
            )),
        }
    }
}
/// An [`Or`] produces either the first parser's tag or the second parser's tag.
#[cfg(feature = "either")]
impl<L, S, T1, T2, P1, P2> Parser for Or<L, S, T1, T2, P1, P2>
where
    L: Label,
    S: Symbol,
    T1: Tag<Label = L, Symbol = S>,
    T2: Tag<Label = L, Symbol = S>,
    P1: Parser<Tag = T1>,
    P2: Parser<Tag = T2>,
{
    type Tag = Either<T1, T2>;

    fn parse<B, H>(
        &self,
        storage: &mut StorageLock<'_, L, B, H>,
        key_value_separator: KeyValueSep,
        path_separator: PathSep,
        raw: &str,
    ) -> Result<Self::Tag, ParseError>
    where
        B: InternerBackend<Symbol = S>,
        H: BuildHasher,
    {
        // Resolves to the inherent `Or::parse`, which holds the actual fallback logic.
        self.parse(storage, key_value_separator, path_separator, raw)
    }
}
/// Selects the side(s) of the raw tag from which whitespace is trimmed.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
pub enum TrimBounds {
    /// Trim both the start and the end.
    Both,

    /// Trim just the start.
    Start,

    /// Trim just the end.
    End,
}
/// Selects how many replacements the [`Replace`] adapter performs.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
pub enum ReplaceCount {
    /// Replace only the first match of the regex.
    First,

    /// Replace the first `N` matches of the regex.
    N(usize),

    /// Replace every match of the regex.
    All,
}
+275
src/parse/mod.rs
···11+//! Different tag parsers and their strategies.
22+33+mod adapters;
44+55+use crate::error::ParseError;
66+use crate::label::DefaultLabel;
77+use crate::label::Label;
88+pub use crate::parse::adapters::*;
99+#[cfg(doc)]
1010+use crate::storage::Storage;
1111+use crate::storage::StorageLock;
1212+use crate::tag::*;
1313+#[cfg(doc)]
1414+use crate::TagManager;
1515+use std::hash::BuildHasher;
1616+use std::marker::PhantomData;
1717+use std::ops::Not as _;
1818+use std::sync::Arc;
1919+use std::sync::Mutex;
2020+use string_interner::backend::Backend as InternerBackend;
2121+use string_interner::DefaultSymbol;
2222+use string_interner::Symbol;
2323+2424+/// Types that provide a strategy for parsing tags.
2525+///
2626+/// `Parser`s are required to be [`Send`] and [`Sync`] as we want [`TagManager`]
2727+/// to be [`Send`] and [`Sync`]. For basic parsers that don't maintain
2828+/// any internal state, this is trivial, but more complex parsers may
2929+/// need to establish internal synchronization of their state in the case
3030+/// that they are performing concurrent parses.
3131+pub trait Parser {
3232+ /// The type of [`Tag`] produced by the [`Parser`].
3333+ type Tag: Tag;
3434+3535+ /// Parse a given string to produce a new [`Tag`].
3636+ fn parse<B, H>(
3737+ &self,
3838+ storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
3939+ key_value_separator: KeyValueSep,
4040+ path_separator: PathSep,
4141+ raw: &str,
4242+ ) -> Result<Self::Tag, ParseError>
4343+ where
4444+ B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
4545+ H: BuildHasher;
4646+}
4747+4848+// Implement Parser for any Parser wrapped in `Arc<Mutex<_>>`, to enable
4949+// passing externally-synchronized parsers in addition to trivially-synchronized ones,
5050+// in cases where the parsers maintain internal state.
5151+impl<P> Parser for Arc<Mutex<P>>
5252+where
5353+ P: Parser,
5454+{
5555+ type Tag = P::Tag;
5656+5757+ fn parse<B, H>(
5858+ &self,
5959+ storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
6060+ key_value_separator: KeyValueSep,
6161+ path_separator: PathSep,
6262+ raw: &str,
6363+ ) -> Result<Self::Tag, ParseError>
6464+ where
6565+ B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
6666+ H: BuildHasher,
6767+ {
6868+ let internal_parser = self.lock().map_err(|_| ParseError::CouldNotLock)?;
6969+ internal_parser.parse(storage, key_value_separator, path_separator, raw)
7070+ }
7171+}
/// The policy a [`KeyValue`] parser uses for splitting a tag on separators.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
pub enum KvPolicy {
    /// Don't allow ambiguous separators. Only one separator is permitted.
    NoAmbiguousSep,

    /// Split keys and values on the first occurrence of the separator.
    SplitOnFirstSep,

    /// Split keys and values on the last occurrence of the separator.
    SplitOnLastSep,
}
/// The policy for whether "single-part" [`MultipartTag`]s are permitted.
#[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
pub enum MultipartPolicy {
    /// Single-part tags are permitted.
    PermitOnePart,

    /// Single-part tags are not permitted.
    RequireMultipart,
}
/// Helper macro to construct tag parsers.
///
/// For each parser, this macro:
///
/// 1. Defines a struct generic over a label `L` and a symbol `S`, holding the listed
///    (private) fields.
/// 2. Implements a `new` constructor, which takes the listed fields in order.
/// 3. Implements a `parse` inherent method, which calls `check_empty` and then whatever
///    closure is provided to the macro to implement the actual parsing behavior.
/// 4. Implements the `Parser` trait, with `Parser::parse` just delegating to the `parse`
///    inherent method.
///
/// The syntax of each parser-defining pattern is:
///
/// ```text
/// <doc_comment>
/// <struct_name> { (<field_name>: <field_type>),* } => <tag_type> {
///     <parser_closure>
/// }
/// ```
macro_rules! parsers {
    // Entry point: any number of parser definitions, each forwarded to `@single`.
    (
        $(
            $( #[$($meta:meta)*] )*
            $name:ident { $($field:ident: $field_ty:tt),* } => $tag:ident {
                $body:expr
            }
        )*
    ) => {
        $(
            parsers! {
                @single
                $( #[$($meta)*] )*
                $name { $($field: $field_ty),* } => $tag {
                    $body
                }
            }
        )*
    };

    // Internal rule: expand exactly one parser definition.
    (
        @single
        $( #[$($meta:meta)*] )*
        $name:ident { $($field:ident: $field_ty:tt),* } => $tag:ident {
            $body:expr
        }
    ) => {
        $( #[$($meta)*] )*
        #[derive(Debug, PartialEq, Eq, Copy, Clone, Hash)]
        pub struct $name<L: Label = DefaultLabel, S: Symbol = DefaultSymbol> {
            _label: PhantomData<L>,
            _symbol: PhantomData<S>,
            $( $field: $field_ty ),*
        }

        impl<L, S> $name<L, S>
        where
            L: Label,
            S: Symbol,
        {
            /// Construct a new parser.
            // Parsers that do take fields can't be `Default`, so none of them are.
            #[allow(clippy::new_without_default)]
            pub fn new($( $field: $field_ty ),*) -> Self {
                Self {
                    _label: PhantomData,
                    _symbol: PhantomData,
                    $($field),*
                }
            }

            /// Parse a raw tag, rejecting it outright if it's empty.
            // The parser closure is called right where it's written; that's intentional.
            #[allow(clippy::redundant_closure_call)]
            pub fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, L, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str
            ) -> Result<$tag<L, S>, ParseError>
            where
                B: InternerBackend<Symbol = S>,
                H: BuildHasher
            {
                check_empty(raw)?;
                ($body)(self, storage, key_value_separator, path_separator, raw)
            }
        }

        impl<L: Label, S: Symbol> Parser for $name<L, S> {
            type Tag = $tag<L, S>;

            fn parse<B, H>(
                &self,
                storage: &mut StorageLock<'_, <Self::Tag as Tag>::Label, B, H>,
                key_value_separator: KeyValueSep,
                path_separator: PathSep,
                raw: &str
            ) -> Result<Self::Tag, ParseError>
            where
                B: InternerBackend<Symbol = <Self::Tag as Tag>::Symbol>,
                H: BuildHasher
            {
                // Resolves to the inherent `parse` defined above.
                self.parse(storage, key_value_separator, path_separator, raw)
            }
        }
    };
}
198198+199199+/// Validate that the raw tag isn't empty, error out if it is.
200200+fn check_empty(raw: &str) -> Result<(), ParseError> {
201201+ raw.is_empty()
202202+ .not()
203203+ .then_some(())
204204+ .ok_or(ParseError::EmptyTag)
205205+}
206206+207207+parsers! {
208208+ /// No internal structure, `':'` default separator.
209209+ Plain {} => PlainTag {
210210+ |_this, interner, _key_value_separator, _path_separator, raw| Ok(PlainTag::new(interner, raw))
211211+ }
212212+213213+ /// Key-value parser, `':'` default separator.
214214+ KeyValue { policy: KvPolicy } => KeyValueTag {
215215+ |this: &KeyValue<L, S>, interner, key_value_separator: KeyValueSep, _path_separator, raw: &str| {
216216+ match this.policy {
217217+ KvPolicy::NoAmbiguousSep => {
218218+ let mut parts_iter = raw.split(key_value_separator.0);
219219+ let key = parts_iter.next().ok_or(ParseError::MissingKey)?;
220220+ let value = parts_iter.next().ok_or(ParseError::MissingValue)?;
221221+ match parts_iter.next() {
222222+ Some(_) => Err(ParseError::AmbiguousKeyValueTag),
223223+ None => Ok(KeyValueTag::new(interner, key, value))
224224+ }
225225+ }
226226+ KvPolicy::SplitOnFirstSep => {
227227+ match raw.split_once(key_value_separator.0) {
228228+ None => Err(ParseError::MissingValue),
229229+ Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)),
230230+ }
231231+ }
232232+ KvPolicy::SplitOnLastSep => {
233233+ match raw.rsplit_once(key_value_separator.0) {
234234+ None => Err(ParseError::MissingValue),
235235+ Some((key, value)) => Ok(KeyValueTag::new(interner, key, value)),
236236+ }
237237+ }
238238+ }
239239+ }
240240+ }
241241+242242+ /// Multipart parser, splits parts on separator, `':'` default separator.
243243+ Multipart { policy: MultipartPolicy } => MultipartTag {
244244+ |this: &Multipart<L, S>, interner, _key_value_separator, path_separator: PathSep, raw: &str| {
245245+ match this.policy {
246246+ MultipartPolicy::PermitOnePart => Ok(MultipartTag::new(interner, raw.split(path_separator.0))),
247247+ MultipartPolicy::RequireMultipart => {
248248+ let parts = raw.split(path_separator.0);
249249+250250+ if parts.clone().count() < 2 {
251251+ return Err(ParseError::SinglePartMultipart);
252252+ }
253253+254254+ Ok(MultipartTag::new(interner, parts))
255255+ },
256256+ }
257257+ }
258258+ }
259259+}
// SAFETY: There's no data to sync for any of these; the only fields involved
// are read-only once the type is created (they just set configuration).
// Since there's nothing to sync, there's no worry about deriving this.
//
// NOTE(review): these impls hold for every `L` and `S`, including ones that are
// not themselves `Send`/`Sync`; the parsers only mention `L` and `S` through
// `PhantomData` — confirm that is the intended contract.

unsafe impl<L: Label, S: Symbol> Send for Plain<L, S> {}
unsafe impl<L: Label, S: Symbol> Sync for Plain<L, S> {}

unsafe impl<L: Label, S: Symbol> Send for KeyValue<L, S> {}
unsafe impl<L: Label, S: Symbol> Sync for KeyValue<L, S> {}

unsafe impl<L: Label, S: Symbol> Send for Multipart<L, S> {}
unsafe impl<L: Label, S: Symbol> Sync for Multipart<L, S> {}
+105
src/query.rs
···11+use std::{hash::BuildHasher, collections::{BTreeMap, HashSet}, marker::PhantomData};
22+use string_interner::backend::Backend as InternerBackend;
33+use string_interner::Symbol;
44+use crate::{TagManager, label::Label, tag::Tag, parse::Parser};
55+66+77+struct QueryBuilder<
88+ 'm,
99+ L,
1010+ S,
1111+ T,
1212+ P,
1313+ B,
1414+ H,
1515+>
1616+where
1717+ L: Label,
1818+ S: Symbol,
1919+ T: Tag<Label = L, Symbol = S>,
2020+ P: Parser<Tag = T> + Send + Sync,
2121+ B: InternerBackend<Symbol = S>,
2222+ H: BuildHasher,
2323+{
2424+ manager: &'m TagManager<L, S, T, P, B, H>,
2525+ indices: QueryIndices<S>,
2626+}
2727+2828+struct QueryIndices<S> where S: Symbol {
2929+ plain: PlainIndex<S>,
3030+ key_value: KeyValueIndex<S>,
3131+ multipart: MultipartIndex<S>,
3232+}
3333+3434+struct PlainIndex<S>(Vec<S>) where S: Symbol;
3535+3636+struct KeyValueIndex<S>(BTreeMap<S, Vec<S>>) where S: Symbol;
3737+3838+struct MultipartIndex<S>(Vec<Trie<S>>) where S: Symbol;
3939+4040+struct Trie<S>(PhantomData<S>) where S: Symbol;
4141+4242+4343+/*
4444+The basic design of the query system is:
4545+4646+manager
4747+ .select_from(&container_of_queryable_things)
4848+ .where(Contains(And("this-tag", Or("that_tag", "someothertag"))))
4949+ .run()
5050+5151+5252+This isn't the exact API, because it needs to have a way to resolve
5353+the query tags such that identity-based matching can happen.
5454+5555+When it's doing the "select_from" construction, it needs to go through the
5656+queryable-things and construct indices of their tags.
5757+5858+5959+Individual queries probably need to be relative to a single tag manager,
6060+to be able to match up the parser and storage.
6161+6262+But then those queries return iterators over tagged items, and the
6363+intersection of the returned items from multiple queries is the answer
6464+to all the queries.
6565+6666+6767+struct QueryEngine {
6868+ indices: QueryIndices,
6969+}
7070+7171+struct QueryIndices {
7272+ plain_index: PlainIndex,
7373+ key_value_index: KeyValueIndex,
7474+ multipart_index: MultipartIndex,
7575+}
7676+7777+/*
7878+ Queries might include:
7979+ - Find all items with this tag and that tag but not that tag
8080+ - Find all items with tags starting with this path
8181+ - Find all items with this key for key-value
8282+ - Find all items with this key and a value matching some constraint
8383+ - Specific value
8484+ - Set of values
8585+ - Values match regex
8686+ - Values are parseable into a particular type
8787+ - Values parseable into a type meet some constraint on that type
8888+8989+ */
9090+9191+// This will just be a sorted vector.
9292+struct PlainIndex {
9393+9494+}
9595+9696+// This one will be a hash map, with keys being the keys of all KV tags, and values being sorted vectors of values.
9797+struct KeyValueIndex {
9898+9999+}
100100+101101+// This one will be a forest, a set of trees with roots being all the first segments of multipart paths.
102102+struct MultipartIndex {
103103+104104+}
105105+*/
+223
src/storage.rs
···11+//! Types defining how tag data is stored.
22+33+use crate::error::StorageError;
44+use crate::label::DefaultLabel;
55+use crate::label::Label;
66+use std::fmt::Debug;
77+use std::hash::BuildHasher;
88+use std::marker::PhantomData;
99+use std::ops::Deref;
1010+use std::ops::DerefMut;
1111+use std::sync::Arc;
1212+use std::sync::Mutex;
1313+use std::sync::MutexGuard;
1414+pub use string_interner::backend::Backend as InternerBackend;
1515+pub use string_interner::DefaultBackend;
1616+pub use string_interner::DefaultHashBuilder;
1717+pub use string_interner::DefaultSymbol;
1818+pub use string_interner::StringInterner;
1919+use string_interner::Symbol;
/// The default interner: a [`StringInterner`] using [`DefaultBackend`] over
/// [`DefaultSymbol`], hashed with [`DefaultHashBuilder`].
pub type DefaultInterner = StringInterner<DefaultBackend<DefaultSymbol>, DefaultHashBuilder>;
/// Stores the actual tag data.
///
/// A [`Storage`] is, essentially, a wrapper around a [`StringInterner`] that handles three
/// things: 1) ensuring the interner is always wrapped in an `Arc<Mutex<_>>`, 2) providing
/// a convenient `lock` method and associated [`StorageLock`] type to make the API for
/// _using_ the interner more ergonomic, and 3) keying the storage on the "label" type
/// associated with it, while enabling the underlying interners to be shared even if the
/// labels are different.
pub struct Storage<L = DefaultLabel, B = DefaultBackend<DefaultSymbol>, H = DefaultHashBuilder>(
    // The shared, lockable interner.
    Arc<Mutex<StringInterner<B, H>>>,
    // Ties the storage to its label type without storing a label value.
    PhantomData<L>,
)
where
    L: Label,
    B: InternerBackend,
    <B as InternerBackend>::Symbol: Symbol,
    H: BuildHasher;
4141+4242+impl<L, B, H> Storage<L, B, H>
4343+where
4444+ L: Label,
4545+ B: InternerBackend,
4646+ <B as InternerBackend>::Symbol: Symbol,
4747+ H: BuildHasher + Default,
4848+{
4949+ /// Make a [`Storage`] with a freshly-created [`StringInterner`].
5050+ pub fn fresh() -> Self {
5151+ Storage::unique(StringInterner::<B, H>::new())
5252+ }
5353+5454+ /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified capacity.
5555+ pub fn fresh_with_capacity(cap: usize) -> Self {
5656+ Storage::unique(StringInterner::<B, H>::with_capacity(cap))
5757+ }
5858+}
5959+6060+impl<L, B, H> Storage<L, B, H>
6161+where
6262+ L: Label,
6363+ B: InternerBackend,
6464+ <B as InternerBackend>::Symbol: Symbol,
6565+ H: BuildHasher,
6666+{
6767+ /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified hash builder.
6868+ pub fn fresh_with_hasher(hash_builder: H) -> Self {
6969+ Storage::unique(StringInterner::<B, H>::with_hasher(hash_builder))
7070+ }
7171+7272+ /// Make a [`Storage`] with a freshly-created [`StringInterner`] with the specified capacity and hash builder.
7373+ pub fn fresh_with_capacity_and_hasher(cap: usize, hash_builder: H) -> Self {
7474+ Storage::unique(StringInterner::<B, H>::with_capacity_and_hasher(
7575+ cap,
7676+ hash_builder,
7777+ ))
7878+ }
7979+8080+ /// Take ownership of a singular interner to produce a [`Storage`].
8181+ pub fn unique(interner: StringInterner<B, H>) -> Self {
8282+ Storage(Arc::new(Mutex::new(interner)), PhantomData)
8383+ }
8484+8585+ /// Produce a [`Storage`] which may share its underlying interner.
8686+ pub fn shared(interner: &Arc<Mutex<StringInterner<B, H>>>) -> Self {
8787+ Storage(Arc::clone(interner), PhantomData)
8888+ }
8989+9090+ /// Make a [`Storage`] by copying and sharing the underlying interner from the provided [`Storage`].
9191+ pub fn shallow_clone<L2>(&self) -> Storage<L2, B, H>
9292+ where
9393+ L2: Label,
9494+ {
9595+ Storage::shared(self)
9696+ }
9797+9898+ /// Lock the [`Storage`]'s underlying [`StringInterner`].
9999+ pub fn lock(&self) -> Result<StorageLock<'_, L, B, H>, StorageError> {
100100+ self.0
101101+ .lock()
102102+ .map(|guard| StorageLock(guard, PhantomData))
103103+ .map_err(|_| StorageError::CouldNotLock)
104104+ }
105105+}
106106+107107+impl<L, B, H> Storage<L, B, H>
108108+where
109109+ L: Label,
110110+ B: InternerBackend + Clone,
111111+ <B as InternerBackend>::Symbol: Symbol,
112112+ H: BuildHasher + Clone,
113113+{
114114+ /// Make a [`Storage`] by completely copying all data stored in the provided [`Storage`] into a fresh interner.
115115+ pub fn deep_clone<L2>(&self) -> Result<Storage<L2, B, H>, StorageError>
116116+ where
117117+ L2: Label,
118118+ {
119119+ Ok(Storage::unique(self.lock()?.clone()))
120120+ }
121121+}
122122+123123+impl<L, B, H> Debug for Storage<L, B, H>
124124+where
125125+ L: Label,
126126+ B: InternerBackend + Debug,
127127+ <B as InternerBackend>::Symbol: Symbol + Debug,
128128+ H: BuildHasher,
129129+{
130130+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131131+ f.debug_struct("Storage").field("0", &self.0).finish()
132132+ }
133133+}
134134+135135+impl Default for Storage<DefaultLabel, DefaultBackend<DefaultSymbol>, DefaultHashBuilder> {
136136+ fn default() -> Self {
137137+ Storage::unique(DefaultInterner::new())
138138+ }
139139+}
140140+141141+impl<L, B, H> Deref for Storage<L, B, H>
142142+where
143143+ L: Label,
144144+ B: InternerBackend,
145145+ <B as InternerBackend>::Symbol: Symbol,
146146+ H: BuildHasher,
147147+{
148148+ type Target = Arc<Mutex<StringInterner<B, H>>>;
149149+150150+ fn deref(&self) -> &Self::Target {
151151+ &self.0
152152+ }
153153+}
154154+155155+impl<L, B, H> DerefMut for Storage<L, B, H>
156156+where
157157+ L: Label,
158158+ B: InternerBackend,
159159+ <B as InternerBackend>::Symbol: Symbol,
160160+ H: BuildHasher,
161161+{
162162+ fn deref_mut(&mut self) -> &mut Self::Target {
163163+ &mut self.0
164164+ }
165165+}
166166+167167+impl<L, B, H> From<StringInterner<B, H>> for Storage<L, B, H>
168168+where
169169+ L: Label,
170170+ B: InternerBackend,
171171+ <B as InternerBackend>::Symbol: Symbol,
172172+ H: BuildHasher,
173173+{
174174+ fn from(interner: StringInterner<B, H>) -> Self {
175175+ Storage::unique(interner)
176176+ }
177177+}
178178+179179+impl<L, B, H> From<&Arc<Mutex<StringInterner<B, H>>>> for Storage<L, B, H>
180180+where
181181+ L: Label,
182182+ B: InternerBackend,
183183+ <B as InternerBackend>::Symbol: Symbol,
184184+ H: BuildHasher,
185185+{
186186+ fn from(interner: &Arc<Mutex<StringInterner<B, H>>>) -> Self {
187187+ Storage::shared(interner)
188188+ }
189189+}
/// A lock on the underlying [`StringInterner`] in a [`Storage`].
///
/// Wraps the [`MutexGuard`] for the interner together with a marker for the label type
/// of the [`Storage`] it was obtained from.
pub struct StorageLock<'lock, L, B, H>(MutexGuard<'lock, StringInterner<B, H>>, PhantomData<L>)
where
    L: Label,
    B: InternerBackend,
    <B as InternerBackend>::Symbol: Symbol,
    H: BuildHasher;
198198+199199+impl<'lock, L, B, H> Deref for StorageLock<'lock, L, B, H>
200200+where
201201+ L: Label,
202202+ B: InternerBackend,
203203+ <B as InternerBackend>::Symbol: Symbol,
204204+ H: BuildHasher,
205205+{
206206+ type Target = MutexGuard<'lock, StringInterner<B, H>>;
207207+208208+ fn deref(&self) -> &Self::Target {
209209+ &self.0
210210+ }
211211+}
212212+213213+impl<'lock, L, B, H> DerefMut for StorageLock<'lock, L, B, H>
214214+where
215215+ L: Label,
216216+ B: InternerBackend,
217217+ <B as InternerBackend>::Symbol: Symbol,
218218+ H: BuildHasher,
219219+{
220220+ fn deref_mut(&mut self) -> &mut Self::Target {
221221+ &mut self.0
222222+ }
223223+}
+463
src/tag.rs
···11+//! Different kinds of [`Tag`]s that can be parsed.
22+33+use crate::error::ResolveError;
44+use crate::label::DefaultLabel;
55+use crate::label::Label;
66+#[cfg(doc)]
77+use crate::storage::Storage;
88+use crate::storage::StorageLock;
99+#[cfg(doc)]
1010+use crate::TagManager;
1111+#[cfg(feature = "either")]
1212+use either::Either;
1313+use itertools::intersperse_with;
1414+use std::fmt::Display;
1515+use std::fmt::Formatter;
1616+use std::fmt::Result as FmtResult;
1717+use std::hash::BuildHasher;
1818+use std::marker::PhantomData;
1919+use string_interner::backend::Backend as InternerBackend;
2020+use string_interner::DefaultSymbol;
2121+#[cfg(doc)]
2222+use string_interner::StringInterner;
2323+use string_interner::Symbol;
/// A trait defining a [`Tag`] which contains interned data.
///
/// The _only_ defining operation of a [`Tag`] is that it can be
/// converted back into a [`String`] using the [`Storage`] that
/// created it and the correct separators configured by the [`TagManager`]
/// that built it.
///
/// [`Tag`]s have an underlying [`Symbol`] used to define their storage.
/// Internally, [`Tag`]s are just a set of [`Symbol`]s used to make
/// storage and identity comparison cheap while enabling reconstruction
/// of the original [`String`].
pub trait Tag {
    /// The label of the [`TagManager`] used to produce the [`Tag`].
    type Label: Label;

    /// The [`Symbol`] used by the [`Storage`] as a handle to the stored string data.
    type Symbol: Symbol;

    /// Get the [`TagKind`] of the current tag.
    fn kind(&self) -> TagKind;

    /// Try to resolve a [`Tag`] back into a [`String`].
    ///
    /// Both separators are always passed in; each tag type uses whichever
    /// one (if any) applies to it.
    ///
    /// # Errors
    ///
    /// Returns a [`ResolveError`] if the tag cannot be resolved against `storage`.
    fn resolve<B, H>(
        &self,
        storage: &StorageLock<'_, Self::Label, B, H>,
        key_value_separator: KeyValueSep,
        path_separator: PathSep,
    ) -> Result<String, ResolveError>
    where
        B: InternerBackend<Symbol = Self::Symbol>,
        H: BuildHasher;
}
#[cfg(feature = "either")]
// `Either` of two tag types is itself a tag, provided both sides agree on label and symbol.
impl<L, S, T1, T2> Tag for Either<T1, T2>
where
    L: Label,
    S: Symbol,
    T1: Tag<Label = L, Symbol = S>,
    T2: Tag<Label = L, Symbol = S>,
{
    type Label = L;
    type Symbol = S;

    /// The kind of whichever side is present.
    fn kind(&self) -> TagKind {
        match self {
            Either::Left(left) => left.kind(),
            Either::Right(right) => right.kind(),
        }
    }

    /// Resolve whichever side is present, forwarding all arguments unchanged.
    fn resolve<B, H>(
        &self,
        storage: &StorageLock<'_, Self::Label, B, H>,
        key_value_separator: KeyValueSep,
        path_separator: PathSep,
    ) -> Result<String, ResolveError>
    where
        B: InternerBackend<Symbol = Self::Symbol>,
        H: BuildHasher,
    {
        match self {
            Either::Left(left) => left.resolve(storage, key_value_separator, path_separator),
            Either::Right(right) => right.resolve(storage, key_value_separator, path_separator),
        }
    }
}
9393+9494+//---------------------------------------------------------------------------
/// A [`Tag`] without internal structure.
///
/// [`PlainTag`] interns the full contents of a tag together, so it holds a
/// single symbol plus a marker for its label type.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct PlainTag<L = DefaultLabel, S = DefaultSymbol>(S, PhantomData<L>)
where
    L: Label,
    S: Symbol;
104104+105105+impl<L: Label, S: Symbol> PlainTag<L, S> {
106106+ /// Construct a new [`PlainTag`].
107107+ pub(crate) fn new<B, H>(storage: &mut StorageLock<'_, L, B, H>, raw: &str) -> Self
108108+ where
109109+ B: InternerBackend<Symbol = S>,
110110+ H: BuildHasher,
111111+ {
112112+ PlainTag(storage.get_or_intern(raw), PhantomData)
113113+ }
114114+115115+ /// Resolve the whole tag into a [`String`].
116116+ pub fn resolve<B, H>(&self, storage: &StorageLock<'_, L, B, H>) -> Result<String, ResolveError>
117117+ where
118118+ B: InternerBackend<Symbol = S>,
119119+ H: BuildHasher,
120120+ {
121121+ self.resolve_str(storage).map(ToString::to_string)
122122+ }
123123+124124+ /// Resolve the whole tag into a string slice.
125125+ ///
126126+ /// Note that the returned string slice is a view into the underlying interner
127127+ /// data, which means you're holding a borrow on the interner as long as the slice
128128+ /// is held. If you want to let go of the borrow, copy the slice into a new owned
129129+ /// string.
130130+ pub fn resolve_str<'intern, B, H>(
131131+ &self,
132132+ storage: &'intern StorageLock<'_, L, B, H>,
133133+ ) -> Result<&'intern str, ResolveError>
134134+ where
135135+ B: InternerBackend<Symbol = S>,
136136+ H: BuildHasher,
137137+ {
138138+ storage.resolve(self.0).ok_or(ResolveError::TagNotFound)
139139+ }
140140+}
141141+142142+impl<L: Label, S: Symbol> Tag for PlainTag<L, S> {
143143+ type Label = L;
144144+ type Symbol = S;
145145+146146+ fn resolve<B, H>(
147147+ &self,
148148+ storage: &StorageLock<'_, Self::Label, B, H>,
149149+ _key_value_separator: KeyValueSep,
150150+ _path_separator: PathSep,
151151+ ) -> Result<String, ResolveError>
152152+ where
153153+ B: InternerBackend<Symbol = Self::Symbol>,
154154+ H: BuildHasher,
155155+ {
156156+ self.resolve(storage)
157157+ }
158158+159159+ fn kind(&self) -> TagKind {
160160+ TagKind::Plain
161161+ }
162162+}
163163+164164+//---------------------------------------------------------------------------
/// A [`Tag`] composed of a key and a value.
///
/// [`KeyValueTag`] interns the key and value separately, on the expectation
/// that keys especially will be frequently repeated across tags. The first
/// symbol is the key, the second is the value.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct KeyValueTag<L = DefaultLabel, S = DefaultSymbol>(S, S, PhantomData<L>)
where
    L: Label,
    S: Symbol;
175175+176176+impl<L: Label, S: Symbol> KeyValueTag<L, S> {
177177+ /// Construct a new [`KeyValueTag`].
178178+ pub(crate) fn new<B, H>(storage: &mut StorageLock<'_, L, B, H>, key: &str, value: &str) -> Self
179179+ where
180180+ B: InternerBackend<Symbol = S>,
181181+ H: BuildHasher,
182182+ {
183183+ KeyValueTag(
184184+ storage.get_or_intern(key),
185185+ storage.get_or_intern(value),
186186+ PhantomData,
187187+ )
188188+ }
189189+190190+ /// Resolve the whole tag into a [`String`].
191191+ pub fn resolve<B, H>(
192192+ &self,
193193+ storage: &StorageLock<'_, L, B, H>,
194194+ key_value_separator: KeyValueSep,
195195+ _path_separator: PathSep,
196196+ ) -> Result<String, ResolveError>
197197+ where
198198+ B: InternerBackend<Symbol = S>,
199199+ H: BuildHasher,
200200+ {
201201+ self.resolve_key_value(storage)
202202+ .map(|(key, value)| format!("{key}{key_value_separator}{value}"))
203203+ }
204204+205205+ /// Resolve the key and value parts of the tag separately.
206206+ ///
207207+ /// Note that the returned string slices are views into the underlying interner
208208+ /// data, which means you're holding a borrow on the interner as long as the slices
209209+ /// are held. If you want to let go of the borrow, copy the slices into new owned
210210+ /// strings.
211211+ pub fn resolve_key_value<'intern, B, H>(
212212+ &self,
213213+ storage: &'intern StorageLock<'_, L, B, H>,
214214+ ) -> Result<(&'intern str, &'intern str), ResolveError>
215215+ where
216216+ B: InternerBackend<Symbol = S>,
217217+ H: BuildHasher,
218218+ {
219219+ let (key, value) = self.try_resolve_key_value(storage);
220220+ Ok((key?, value?))
221221+ }
222222+223223+ /// Try to resolve the key and value parts of the tag separately.
224224+ ///
225225+ /// This lets you resolve partial tags, if for some reason part of the tag
226226+ /// resolves and the other doesn't.
227227+ ///
228228+ /// Note that the returned string slices are views into the underlying interner
229229+ /// data, which means you're holding a borrow on the interner as long as the slices
230230+ /// are held. If you want to let go of the borrow, copy the slices into new owned
231231+ /// strings.
232232+ pub fn try_resolve_key_value<'intern, B, H>(
233233+ &self,
234234+ storage: &'intern StorageLock<'_, L, B, H>,
235235+ ) -> (
236236+ Result<&'intern str, ResolveError>,
237237+ Result<&'intern str, ResolveError>,
238238+ )
239239+ where
240240+ B: InternerBackend<Symbol = S>,
241241+ H: BuildHasher,
242242+ {
243243+ (
244244+ storage.resolve(self.0).ok_or(ResolveError::KeyNotFound),
245245+ storage.resolve(self.1).ok_or(ResolveError::ValueNotFound),
246246+ )
247247+ }
248248+}
249249+250250+impl<L: Label, S: Symbol> Tag for KeyValueTag<L, S> {
251251+ type Label = L;
252252+ type Symbol = S;
253253+254254+ fn resolve<B, H>(
255255+ &self,
256256+ storage: &StorageLock<'_, Self::Label, B, H>,
257257+ key_value_separator: KeyValueSep,
258258+ path_separator: PathSep,
259259+ ) -> Result<String, ResolveError>
260260+ where
261261+ B: InternerBackend<Symbol = Self::Symbol>,
262262+ H: BuildHasher,
263263+ {
264264+ self.resolve(storage, key_value_separator, path_separator)
265265+ }
266266+267267+ fn kind(&self) -> TagKind {
268268+ TagKind::KeyValue
269269+ }
270270+}
271271+272272+//---------------------------------------------------------------------------
/// A [`Tag`] composed of arbitrary parts.
///
/// [`MultipartTag`] interns each part of the tag separately, on the
/// expectation that individual parts will be frequently repeated. The
/// symbols are kept in a `Vec`, one per part, in the order they were given.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct MultipartTag<L = DefaultLabel, S = DefaultSymbol>(Vec<S>, PhantomData<L>)
where
    L: Label,
    S: Symbol;
283283+284284+impl<L: Label, S: Symbol> MultipartTag<L, S> {
285285+ /// Construct a new [`MultipartTag`].
286286+ pub(crate) fn new<'part, I, B, H>(storage: &mut StorageLock<'_, L, B, H>, parts: I) -> Self
287287+ where
288288+ I: Iterator<Item = &'part str>,
289289+ B: InternerBackend<Symbol = S>,
290290+ H: BuildHasher,
291291+ {
292292+ MultipartTag(
293293+ parts.map(|part| storage.get_or_intern(part)).collect(),
294294+ PhantomData,
295295+ )
296296+ }
297297+298298+ /// Resolve the whole tag into a [`String`].
299299+ pub fn resolve<B, H>(
300300+ &self,
301301+ storage: &StorageLock<'_, L, B, H>,
302302+ _key_value_separator: KeyValueSep,
303303+ path_separator: PathSep,
304304+ ) -> Result<String, ResolveError>
305305+ where
306306+ B: InternerBackend<Symbol = S>,
307307+ H: BuildHasher,
308308+ {
309309+ intersperse_with(self.try_resolve_parts(storage), || Ok(path_separator.0)).try_fold(
310310+ String::new(),
311311+ |mut acc, res| {
312312+ res.map(|next| {
313313+ acc.push_str(next);
314314+ acc
315315+ })
316316+ },
317317+ )
318318+ }
319319+320320+ /// Resolve each part of the tag.
321321+ ///
322322+ /// Note that the returned string slices are views into the underlying interner
323323+ /// data, which means you're holding a borrow on the interner as long as the slices
324324+ /// are held. If you want to let go of the borrow, copy the slices into new owned
325325+ /// strings.
326326+ pub fn resolve_parts<'intern, B, H, C>(
327327+ &self,
328328+ storage: &'intern StorageLock<'_, L, B, H>,
329329+ ) -> Result<C, ResolveError>
330330+ where
331331+ B: InternerBackend<Symbol = S>,
332332+ H: BuildHasher,
333333+ C: FromIterator<&'intern str>,
334334+ {
335335+ self.try_resolve_parts(storage).collect()
336336+ }
337337+338338+ /// Try to resolve each part of the tag.
339339+ ///
340340+ /// This lets you partially resolve the tag, if for some reason individual
341341+ /// parts don't resolve.
342342+ ///
343343+ /// Note that the returned string slices are views into the underlying interner
344344+ /// data, which means you're holding a borrow on the interner as long as the slices
345345+ /// are held. If you want to let go of the borrow, copy the slices into new owned
346346+ /// strings.
347347+ pub fn try_resolve_parts<'s, 'intern: 's, B, H>(
348348+ &'s self,
349349+ storage: &'intern StorageLock<'_, L, B, H>,
350350+ ) -> impl Iterator<Item = Result<&'intern str, ResolveError>> + 's
351351+ where
352352+ B: InternerBackend<Symbol = S>,
353353+ H: BuildHasher,
354354+ {
355355+ self.0
356356+ .iter()
357357+ .copied()
358358+ .map(|part| storage.resolve(part).ok_or(ResolveError::PartNotFound))
359359+ }
360360+}
361361+362362+impl<L: Label, S: Symbol> Tag for MultipartTag<L, S> {
363363+ type Label = L;
364364+ type Symbol = S;
365365+366366+ fn resolve<B, H>(
367367+ &self,
368368+ storage: &StorageLock<'_, Self::Label, B, H>,
369369+ key_value_separator: KeyValueSep,
370370+ path_separator: PathSep,
371371+ ) -> Result<String, ResolveError>
372372+ where
373373+ B: InternerBackend<Symbol = Self::Symbol>,
374374+ H: BuildHasher,
375375+ {
376376+ self.resolve(storage, key_value_separator, path_separator)
377377+ }
378378+379379+ fn kind(&self) -> TagKind {
380380+ TagKind::Multipart
381381+ }
382382+}
/// The separator between keys and values in a key-value tag.
///
/// The default separator is `":"`.
///
/// Separators compare and hash by their string contents, like the other
/// value types in this module.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct KeyValueSep(pub &'static str);

impl Display for KeyValueSep {
    /// Write the separator string verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        f.write_str(self.0)
    }
}

impl Default for KeyValueSep {
    /// The default key-value separator, `":"`.
    fn default() -> Self {
        KeyValueSep(":")
    }
}
/// The separator between path segments in a multipart tag.
///
/// The default separator is `"/"`.
///
/// Separators compare and hash by their string contents, like the other
/// value types in this module.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct PathSep(pub &'static str);

impl Display for PathSep {
    /// Write the separator string verbatim.
    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
        f.write_str(self.0)
    }
}

impl Default for PathSep {
    /// The default path separator, `"/"`.
    fn default() -> Self {
        PathSep("/")
    }
}
/// Which kind of tag a value is.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum TagKind {
    /// The tag is a [`PlainTag`].
    Plain,

    /// The tag is a [`KeyValueTag`].
    KeyValue,

    /// The tag is a [`MultipartTag`].
    Multipart,

    /// The tag is some other type of [`Tag`].
    Other,
}
/// A trait to implement on types that _carry_ [`Tag`]s.
///
/// This trait is generic over the tag type, to permit implementing
/// it for multiple types of tags.
pub trait Tagged<T: Tag> {
    /// The type of iterator used to provide the [`Tag`]s.
    ///
    /// The lifetime bounds indicate that the tagged type and the
    /// tags it produces need to outlive the references to those tags
    /// returned by the tag iterator.
    type TagIter<'item>: Iterator<Item = &'item T>
    where
        Self: 'item,
        T: 'item;

    /// Report whether the tagged type has any tags.
    ///
    /// This is included in the API because there's not a good way to get
    /// the number of elements out of an iterator without using `count()`,
    /// which consumes the iterator.
    ///
    /// `size_hint` unfortunately is `None` for the upper bound by default,
    /// so it is frequently not useful.
    fn has_tags(&self) -> bool;

    /// Get an iterator over references to the tags of the tagged type.
    fn get_tags(&self) -> Self::TagIter<'_>;
}
+185
src/test.rs
···11+//! Tests for the crate's APIs.
22+33+use crate::label::Label;
44+use crate::parse::*;
55+use crate::storage::Storage;
66+use crate::tag::KeyValueSep;
77+use crate::tag::PathSep;
88+use crate::tag::Tag;
99+use crate::tag::TagKind;
1010+use crate::TagManager;
1111+use anyhow::anyhow as err;
1212+use anyhow::Result;
1313+use string_interner::Symbol;
1414+1515+// Helper function to test that a tag that's parsed and then resolved
1616+// back into a string results in the same string that was originally
1717+// put into the manager.
1818+fn test_roundtrip<L, S, T, P>(manager: &TagManager<L, S, T, P>, input: &str) -> Result<()>
1919+where
2020+ L: Label,
2121+ S: Symbol,
2222+ T: Tag<Label = L, Symbol = S>,
2323+ P: Parser<Tag = T> + Send + Sync,
2424+{
2525+ let tag = manager.parse_tag(input)?;
2626+ let output = manager.resolve_tag(&tag)?;
2727+ assert_eq!(input, output);
2828+ Ok(())
2929+}
// A plain tag must survive a parse/resolve roundtrip unchanged.
#[test]
fn roundtrip_plain_tag() -> Result<()> {
    let parser = Plain::new();
    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    test_roundtrip(&manager, "hello")
}
// Trimming and snake-casing wrappers are applied before the key-value split,
// here with "/" configured as the key-value separator.
#[test]
fn transform_tag() -> Result<()> {
    let key_value = KeyValue::new(KvPolicy::NoAmbiguousSep);
    let snake_cased = ChangeCase(Case::Snake, key_value);
    let parser = Trim(TrimBounds::Both, snake_cased);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .key_value_separator(KeyValueSep("/"))
        .build();

    let tag = manager.parse_tag(" \t HELLO_WORLD/GOODBYE_WORLD ")?;
    let lock = manager.storage().lock()?;
    let (key, value) = tag.resolve_key_value(&lock)?;

    assert_eq!(key, "hello_world");
    assert_eq!(value, "goodbye_world");

    Ok(())
}
// A single-separator key-value tag roundtrips under the no-ambiguity policy.
#[test]
fn roundtrip_key_value_tag_unambiguous() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::NoAmbiguousSep);
    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    test_roundtrip(&manager, "hello:world")
}
/// Under the `NoAmbiguousSep` policy, `hello:world` must resolve to the key
/// `hello` and the value `world`.
#[test]
fn key_part_key_value_tag_unambiguous() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::NoAmbiguousSep);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    let tag = manager.parse_tag("hello:world")?;

    // Resolving the parts requires holding the storage lock.
    let guard = manager.storage().lock()?;
    let (key, value) = tag.resolve_key_value(&guard)?;

    assert_eq!(key, "hello");
    assert_eq!(value, "world");

    Ok(())
}
/// A key-value tag with a single separator must round-trip unchanged under
/// the `SplitOnFirstSep` policy.
#[test]
fn roundtrip_key_value_tag_split_first() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::SplitOnFirstSep);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    test_roundtrip(&manager, "hello:world")
}
/// Under the `SplitOnFirstSep` policy, `hello:world:today` must resolve to
/// the key `hello` and the value `world:today`.
#[test]
fn key_part_key_value_tag_split_first() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::SplitOnFirstSep);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    let tag = manager.parse_tag("hello:world:today")?;

    // Resolving the parts requires holding the storage lock.
    let guard = manager.storage().lock()?;
    let (key, value) = tag.resolve_key_value(&guard)?;

    assert_eq!(key, "hello");
    assert_eq!(value, "world:today");

    Ok(())
}
/// A key-value tag with a single separator must round-trip unchanged under
/// the `SplitOnLastSep` policy.
#[test]
fn roundtrip_key_value_tag_split_last() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::SplitOnLastSep);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    test_roundtrip(&manager, "hello:world")
}
/// Under the `SplitOnLastSep` policy, `hello:world:today` must resolve to
/// the key `hello:world` and the value `today`.
#[test]
fn key_part_key_value_tag_split_last() -> Result<()> {
    let parser = KeyValue::new(KvPolicy::SplitOnLastSep);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .build();

    let tag = manager.parse_tag("hello:world:today")?;

    // Resolving the parts requires holding the storage lock.
    let guard = manager.storage().lock()?;
    let (key, value) = tag.resolve_key_value(&guard)?;

    assert_eq!(key, "hello:world");
    assert_eq!(value, "today");

    Ok(())
}
/// A multipart tag must round-trip unchanged when `:` is configured as the
/// path separator.
#[test]
fn roundtrip_multipart_tag() -> Result<()> {
    let parser = Multipart::new(MultipartPolicy::RequireMultipart);

    let manager = TagManager::builder()
        .parser(parser)
        .storage(Storage::default())
        .path_separator(PathSep(":"))
        .build();

    test_roundtrip(&manager, "hello:world:today:its:me")
}
/// Exercises a composed parser: trimming and snake-casing wrapped around
/// nested `Or` combinators over the multipart, key-value and plain parsers.
///
/// Three inputs are parsed together with their detected kind, and the
/// assertions check that they are reported as multipart, key-value and plain
/// respectively.
#[test]
fn complex_parser() -> Result<()> {
    let manager = TagManager::builder()
        .parser(Trim(
            TrimBounds::Both,
            ChangeCase(
                Case::Snake,
                Or(
                    Multipart::new(MultipartPolicy::RequireMultipart),
                    Or(KeyValue::new(KvPolicy::NoAmbiguousSep), Plain::new()),
                ),
            ),
        ))
        .storage(Storage::default())
        .build();

    let tags: Vec<_> =
        manager.parse_tags_into_with_kind(["lotr/legolas/friends", "score:5", "rustlang"]);

    let mut iter = tags.into_iter();

    // The first `?` handles a missing element, the second handles a parse
    // failure. `ok_or_else` builds the "nothing" error only when an element
    // is actually missing, rather than eagerly on every call.
    let (t1, t2, t3) = (
        iter.next().ok_or_else(|| err!("nothing"))??,
        iter.next().ok_or_else(|| err!("nothing"))??,
        iter.next().ok_or_else(|| err!("nothing"))??,
    );

    assert_eq!(t1.1, TagKind::Multipart);
    assert_eq!(t2.1, TagKind::KeyValue);
    assert_eq!(t3.1, TagKind::Plain);

    Ok(())
}
+191
tests/blog.rs
···11+22+pub mod blog {
33+ use anyhow::Result;
44+ use std::iter::once as iter_once;
55+ use std::iter::Once as OnceIter;
66+ use std::ops::Not as _;
77+ use std::result::Result as StdResult;
88+ use std::slice::Iter as SliceIter;
99+ use string_interner::DefaultSymbol;
1010+ use tagbuddy::generate_label;
1111+ use tagbuddy::parse::*;
1212+ use tagbuddy::storage::Storage;
1313+ use tagbuddy::tag::KeyValueTag;
1414+ use tagbuddy::tag::PlainTag;
1515+ use tagbuddy::tag::Tagged;
1616+ use tagbuddy::TagManager;
1717+1818+ generate_label! {
1919+ pub Tags {}
2020+ pub Ratings {}
2121+ }
2222+2323+ type PostTagsManager = TagManager<Tags, DefaultSymbol, PlainTag<Tags>, Plain<Tags>>;
2424+ type PostRatingsManager =
2525+ TagManager<Ratings, DefaultSymbol, KeyValueTag<Ratings>, KeyValue<Ratings>>;
2626+2727+ pub struct Blog {
2828+ posts: Vec<BlogPost>,
2929+ tag_manager: PostTagsManager,
3030+ rating_manager: PostRatingsManager,
3131+ }
3232+3333+ impl Blog {
3434+ /// Initialize a new blog.
3535+ pub fn new() -> Self {
3636+ let tag_manager = TagManager::builder()
3737+ .parser(Plain::new())
3838+ .storage(Storage::<Tags>::fresh())
3939+ .build();
4040+4141+ let rating_manager = TagManager::builder()
4242+ .parser(KeyValue::new(KvPolicy::NoAmbiguousSep))
4343+ .storage(tag_manager.storage().shallow_clone::<Ratings>())
4444+ .build();
4545+4646+ Self {
4747+ posts: Vec::new(),
4848+ tag_manager,
4949+ rating_manager,
5050+ }
5151+ }
5252+5353+ /// Add a new post to the blog.
5454+ pub fn add_post(
5555+ &mut self,
5656+ title: &str,
5757+ content: &str,
5858+ tags: &[&str],
5959+ rating: &str,
6060+ ) -> Result<&mut Self> {
6161+ let title = title.to_owned();
6262+ let content = content.to_owned();
6363+6464+ let tags = self
6565+ .tag_manager
6666+ .parse_tags_into::<StdResult<_, _>>(tags.into_iter().map(|t| *t))?;
6767+6868+ let rating = self.rating_manager.parse_tag(rating)?;
6969+7070+ self.posts.push(BlogPost {
7171+ title,
7272+ content,
7373+ tags,
7474+ rating,
7575+ });
7676+7777+ Ok(self)
7878+ }
7979+8080+ /// Get the posts in the blog.
8181+ pub fn posts(&self) -> impl Iterator<Item = &BlogPost> {
8282+ self.posts.iter()
8383+ }
8484+ }
8585+8686+ /// A single post on the blog.
8787+ pub struct BlogPost {
8888+ /// The title of the post.
8989+ #[allow(unused)]
9090+ title: String,
9191+9292+ /// The content of the post.
9393+ #[allow(unused)]
9494+ content: String,
9595+9696+ /// The tags associated with the post.
9797+ tags: Vec<PlainTag<Tags>>,
9898+9999+ /// The rating assigned to the post.
100100+ rating: KeyValueTag<Ratings>,
101101+ }
102102+103103+ impl BlogPost {
104104+ /// Get the tags applied to a blog post.
105105+ pub fn tags(&self, blog: &Blog) -> Vec<String> {
106106+ // SAFETY: We know we're using the correct storage, so the tag data should always be valid.
107107+ blog.tag_manager
108108+ .resolve_tags_into::<StdResult<_, _>>(Tagged::<PlainTag<Tags>>::get_tags(self))
109109+ .expect("tags should always resolve successfully")
110110+ }
111111+112112+ /// Get the rating of a blog post.
113113+ pub fn rating(&self, blog: &Blog) -> String {
114114+ // SAFETY: We know we're using the correct storage, so the rating data should always be valid.
115115+ blog.rating_manager
116116+ .resolve_tags_into::<StdResult<_, _>>(Tagged::<KeyValueTag<Ratings>>::get_tags(
117117+ self,
118118+ ))
119119+ .expect("ratings should always resolve successfully")
120120+ }
121121+ }
122122+123123+ // Mark a blog post as being tagged with tags.
124124+ impl Tagged<PlainTag<Tags>> for BlogPost {
125125+ type TagIter<'iter> = SliceIter<'iter, PlainTag<Tags>>;
126126+127127+ fn has_tags(&self) -> bool {
128128+ self.tags.is_empty().not()
129129+ }
130130+131131+ fn get_tags(&self) -> Self::TagIter<'_> {
132132+ self.tags.iter()
133133+ }
134134+ }
135135+136136+ // Mark a blog post as being tagged with a rating.
137137+ impl Tagged<KeyValueTag<Ratings>> for BlogPost {
138138+ type TagIter<'iter> = OnceIter<&'iter KeyValueTag<Ratings>>;
139139+140140+ fn has_tags(&self) -> bool {
141141+ true
142142+ }
143143+144144+ fn get_tags(&self) -> Self::TagIter<'_> {
145145+ iter_once(&self.rating)
146146+ }
147147+ }
148148+}
149149+150150+use crate::blog::Blog;
151151+use anyhow::Result;
/// Adds three posts to a blog, then checks that the tags and ratings of all
/// posts resolve back to the strings they were created from, in insertion order.
#[test]
fn blog_can_handle_tags_and_rating() -> Result<()> {
    let mut blog = Blog::new();

    blog.add_post("one", "1", &["hello", "my", "friend"], "score:1")?;
    blog.add_post("two", "2", &["goodbye", "your", "enemy"], "score:2")?;
    blog.add_post(
        "three",
        "3",
        &["see you soon", "our", "acquaintance"],
        "score:3",
    )?;

    let all_tags = blog
        .posts()
        .flat_map(|post| post.tags(&blog))
        .collect::<Vec<_>>();
    let expected_tags = vec![
        "hello",
        "my",
        "friend",
        "goodbye",
        "your",
        "enemy",
        "see you soon",
        "our",
        "acquaintance",
    ];
    assert_eq!(all_tags, expected_tags);

    let all_ratings = blog
        .posts()
        .map(|post| post.rating(&blog))
        .collect::<Vec<_>>();
    let expected_ratings = vec!["score:1", "score:2", "score:3"];
    assert_eq!(all_ratings, expected_ratings);

    Ok(())
}