Lightweight tagged data library.
at main 258 lines 8.9 kB view raw
1//! Produce and resolve tags. 2 3use crate::error::ResolveError; 4use crate::label::DefaultLabel; 5use crate::label::Label; 6use crate::parse::*; 7use crate::storage::Storage; 8use crate::tag::KeyValueSep; 9#[cfg(doc)] 10use crate::tag::KeyValueTag; 11#[cfg(doc)] 12use crate::tag::MultipartTag; 13use crate::tag::PathSep; 14use crate::tag::PlainTag; 15use crate::tag::Tag; 16use crate::{error::ParseError, tag::TagKind}; 17#[cfg(doc)] 18use std::sync::Mutex; 19use std::{convert::identity, hash::BuildHasher}; 20use string_interner::backend::Backend as InternerBackend; 21use string_interner::DefaultBackend; 22use string_interner::DefaultHashBuilder; 23use string_interner::DefaultSymbol; 24#[cfg(doc)] 25use string_interner::StringInterner; 26use string_interner::Symbol; 27use typed_builder::TypedBuilder; 28 29/// Constructs [`Tag`]s according to the configured parser and interner. 30/// 31/// A single [`TagManager`] is responsible for parsing and resolving tags that 32/// match the rules of a single configured parser, with storage handled by 33/// an underlying [`StringInterner`]. The [`StringInterner`] may be shared 34/// with other [`TagManager`]s. 35/// 36/// [`TagManager`] is designed to be generic over: 37/// 38/// - The parser used to produce tags. 39/// - The interner used to store tag data. 40/// 41/// The trait bounds on [`TagManager`] ensure that the parser and interner 42/// agree on the [`Symbol`] used as handles for the stored string data. 43/// This is required because the parser produces [`Tag`]s which store 44/// [`Symbol`]s so they can later be resolved into [`String`]s to recover 45/// the full originally-input tag data. 46/// 47/// The manner in which tag data is stored depends on the `T` parameter. 48/// [`PlainTag`] stores the full string data in the interner. [`KeyValueTag`] 49/// stores the key and value data separately, on the expectation that keys 50/// especially will be repeated, and thus a lot of space saving is achieved by 51/// deduplicating them through separate interning. [`MultipartTag`] stores 52/// each part separately, again on the expectation that individual parts will 53/// be frequently repeated across tags, resulting in space savings from interning. 54#[derive(TypedBuilder)] 55pub struct TagManager< 56 L = DefaultLabel, 57 S = DefaultSymbol, 58 T = PlainTag<L, S>, 59 P = Plain<L, S>, 60 B = DefaultBackend<S>, 61 H = DefaultHashBuilder, 62> where 63 L: Label, 64 S: Symbol, 65 T: Tag<Label = L, Symbol = S>, 66 P: Parser<Tag = T> + Send + Sync, 67 B: InternerBackend<Symbol = S>, 68 H: BuildHasher, 69{ 70 /// Defines how key-value tags are parsed, if key-value tags are permitted. 71 pub(crate) parser: P, 72 73 /// The separator used for separating key and values in key-value tags. 74 #[builder(default)] 75 pub(crate) key_value_separator: KeyValueSep, 76 77 /// The separator used for separating parts in multipart tags. 78 #[builder(default)] 79 pub(crate) path_separator: PathSep, 80 81 /// Interns and stores string data for tags, to reduce memory usage. 82 pub(crate) storage: Storage<L, B, H>, 83} 84 85// These `Send` and `Sync` impls are safe _because_: 86// 87// 1. `key_value_separator` and `path_separator` are just read-only string slices, so they are 88// trivially `Send` and `Sync`. 89// 2. `parser` is constrained to be `Send` and `Sync`, either trivially-so, or by being wrapped 90// in an `Arc<Mutex<_>>` (in which case it takes advantage of an auto-impl for `Parser` 91// that tries to lock the parser before parsing can proceed). 92// 3. `storage` is _always_ wrapped in an `Arc<Mutex<_>>`, so it is always `Send` and `Sync`. 93// 94// Given the above, `TagManager` is _always_ safe to send and sync, and can implement these traits. 95 96unsafe impl<L, S, T, P, B, H> Send for TagManager<L, S, T, P, B, H> 97where 98 L: Label, 99 S: Symbol, 100 T: Tag<Label = L, Symbol = S>, 101 P: Parser<Tag = T> + Send + Sync, 102 B: InternerBackend<Symbol = S>, 103 H: BuildHasher, 104{ 105} 106 107unsafe impl<L, S, T, P, B, H> Sync for TagManager<L, S, T, P, B, H> 108where 109 L: Label, 110 S: Symbol, 111 T: Tag<Label = L, Symbol = S>, 112 P: Parser<Tag = T> + Send + Sync, 113 B: InternerBackend<Symbol = S>, 114 H: BuildHasher, 115{ 116} 117 118impl< 119 L: Label, 120 S: Symbol, 121 T: Tag<Label = L, Symbol = S>, 122 P: Parser<Tag = T> + Send + Sync, 123 B: InternerBackend<Symbol = S>, 124 H: BuildHasher, 125 > TagManager<L, S, T, P, B, H> 126{ 127 /// Attempt to parse a structured tag from the provided "raw" tag. 128 /// 129 /// This may fail if the tag is empty, or if it violates the configured [`Parser`]'s rules. 130 pub fn parse_tag(&self, raw: &str) -> Result<P::Tag, ParseError> { 131 self.parser.parse( 132 &mut self.storage.lock()?, 133 self.key_value_separator, 134 self.path_separator, 135 raw, 136 ) 137 } 138 139 /// Parse tags into a collection of your choosing. 140 /// 141 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 142 /// storage before starting to parse _any_ tags, and holds it for the duration. 143 pub fn parse_tags_into<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C 144 where 145 C: FromIterator<Result<P::Tag, ParseError>>, 146 { 147 self.parse_tags_into_with(src, identity) 148 } 149 150 /// Parse tags into a collection of your choosing. 151 /// 152 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 153 /// storage before starting to parse _any_ tags, and holds it for the duration. 154 pub fn parse_tags_into_with_kind<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C 155 where 156 C: FromIterator<Result<(P::Tag, TagKind), ParseError>>, 157 { 158 self.parse_tags_into_with(src, |t| { 159 let kind = t.kind(); 160 (t, kind) 161 }) 162 } 163 164 /// Parse tags into a collection of your choosing. 165 /// 166 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the 167 /// storage before starting to parse _any_ tags, and holds it for the duration. 168 pub fn parse_tags_into_with<'raw, O, C>( 169 &self, 170 src: impl IntoIterator<Item = &'raw str>, 171 f: impl FnOnce(P::Tag) -> O + Copy, 172 ) -> C 173 where 174 C: FromIterator<Result<O, ParseError>>, 175 { 176 src.into_iter() 177 .map(move |raw| { 178 self.parser 179 .parse( 180 &mut self.storage.lock()?, 181 self.key_value_separator, 182 self.path_separator, 183 raw, 184 ) 185 .map(f) 186 }) 187 .collect() 188 } 189 190 /// Get a string representation of a [`Tag`]. 191 /// 192 /// Note that this may fail to resolve a tag if the tag wasn't interned 193 /// in the current [`TagManager`]. It may alternatively resolve an incorrect tag. 194 pub fn resolve_tag(&self, tag: &P::Tag) -> Result<String, ResolveError> { 195 tag.resolve( 196 &self.storage.lock()?, 197 self.key_value_separator, 198 self.path_separator, 199 ) 200 } 201 202 /// Get the string representation of a set of [`Tag`]s. 203 /// 204 /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock 205 /// before beginning iteration, and holds it for the duration. 206 pub fn resolve_tags_into<'tag, C>(&self, src: impl IntoIterator<Item = &'tag P::Tag>) -> C 207 where 208 P::Tag: 'tag, 209 C: FromIterator<Result<String, ResolveError>>, 210 { 211 self.resolve_tags_into_with(src, identity) 212 } 213 214 /// Get the string representation of a set of [`Tag`]s. 215 /// 216 /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock 217 /// before beginning iteration, and holds it for the duration. 218 pub fn resolve_tags_into_with<'tag, O, C>( 219 &self, 220 src: impl IntoIterator<Item = &'tag P::Tag>, 221 f: impl FnOnce(String) -> O + Copy, 222 ) -> C 223 where 224 P::Tag: 'tag, 225 C: FromIterator<Result<O, ResolveError>>, 226 { 227 src.into_iter() 228 .map(move |tag| { 229 tag.resolve( 230 &self.storage.lock()?, 231 self.key_value_separator, 232 self.path_separator, 233 ) 234 .map(f) 235 }) 236 .collect() 237 } 238 239 /// Get the inner [`Storage`] of the [`TagManager`]. 240 pub fn storage(&self) -> &Storage<L, B, H> { 241 &self.storage 242 } 243 244 /// Get the [`Parser`] applied by the [`TagManager`]. 245 pub fn parser(&self) -> &P { 246 &self.parser 247 } 248 249 /// Get the key-value separator (default `":"`) used by the [`TagManager`] for [`KeyValueTag`]s. 250 pub fn key_value_separator(&self) -> KeyValueSep { 251 self.key_value_separator 252 } 253 254 /// Get the path separator (default `"/"`) used by the [`TagManager`] for [`MultipartTag`]s. 255 pub fn path_separator(&self) -> PathSep { 256 self.path_separator 257 } 258}