Lightweight tagged data library.
1//! Produce and resolve tags.
2
3use crate::error::ResolveError;
4use crate::label::DefaultLabel;
5use crate::label::Label;
6use crate::parse::*;
7use crate::storage::Storage;
8use crate::tag::KeyValueSep;
9#[cfg(doc)]
10use crate::tag::KeyValueTag;
11#[cfg(doc)]
12use crate::tag::MultipartTag;
13use crate::tag::PathSep;
14use crate::tag::PlainTag;
15use crate::tag::Tag;
16use crate::{error::ParseError, tag::TagKind};
17#[cfg(doc)]
18use std::sync::Mutex;
19use std::{convert::identity, hash::BuildHasher};
20use string_interner::backend::Backend as InternerBackend;
21use string_interner::DefaultBackend;
22use string_interner::DefaultHashBuilder;
23use string_interner::DefaultSymbol;
24#[cfg(doc)]
25use string_interner::StringInterner;
26use string_interner::Symbol;
27use typed_builder::TypedBuilder;
28
/// Constructs [`Tag`]s according to the configured parser and interner.
///
/// A single [`TagManager`] is responsible for parsing and resolving tags that
/// match the rules of a single configured parser, with storage handled by
/// an underlying [`StringInterner`]. The [`StringInterner`] may be shared
/// with other [`TagManager`]s.
///
/// [`TagManager`] is designed to be generic over:
///
/// - The parser used to produce tags.
/// - The interner used to store tag data.
///
/// The trait bounds on [`TagManager`] ensure that the parser and interner
/// agree on the [`Symbol`] used as handles for the stored string data.
/// This is required because the parser produces [`Tag`]s which store
/// [`Symbol`]s so they can later be resolved into [`String`]s to recover
/// the full originally-input tag data.
///
/// The manner in which tag data is stored depends on the `T` parameter.
/// [`PlainTag`] stores the full string data in the interner. [`KeyValueTag`]
/// stores the key and value data separately, on the expectation that keys
/// especially will be repeated, and thus a lot of space saving is achieved by
/// deduplicating them through separate interning. [`MultipartTag`] stores
/// each part separately, again on the expectation that individual parts will
/// be frequently repeated across tags, resulting in space savings from interning.
///
/// # Type parameters
///
/// - `L`: the [`Label`] shared by the tag type and the storage.
/// - `S`: the [`Symbol`] shared by the tag type and the interner backend.
/// - `T`: the [`Tag`] type produced by the parser.
/// - `P`: the [`Parser`] used to produce tags of type `T`.
/// - `B`: the interner backend, which must hand out symbols of type `S`.
/// - `H`: the hash builder used by the storage.
#[derive(TypedBuilder)]
pub struct TagManager<
    L = DefaultLabel,
    S = DefaultSymbol,
    T = PlainTag<L, S>,
    P = Plain<L, S>,
    B = DefaultBackend<S>,
    H = DefaultHashBuilder,
> where
    L: Label,
    S: Symbol,
    T: Tag<Label = L, Symbol = S>,
    P: Parser<Tag = T> + Send + Sync,
    B: InternerBackend<Symbol = S>,
    H: BuildHasher,
{
    /// The parser that turns raw strings into tags of type `T`.
    ///
    /// This field has no builder default, so it must always be supplied.
    pub(crate) parser: P,

    /// The separator placed between the key and the value in key-value tags.
    ///
    /// Falls back to `KeyValueSep::default()` when not set on the builder.
    #[builder(default)]
    pub(crate) key_value_separator: KeyValueSep,

    /// The separator placed between the parts of multipart tags.
    ///
    /// Falls back to `PathSep::default()` when not set on the builder.
    #[builder(default)]
    pub(crate) path_separator: PathSep,

    /// Interns and stores string data for tags, to reduce memory usage.
    ///
    /// This field has no builder default, so it must always be supplied.
    pub(crate) storage: Storage<L, B, H>,
}
84
85// These `Send` and `Sync` impls are safe _because_:
86//
87// 1. `key_value_separator` and `path_separator` are just read-only string slices, so they are
88// trivially `Send` and `Sync`.
89// 2. `parser` is constrained to be `Send` and `Sync`, either trivially-so, or by being wrapped
90// in an `Arc<Mutex<_>>` (in which case it takes advantage of an auto-impl for `Parser`
91// that tries to lock the parser before parsing can proceed).
92// 3. `storage` is _always_ wrapped in an `Arc<Mutex<_>>`, so it is always `Send` and `Sync`.
93//
94// Given the above, `TagManager` is _always_ safe to send and sync, and can implement these traits.
95
96unsafe impl<L, S, T, P, B, H> Send for TagManager<L, S, T, P, B, H>
97where
98 L: Label,
99 S: Symbol,
100 T: Tag<Label = L, Symbol = S>,
101 P: Parser<Tag = T> + Send + Sync,
102 B: InternerBackend<Symbol = S>,
103 H: BuildHasher,
104{
105}
106
107unsafe impl<L, S, T, P, B, H> Sync for TagManager<L, S, T, P, B, H>
108where
109 L: Label,
110 S: Symbol,
111 T: Tag<Label = L, Symbol = S>,
112 P: Parser<Tag = T> + Send + Sync,
113 B: InternerBackend<Symbol = S>,
114 H: BuildHasher,
115{
116}
117
118impl<
119 L: Label,
120 S: Symbol,
121 T: Tag<Label = L, Symbol = S>,
122 P: Parser<Tag = T> + Send + Sync,
123 B: InternerBackend<Symbol = S>,
124 H: BuildHasher,
125 > TagManager<L, S, T, P, B, H>
126{
127 /// Attempt to parse a structured tag from the provided "raw" tag.
128 ///
129 /// This may fail if the tag is empty, or if it violates the configured [`Parser`]'s rules.
130 pub fn parse_tag(&self, raw: &str) -> Result<P::Tag, ParseError> {
131 self.parser.parse(
132 &mut self.storage.lock()?,
133 self.key_value_separator,
134 self.path_separator,
135 raw,
136 )
137 }
138
139 /// Parse tags into a collection of your choosing.
140 ///
141 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
142 /// storage before starting to parse _any_ tags, and holds it for the duration.
143 pub fn parse_tags_into<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C
144 where
145 C: FromIterator<Result<P::Tag, ParseError>>,
146 {
147 self.parse_tags_into_with(src, identity)
148 }
149
150 /// Parse tags into a collection of your choosing.
151 ///
152 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
153 /// storage before starting to parse _any_ tags, and holds it for the duration.
154 pub fn parse_tags_into_with_kind<'raw, C>(&self, src: impl IntoIterator<Item = &'raw str>) -> C
155 where
156 C: FromIterator<Result<(P::Tag, TagKind), ParseError>>,
157 {
158 self.parse_tags_into_with(src, |t| {
159 let kind = t.kind();
160 (t, kind)
161 })
162 }
163
164 /// Parse tags into a collection of your choosing.
165 ///
166 /// Note this can perform strictly better than `parse_tag`, because it takes the lock on the
167 /// storage before starting to parse _any_ tags, and holds it for the duration.
168 pub fn parse_tags_into_with<'raw, O, C>(
169 &self,
170 src: impl IntoIterator<Item = &'raw str>,
171 f: impl FnOnce(P::Tag) -> O + Copy,
172 ) -> C
173 where
174 C: FromIterator<Result<O, ParseError>>,
175 {
176 src.into_iter()
177 .map(move |raw| {
178 self.parser
179 .parse(
180 &mut self.storage.lock()?,
181 self.key_value_separator,
182 self.path_separator,
183 raw,
184 )
185 .map(f)
186 })
187 .collect()
188 }
189
190 /// Get a string representation of a [`Tag`].
191 ///
192 /// Note that this may fail to resolve a tag if the tag wasn't interned
193 /// in the current [`TagManager`]. It may alternatively resolve an incorrect tag.
194 pub fn resolve_tag(&self, tag: &P::Tag) -> Result<String, ResolveError> {
195 tag.resolve(
196 &self.storage.lock()?,
197 self.key_value_separator,
198 self.path_separator,
199 )
200 }
201
202 /// Get the string representation of a set of [`Tag`]s.
203 ///
204 /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock
205 /// before beginning iteration, and holds it for the duration.
206 pub fn resolve_tags_into<'tag, C>(&self, src: impl IntoIterator<Item = &'tag P::Tag>) -> C
207 where
208 P::Tag: 'tag,
209 C: FromIterator<Result<String, ResolveError>>,
210 {
211 self.resolve_tags_into_with(src, identity)
212 }
213
214 /// Get the string representation of a set of [`Tag`]s.
215 ///
216 /// Note this can perform strictly better than `resolve_tag` because it takes the storage lock
217 /// before beginning iteration, and holds it for the duration.
218 pub fn resolve_tags_into_with<'tag, O, C>(
219 &self,
220 src: impl IntoIterator<Item = &'tag P::Tag>,
221 f: impl FnOnce(String) -> O + Copy,
222 ) -> C
223 where
224 P::Tag: 'tag,
225 C: FromIterator<Result<O, ResolveError>>,
226 {
227 src.into_iter()
228 .map(move |tag| {
229 tag.resolve(
230 &self.storage.lock()?,
231 self.key_value_separator,
232 self.path_separator,
233 )
234 .map(f)
235 })
236 .collect()
237 }
238
239 /// Get the inner [`Storage`] of the [`TagManager`].
240 pub fn storage(&self) -> &Storage<L, B, H> {
241 &self.storage
242 }
243
244 /// Get the [`Parser`] applied by the [`TagManager`].
245 pub fn parser(&self) -> &P {
246 &self.parser
247 }
248
249 /// Get the key-value separator (default `":"`) used by the [`TagManager`] for [`KeyValueTag`]s.
250 pub fn key_value_separator(&self) -> KeyValueSep {
251 self.key_value_separator
252 }
253
254 /// Get the path separator (default `"/"`) used by the [`TagManager`] for [`MultipartTag`]s.
255 pub fn path_separator(&self) -> PathSep {
256 self.path_separator
257 }
258}