just playing with tangled
1//! Portable, stable hashing suitable for identifying values
2
3use blake2::Blake2b512;
4// Re-export DigestUpdate so that the ContentHash proc macro can be used in
5// external crates without directly depending on the digest crate.
6pub use digest::Update as DigestUpdate;
7use itertools::Itertools as _;
8pub use jj_lib_proc_macros::ContentHash;
9
10/// Portable, stable hashing suitable for identifying values
11///
12/// Variable-length sequences should hash a 64-bit little-endian representation
13/// of their length, then their elements in order. Unordered containers should
14/// order their elements according to their `Ord` implementation. Enums should
15/// hash a 32-bit little-endian encoding of the ordinal number of the enum
16/// variant, then the variant's fields in lexical order.
17///
18/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
19pub trait ContentHash {
20 /// Update the hasher state with this object's content
21 fn hash(&self, state: &mut impl DigestUpdate);
22}
23
24/// The 512-bit BLAKE2b content hash
25pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
26 use digest::Digest as _;
27 let mut hasher = Blake2b512::default();
28 x.hash(&mut hasher);
29 hasher.finalize()
30}
31
32impl ContentHash for () {
33 fn hash(&self, _: &mut impl DigestUpdate) {}
34}
35
36impl ContentHash for bool {
37 fn hash(&self, state: &mut impl DigestUpdate) {
38 u8::from(*self).hash(state);
39 }
40}
41
42impl ContentHash for u8 {
43 fn hash(&self, state: &mut impl DigestUpdate) {
44 state.update(&[*self]);
45 }
46}
47
48impl ContentHash for u32 {
49 fn hash(&self, state: &mut impl DigestUpdate) {
50 state.update(&self.to_le_bytes());
51 }
52}
53
54impl ContentHash for i32 {
55 fn hash(&self, state: &mut impl DigestUpdate) {
56 state.update(&self.to_le_bytes());
57 }
58}
59
60impl ContentHash for u64 {
61 fn hash(&self, state: &mut impl DigestUpdate) {
62 state.update(&self.to_le_bytes());
63 }
64}
65
66impl ContentHash for i64 {
67 fn hash(&self, state: &mut impl DigestUpdate) {
68 state.update(&self.to_le_bytes());
69 }
70}
71
72// TODO: Specialize for [u8] once specialization exists
73impl<T: ContentHash> ContentHash for [T] {
74 fn hash(&self, state: &mut impl DigestUpdate) {
75 state.update(&(self.len() as u64).to_le_bytes());
76 for x in self {
77 x.hash(state);
78 }
79 }
80}
81
82impl<T: ContentHash> ContentHash for Vec<T> {
83 fn hash(&self, state: &mut impl DigestUpdate) {
84 self.as_slice().hash(state);
85 }
86}
87
88impl ContentHash for str {
89 fn hash(&self, state: &mut impl DigestUpdate) {
90 self.as_bytes().hash(state);
91 }
92}
93
94impl ContentHash for String {
95 fn hash(&self, state: &mut impl DigestUpdate) {
96 self.as_str().hash(state);
97 }
98}
99
100impl<T: ContentHash> ContentHash for Option<T> {
101 fn hash(&self, state: &mut impl DigestUpdate) {
102 match self {
103 None => state.update(&0u32.to_le_bytes()),
104 Some(x) => {
105 state.update(&1u32.to_le_bytes());
106 x.hash(state);
107 }
108 }
109 }
110}
111
112impl<K, V> ContentHash for std::collections::HashMap<K, V>
113where
114 K: ContentHash + Ord,
115 V: ContentHash,
116{
117 fn hash(&self, state: &mut impl DigestUpdate) {
118 state.update(&(self.len() as u64).to_le_bytes());
119 let mut kv = self.iter().collect_vec();
120 kv.sort_unstable_by_key(|&(k, _)| k);
121 for (k, v) in kv {
122 k.hash(state);
123 v.hash(state);
124 }
125 }
126}
127
128impl<K> ContentHash for std::collections::HashSet<K>
129where
130 K: ContentHash + Ord,
131{
132 fn hash(&self, state: &mut impl DigestUpdate) {
133 state.update(&(self.len() as u64).to_le_bytes());
134 for k in self.iter().sorted() {
135 k.hash(state);
136 }
137 }
138}
139
140impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
141where
142 K: ContentHash,
143 V: ContentHash,
144{
145 fn hash(&self, state: &mut impl DigestUpdate) {
146 state.update(&(self.len() as u64).to_le_bytes());
147 for (k, v) in self {
148 k.hash(state);
149 v.hash(state);
150 }
151 }
152}
153
154#[cfg(test)]
155mod tests {
156 use std::collections::BTreeMap;
157 use std::collections::HashMap;
158
159 use super::*;
160
161 #[test]
162 fn test_string_sanity() {
163 let a = "a".to_string();
164 let b = "b".to_string();
165 assert_eq!(hash(&a), hash(&a.clone()));
166 assert_ne!(hash(&a), hash(&b));
167 assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string()));
168 }
169
170 #[test]
171 fn test_hash_map_key_value_distinction() {
172 let a = [("ab".to_string(), "cd".to_string())]
173 .into_iter()
174 .collect::<HashMap<_, _>>();
175 let b = [("a".to_string(), "bcd".to_string())]
176 .into_iter()
177 .collect::<HashMap<_, _>>();
178
179 assert_ne!(hash(&a), hash(&b));
180 }
181
182 #[test]
183 fn test_btree_map_key_value_distinction() {
184 let a = [("ab".to_string(), "cd".to_string())]
185 .into_iter()
186 .collect::<BTreeMap<_, _>>();
187 let b = [("a".to_string(), "bcd".to_string())]
188 .into_iter()
189 .collect::<BTreeMap<_, _>>();
190
191 assert_ne!(hash(&a), hash(&b));
192 }
193
194 #[test]
195 fn test_struct_sanity() {
196 #[derive(ContentHash)]
197 struct Foo {
198 x: i32,
199 }
200 assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 }));
201 }
202
203 #[test]
204 fn test_option_sanity() {
205 assert_ne!(hash(&Some(42)), hash(&42));
206 assert_ne!(hash(&None::<i32>), hash(&42i32));
207 }
208
209 #[test]
210 fn test_slice_sanity() {
211 assert_ne!(hash(&[42i32][..]), hash(&[12i32][..]));
212 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..]));
213 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&()));
214 assert_ne!(hash(&42i32), hash(&[42i32][..]));
215 }
216
217 #[test]
218 fn test_consistent_hashing() {
219 #[derive(ContentHash)]
220 struct Foo {
221 x: Vec<Option<i32>>,
222 y: i64,
223 }
224 let foo_hash = hex::encode(hash(&Foo {
225 x: vec![None, Some(42)],
226 y: 17,
227 }));
228 insta::assert_snapshot!(
229 foo_hash,
230 @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4"
231 );
232
233 // Try again with an equivalent generic struct deriving ContentHash.
234 #[derive(ContentHash)]
235 struct GenericFoo<X, Y> {
236 x: X,
237 y: Y,
238 }
239 assert_eq!(
240 hex::encode(hash(&GenericFoo {
241 x: vec![None, Some(42)],
242 y: 17i64
243 })),
244 foo_hash
245 );
246 }
247
248 // Test that the derived version of `ContentHash` matches the that's
249 // manually implemented for `std::Option`.
250 #[test]
251 fn derive_for_enum() {
252 #[derive(ContentHash)]
253 enum MyOption<T> {
254 None,
255 Some(T),
256 }
257 assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None));
258 assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1)));
259 }
260
261 fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
262 blake2b_hash(x)
263 }
264}