just playing with tangled
1//! Portable, stable hashing suitable for identifying values
2
3use blake2::Blake2b512;
4// Re-export DigestUpdate so that the ContentHash proc macro can be used in
5// external crates without directly depending on the digest crate.
6pub use digest::Update as DigestUpdate;
7use itertools::Itertools as _;
8pub use jj_lib_proc_macros::ContentHash;
9
10/// Portable, stable hashing suitable for identifying values
11///
12/// Variable-length sequences should hash a 64-bit little-endian representation
13/// of their length, then their elements in order. Unordered containers should
14/// order their elements according to their `Ord` implementation. Enums should
15/// hash a 32-bit little-endian encoding of the ordinal number of the enum
16/// variant, then the variant's fields in lexical order.
17///
18/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`.
19pub trait ContentHash {
20 /// Update the hasher state with this object's content
21 fn hash(&self, state: &mut impl DigestUpdate);
22}
23
24/// The 512-bit BLAKE2b content hash
25pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
26 use digest::Digest as _;
27 let mut hasher = Blake2b512::default();
28 x.hash(&mut hasher);
29 hasher.finalize()
30}
31
32impl ContentHash for () {
33 fn hash(&self, _: &mut impl DigestUpdate) {}
34}
35
36impl ContentHash for bool {
37 fn hash(&self, state: &mut impl DigestUpdate) {
38 u8::from(*self).hash(state);
39 }
40}
41
42impl ContentHash for u8 {
43 fn hash(&self, state: &mut impl DigestUpdate) {
44 state.update(&[*self]);
45 }
46}
47
48impl ContentHash for u32 {
49 fn hash(&self, state: &mut impl DigestUpdate) {
50 state.update(&self.to_le_bytes());
51 }
52}
53
54impl ContentHash for i32 {
55 fn hash(&self, state: &mut impl DigestUpdate) {
56 state.update(&self.to_le_bytes());
57 }
58}
59
60impl ContentHash for u64 {
61 fn hash(&self, state: &mut impl DigestUpdate) {
62 state.update(&self.to_le_bytes());
63 }
64}
65
66impl ContentHash for i64 {
67 fn hash(&self, state: &mut impl DigestUpdate) {
68 state.update(&self.to_le_bytes());
69 }
70}
71
72// TODO: Specialize for [u8] once specialization exists
73impl<T: ContentHash> ContentHash for [T] {
74 fn hash(&self, state: &mut impl DigestUpdate) {
75 state.update(&(self.len() as u64).to_le_bytes());
76 for x in self {
77 x.hash(state);
78 }
79 }
80}
81
82impl<T: ContentHash> ContentHash for Vec<T> {
83 fn hash(&self, state: &mut impl DigestUpdate) {
84 self.as_slice().hash(state);
85 }
86}
87
88impl ContentHash for String {
89 fn hash(&self, state: &mut impl DigestUpdate) {
90 self.as_bytes().hash(state);
91 }
92}
93
94impl<T: ContentHash> ContentHash for Option<T> {
95 fn hash(&self, state: &mut impl DigestUpdate) {
96 match self {
97 None => state.update(&0u32.to_le_bytes()),
98 Some(x) => {
99 state.update(&1u32.to_le_bytes());
100 x.hash(state);
101 }
102 }
103 }
104}
105
106impl<K, V> ContentHash for std::collections::HashMap<K, V>
107where
108 K: ContentHash + Ord,
109 V: ContentHash,
110{
111 fn hash(&self, state: &mut impl DigestUpdate) {
112 state.update(&(self.len() as u64).to_le_bytes());
113 let mut kv = self.iter().collect_vec();
114 kv.sort_unstable_by_key(|&(k, _)| k);
115 for (k, v) in kv {
116 k.hash(state);
117 v.hash(state);
118 }
119 }
120}
121
122impl<K> ContentHash for std::collections::HashSet<K>
123where
124 K: ContentHash + Ord,
125{
126 fn hash(&self, state: &mut impl DigestUpdate) {
127 state.update(&(self.len() as u64).to_le_bytes());
128 for k in self.iter().sorted() {
129 k.hash(state);
130 }
131 }
132}
133
134impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
135where
136 K: ContentHash,
137 V: ContentHash,
138{
139 fn hash(&self, state: &mut impl DigestUpdate) {
140 state.update(&(self.len() as u64).to_le_bytes());
141 for (k, v) in self {
142 k.hash(state);
143 v.hash(state);
144 }
145 }
146}
147
148#[cfg(test)]
149mod tests {
150 use std::collections::BTreeMap;
151 use std::collections::HashMap;
152
153 use super::*;
154
155 #[test]
156 fn test_string_sanity() {
157 let a = "a".to_string();
158 let b = "b".to_string();
159 assert_eq!(hash(&a), hash(&a.clone()));
160 assert_ne!(hash(&a), hash(&b));
161 assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string()));
162 }
163
164 #[test]
165 fn test_hash_map_key_value_distinction() {
166 let a = [("ab".to_string(), "cd".to_string())]
167 .into_iter()
168 .collect::<HashMap<_, _>>();
169 let b = [("a".to_string(), "bcd".to_string())]
170 .into_iter()
171 .collect::<HashMap<_, _>>();
172
173 assert_ne!(hash(&a), hash(&b));
174 }
175
176 #[test]
177 fn test_btree_map_key_value_distinction() {
178 let a = [("ab".to_string(), "cd".to_string())]
179 .into_iter()
180 .collect::<BTreeMap<_, _>>();
181 let b = [("a".to_string(), "bcd".to_string())]
182 .into_iter()
183 .collect::<BTreeMap<_, _>>();
184
185 assert_ne!(hash(&a), hash(&b));
186 }
187
188 #[test]
189 fn test_struct_sanity() {
190 #[derive(ContentHash)]
191 struct Foo {
192 x: i32,
193 }
194 assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 }));
195 }
196
197 #[test]
198 fn test_option_sanity() {
199 assert_ne!(hash(&Some(42)), hash(&42));
200 assert_ne!(hash(&None::<i32>), hash(&42i32));
201 }
202
203 #[test]
204 fn test_slice_sanity() {
205 assert_ne!(hash(&[42i32][..]), hash(&[12i32][..]));
206 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..]));
207 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&()));
208 assert_ne!(hash(&42i32), hash(&[42i32][..]));
209 }
210
211 #[test]
212 fn test_consistent_hashing() {
213 #[derive(ContentHash)]
214 struct Foo {
215 x: Vec<Option<i32>>,
216 y: i64,
217 }
218 let foo_hash = hex::encode(hash(&Foo {
219 x: vec![None, Some(42)],
220 y: 17,
221 }));
222 insta::assert_snapshot!(
223 foo_hash,
224 @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4"
225 );
226
227 // Try again with an equivalent generic struct deriving ContentHash.
228 #[derive(ContentHash)]
229 struct GenericFoo<X, Y> {
230 x: X,
231 y: Y,
232 }
233 assert_eq!(
234 hex::encode(hash(&GenericFoo {
235 x: vec![None, Some(42)],
236 y: 17i64
237 })),
238 foo_hash
239 );
240 }
241
242 // Test that the derived version of `ContentHash` matches the that's
243 // manually implemented for `std::Option`.
244 #[test]
245 fn derive_for_enum() {
246 #[derive(ContentHash)]
247 enum MyOption<T> {
248 None,
249 Some(T),
250 }
251 assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None));
252 assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1)));
253 }
254
255 fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> {
256 blake2b_hash(x)
257 }
258}