just playing with tangled
at globpattern 258 lines 7.1 kB view raw
1//! Portable, stable hashing suitable for identifying values 2 3use blake2::Blake2b512; 4// Re-export DigestUpdate so that the ContentHash proc macro can be used in 5// external crates without directly depending on the digest crate. 6pub use digest::Update as DigestUpdate; 7use itertools::Itertools as _; 8pub use jj_lib_proc_macros::ContentHash; 9 10/// Portable, stable hashing suitable for identifying values 11/// 12/// Variable-length sequences should hash a 64-bit little-endian representation 13/// of their length, then their elements in order. Unordered containers should 14/// order their elements according to their `Ord` implementation. Enums should 15/// hash a 32-bit little-endian encoding of the ordinal number of the enum 16/// variant, then the variant's fields in lexical order. 17/// 18/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`. 19pub trait ContentHash { 20 /// Update the hasher state with this object's content 21 fn hash(&self, state: &mut impl DigestUpdate); 22} 23 24/// The 512-bit BLAKE2b content hash 25pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> { 26 use digest::Digest as _; 27 let mut hasher = Blake2b512::default(); 28 x.hash(&mut hasher); 29 hasher.finalize() 30} 31 32impl ContentHash for () { 33 fn hash(&self, _: &mut impl DigestUpdate) {} 34} 35 36impl ContentHash for bool { 37 fn hash(&self, state: &mut impl DigestUpdate) { 38 u8::from(*self).hash(state); 39 } 40} 41 42impl ContentHash for u8 { 43 fn hash(&self, state: &mut impl DigestUpdate) { 44 state.update(&[*self]); 45 } 46} 47 48impl ContentHash for u32 { 49 fn hash(&self, state: &mut impl DigestUpdate) { 50 state.update(&self.to_le_bytes()); 51 } 52} 53 54impl ContentHash for i32 { 55 fn hash(&self, state: &mut impl DigestUpdate) { 56 state.update(&self.to_le_bytes()); 57 } 58} 59 60impl ContentHash for u64 { 61 fn hash(&self, state: &mut impl DigestUpdate) { 62 state.update(&self.to_le_bytes()); 63 } 64} 65 66impl ContentHash for i64 { 67 fn hash(&self, state: &mut impl DigestUpdate) { 68 state.update(&self.to_le_bytes()); 69 } 70} 71 72// TODO: Specialize for [u8] once specialization exists 73impl<T: ContentHash> ContentHash for [T] { 74 fn hash(&self, state: &mut impl DigestUpdate) { 75 state.update(&(self.len() as u64).to_le_bytes()); 76 for x in self { 77 x.hash(state); 78 } 79 } 80} 81 82impl<T: ContentHash> ContentHash for Vec<T> { 83 fn hash(&self, state: &mut impl DigestUpdate) { 84 self.as_slice().hash(state); 85 } 86} 87 88impl ContentHash for String { 89 fn hash(&self, state: &mut impl DigestUpdate) { 90 self.as_bytes().hash(state); 91 } 92} 93 94impl<T: ContentHash> ContentHash for Option<T> { 95 fn hash(&self, state: &mut impl DigestUpdate) { 96 match self { 97 None => state.update(&0u32.to_le_bytes()), 98 Some(x) => { 99 state.update(&1u32.to_le_bytes()); 100 x.hash(state); 101 } 102 } 103 } 104} 105 106impl<K, V> ContentHash for std::collections::HashMap<K, V> 107where 108 K: ContentHash + Ord, 109 V: ContentHash, 110{ 111 fn hash(&self, state: &mut impl DigestUpdate) { 112 state.update(&(self.len() as u64).to_le_bytes()); 113 let mut kv = self.iter().collect_vec(); 114 kv.sort_unstable_by_key(|&(k, _)| k); 115 for (k, v) in kv { 116 k.hash(state); 117 v.hash(state); 118 } 119 } 120} 121 122impl<K> ContentHash for std::collections::HashSet<K> 123where 124 K: ContentHash + Ord, 125{ 126 fn hash(&self, state: &mut impl DigestUpdate) { 127 state.update(&(self.len() as u64).to_le_bytes()); 128 for k in self.iter().sorted() { 129 k.hash(state); 130 } 131 } 132} 133 134impl<K, V> ContentHash for std::collections::BTreeMap<K, V> 135where 136 K: ContentHash, 137 V: ContentHash, 138{ 139 fn hash(&self, state: &mut impl DigestUpdate) { 140 state.update(&(self.len() as u64).to_le_bytes()); 141 for (k, v) in self { 142 k.hash(state); 143 v.hash(state); 144 } 145 } 146} 147 148#[cfg(test)] 149mod tests { 150 use std::collections::BTreeMap; 151 use std::collections::HashMap; 152 153 use super::*; 154 155 #[test] 156 fn test_string_sanity() { 157 let a = "a".to_string(); 158 let b = "b".to_string(); 159 assert_eq!(hash(&a), hash(&a.clone())); 160 assert_ne!(hash(&a), hash(&b)); 161 assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string())); 162 } 163 164 #[test] 165 fn test_hash_map_key_value_distinction() { 166 let a = [("ab".to_string(), "cd".to_string())] 167 .into_iter() 168 .collect::<HashMap<_, _>>(); 169 let b = [("a".to_string(), "bcd".to_string())] 170 .into_iter() 171 .collect::<HashMap<_, _>>(); 172 173 assert_ne!(hash(&a), hash(&b)); 174 } 175 176 #[test] 177 fn test_btree_map_key_value_distinction() { 178 let a = [("ab".to_string(), "cd".to_string())] 179 .into_iter() 180 .collect::<BTreeMap<_, _>>(); 181 let b = [("a".to_string(), "bcd".to_string())] 182 .into_iter() 183 .collect::<BTreeMap<_, _>>(); 184 185 assert_ne!(hash(&a), hash(&b)); 186 } 187 188 #[test] 189 fn test_struct_sanity() { 190 #[derive(ContentHash)] 191 struct Foo { 192 x: i32, 193 } 194 assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 })); 195 } 196 197 #[test] 198 fn test_option_sanity() { 199 assert_ne!(hash(&Some(42)), hash(&42)); 200 assert_ne!(hash(&None::<i32>), hash(&42i32)); 201 } 202 203 #[test] 204 fn test_slice_sanity() { 205 assert_ne!(hash(&[42i32][..]), hash(&[12i32][..])); 206 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..])); 207 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&())); 208 assert_ne!(hash(&42i32), hash(&[42i32][..])); 209 } 210 211 #[test] 212 fn test_consistent_hashing() { 213 #[derive(ContentHash)] 214 struct Foo { 215 x: Vec<Option<i32>>, 216 y: i64, 217 } 218 let foo_hash = hex::encode(hash(&Foo { 219 x: vec![None, Some(42)], 220 y: 17, 221 })); 222 insta::assert_snapshot!( 223 foo_hash, 224 @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4" 225 ); 226 227 // Try again with an equivalent generic struct deriving ContentHash. 228 #[derive(ContentHash)] 229 struct GenericFoo<X, Y> { 230 x: X, 231 y: Y, 232 } 233 assert_eq!( 234 hex::encode(hash(&GenericFoo { 235 x: vec![None, Some(42)], 236 y: 17i64 237 })), 238 foo_hash 239 ); 240 } 241 242 // Test that the derived version of `ContentHash` matches the that's 243 // manually implemented for `std::Option`. 244 #[test] 245 fn derive_for_enum() { 246 #[derive(ContentHash)] 247 enum MyOption<T> { 248 None, 249 Some(T), 250 } 251 assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None)); 252 assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1))); 253 } 254 255 fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> { 256 blake2b_hash(x) 257 } 258}