just playing with tangled
at ig/vimdiffwarn 264 lines 7.2 kB view raw
1//! Portable, stable hashing suitable for identifying values 2 3use blake2::Blake2b512; 4// Re-export DigestUpdate so that the ContentHash proc macro can be used in 5// external crates without directly depending on the digest crate. 6pub use digest::Update as DigestUpdate; 7use itertools::Itertools as _; 8pub use jj_lib_proc_macros::ContentHash; 9 10/// Portable, stable hashing suitable for identifying values 11/// 12/// Variable-length sequences should hash a 64-bit little-endian representation 13/// of their length, then their elements in order. Unordered containers should 14/// order their elements according to their `Ord` implementation. Enums should 15/// hash a 32-bit little-endian encoding of the ordinal number of the enum 16/// variant, then the variant's fields in lexical order. 17/// 18/// Structs can implement `ContentHash` by using `#[derive(ContentHash)]`. 19pub trait ContentHash { 20 /// Update the hasher state with this object's content 21 fn hash(&self, state: &mut impl DigestUpdate); 22} 23 24/// The 512-bit BLAKE2b content hash 25pub fn blake2b_hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> { 26 use digest::Digest as _; 27 let mut hasher = Blake2b512::default(); 28 x.hash(&mut hasher); 29 hasher.finalize() 30} 31 32impl ContentHash for () { 33 fn hash(&self, _: &mut impl DigestUpdate) {} 34} 35 36impl ContentHash for bool { 37 fn hash(&self, state: &mut impl DigestUpdate) { 38 u8::from(*self).hash(state); 39 } 40} 41 42impl ContentHash for u8 { 43 fn hash(&self, state: &mut impl DigestUpdate) { 44 state.update(&[*self]); 45 } 46} 47 48impl ContentHash for u32 { 49 fn hash(&self, state: &mut impl DigestUpdate) { 50 state.update(&self.to_le_bytes()); 51 } 52} 53 54impl ContentHash for i32 { 55 fn hash(&self, state: &mut impl DigestUpdate) { 56 state.update(&self.to_le_bytes()); 57 } 58} 59 60impl ContentHash for u64 { 61 fn hash(&self, state: &mut impl DigestUpdate) { 62 state.update(&self.to_le_bytes()); 63 } 64} 65 66impl ContentHash for i64 { 67 fn hash(&self, state: &mut impl DigestUpdate) { 68 state.update(&self.to_le_bytes()); 69 } 70} 71 72// TODO: Specialize for [u8] once specialization exists 73impl<T: ContentHash> ContentHash for [T] { 74 fn hash(&self, state: &mut impl DigestUpdate) { 75 state.update(&(self.len() as u64).to_le_bytes()); 76 for x in self { 77 x.hash(state); 78 } 79 } 80} 81 82impl<T: ContentHash> ContentHash for Vec<T> { 83 fn hash(&self, state: &mut impl DigestUpdate) { 84 self.as_slice().hash(state); 85 } 86} 87 88impl ContentHash for str { 89 fn hash(&self, state: &mut impl DigestUpdate) { 90 self.as_bytes().hash(state); 91 } 92} 93 94impl ContentHash for String { 95 fn hash(&self, state: &mut impl DigestUpdate) { 96 self.as_str().hash(state); 97 } 98} 99 100impl<T: ContentHash> ContentHash for Option<T> { 101 fn hash(&self, state: &mut impl DigestUpdate) { 102 match self { 103 None => state.update(&0u32.to_le_bytes()), 104 Some(x) => { 105 state.update(&1u32.to_le_bytes()); 106 x.hash(state); 107 } 108 } 109 } 110} 111 112impl<K, V> ContentHash for std::collections::HashMap<K, V> 113where 114 K: ContentHash + Ord, 115 V: ContentHash, 116{ 117 fn hash(&self, state: &mut impl DigestUpdate) { 118 state.update(&(self.len() as u64).to_le_bytes()); 119 let mut kv = self.iter().collect_vec(); 120 kv.sort_unstable_by_key(|&(k, _)| k); 121 for (k, v) in kv { 122 k.hash(state); 123 v.hash(state); 124 } 125 } 126} 127 128impl<K> ContentHash for std::collections::HashSet<K> 129where 130 K: ContentHash + Ord, 131{ 132 fn hash(&self, state: &mut impl DigestUpdate) { 133 state.update(&(self.len() as u64).to_le_bytes()); 134 for k in self.iter().sorted() { 135 k.hash(state); 136 } 137 } 138} 139 140impl<K, V> ContentHash for std::collections::BTreeMap<K, V> 141where 142 K: ContentHash, 143 V: ContentHash, 144{ 145 fn hash(&self, state: &mut impl DigestUpdate) { 146 state.update(&(self.len() as u64).to_le_bytes()); 147 for (k, v) in self { 148 k.hash(state); 149 v.hash(state); 150 } 151 } 152} 153 154#[cfg(test)] 155mod tests { 156 use std::collections::BTreeMap; 157 use std::collections::HashMap; 158 159 use super::*; 160 161 #[test] 162 fn test_string_sanity() { 163 let a = "a".to_string(); 164 let b = "b".to_string(); 165 assert_eq!(hash(&a), hash(&a.clone())); 166 assert_ne!(hash(&a), hash(&b)); 167 assert_ne!(hash(&"a".to_string()), hash(&"a\0".to_string())); 168 } 169 170 #[test] 171 fn test_hash_map_key_value_distinction() { 172 let a = [("ab".to_string(), "cd".to_string())] 173 .into_iter() 174 .collect::<HashMap<_, _>>(); 175 let b = [("a".to_string(), "bcd".to_string())] 176 .into_iter() 177 .collect::<HashMap<_, _>>(); 178 179 assert_ne!(hash(&a), hash(&b)); 180 } 181 182 #[test] 183 fn test_btree_map_key_value_distinction() { 184 let a = [("ab".to_string(), "cd".to_string())] 185 .into_iter() 186 .collect::<BTreeMap<_, _>>(); 187 let b = [("a".to_string(), "bcd".to_string())] 188 .into_iter() 189 .collect::<BTreeMap<_, _>>(); 190 191 assert_ne!(hash(&a), hash(&b)); 192 } 193 194 #[test] 195 fn test_struct_sanity() { 196 #[derive(ContentHash)] 197 struct Foo { 198 x: i32, 199 } 200 assert_ne!(hash(&Foo { x: 42 }), hash(&Foo { x: 12 })); 201 } 202 203 #[test] 204 fn test_option_sanity() { 205 assert_ne!(hash(&Some(42)), hash(&42)); 206 assert_ne!(hash(&None::<i32>), hash(&42i32)); 207 } 208 209 #[test] 210 fn test_slice_sanity() { 211 assert_ne!(hash(&[42i32][..]), hash(&[12i32][..])); 212 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&[42i32][..])); 213 assert_ne!(hash(&([] as [i32; 0])[..]), hash(&())); 214 assert_ne!(hash(&42i32), hash(&[42i32][..])); 215 } 216 217 #[test] 218 fn test_consistent_hashing() { 219 #[derive(ContentHash)] 220 struct Foo { 221 x: Vec<Option<i32>>, 222 y: i64, 223 } 224 let foo_hash = hex::encode(hash(&Foo { 225 x: vec![None, Some(42)], 226 y: 17, 227 })); 228 insta::assert_snapshot!( 229 foo_hash, 230 @"e33c423b4b774b1353c414e0f9ef108822fde2fd5113fcd53bf7bd9e74e3206690b96af96373f268ed95dd020c7cbe171c7b7a6947fcaf5703ff6c8e208cefd4" 231 ); 232 233 // Try again with an equivalent generic struct deriving ContentHash. 234 #[derive(ContentHash)] 235 struct GenericFoo<X, Y> { 236 x: X, 237 y: Y, 238 } 239 assert_eq!( 240 hex::encode(hash(&GenericFoo { 241 x: vec![None, Some(42)], 242 y: 17i64 243 })), 244 foo_hash 245 ); 246 } 247 248 // Test that the derived version of `ContentHash` matches the that's 249 // manually implemented for `std::Option`. 250 #[test] 251 fn derive_for_enum() { 252 #[derive(ContentHash)] 253 enum MyOption<T> { 254 None, 255 Some(T), 256 } 257 assert_eq!(hash(&Option::<i32>::None), hash(&MyOption::<i32>::None)); 258 assert_eq!(hash(&Some(1)), hash(&MyOption::Some(1))); 259 } 260 261 fn hash(x: &(impl ContentHash + ?Sized)) -> digest::Output<Blake2b512> { 262 blake2b_hash(x) 263 } 264}