use image::DynamicImage; use image_hasher::{HashAlg, HasherConfig, ImageHash}; use miette::Diagnostic; use thiserror::Error; #[derive(Debug, Error, Diagnostic)] pub enum PhashError { #[error("Failed to decode image")] ImageDecode(#[from] image::ImageError), #[error("Invalid hash format: {0}")] InvalidHashFormat(String), #[error("Invalid hex string")] ParseInt(#[from] std::num::ParseIntError), } /// Compute perceptual hash for an image using average hash (aHash) algorithm /// /// This matches the TypeScript implementation: /// 1. Resize to 8x8 (64 pixels) /// 2. Convert to grayscale /// 3. Compute average pixel value /// 4. Create 64-bit binary: 1 if pixel > avg, 0 otherwise /// 5. Convert to hex string (16 chars) pub fn compute_phash(image_bytes: &[u8]) -> Result { let img = image::load_from_memory(image_bytes)?; compute_phash_from_image(&img) } /// Compute perceptual hash from a DynamicImage pub fn compute_phash_from_image(img: &DynamicImage) -> Result { // Configure hasher with aHash (Mean) algorithm and 8x8 size let hasher = HasherConfig::new() .hash_alg(HashAlg::Mean) // average hash .hash_size(8, 8) // 64 bits .to_hasher(); // Compute hash let hash = hasher.hash_image(img); // Convert to hex string hash_to_hex(&hash) } /// Convert ImageHash to hex string format (16 chars, matching TS output) fn hash_to_hex(hash: &ImageHash) -> Result { // Get hash bytes let bytes = hash.as_bytes(); // Convert to hex string let hex = bytes .iter() .map(|b| format!("{:02x}", b)) .collect::(); // Ensure it's 16 characters (64 bits = 8 bytes = 16 hex chars) if hex.len() != 16 { return Err(PhashError::InvalidHashFormat(format!( "Expected 16 hex characters, got {}", hex.len() )) .into()); } Ok(hex) } /// Compute hamming distance between two phash hex strings /// /// Uses Brian Kernighan's algorithm to count set bits pub fn hamming_distance(hash1: &str, hash2: &str) -> Result { // Validate input lengths if hash1.len() != 16 || hash2.len() != 16 { return Err(PhashError::InvalidHashFormat(format!( "Hashes must be 16 hex characters, got {} and {}", hash1.len(), hash2.len() )) .into()); } let a = u64::from_str_radix(hash1, 16)?; let b = u64::from_str_radix(hash2, 16)?; // XOR to find differing bits let xor = a ^ b; // Count set bits using Brian Kernighan's algorithm let mut count = 0u32; let mut n = xor; while n > 0 { count += 1; n &= n - 1; // clear the lowest set bit } Ok(count) } #[cfg(test)] mod tests { use super::*; #[test] fn test_hamming_distance_identical() { let hash = "e0e0e0e0e0fcfefe"; let distance = hamming_distance(hash, hash).unwrap(); assert_eq!(distance, 0); } #[test] fn test_hamming_distance_different() { let hash1 = "0000000000000000"; let hash2 = "ffffffffffffffff"; let distance = hamming_distance(hash1, hash2).unwrap(); assert_eq!(distance, 64); // all bits different } #[test] fn test_hamming_distance_one_bit() { let hash1 = "0000000000000000"; let hash2 = "0000000000000001"; let distance = hamming_distance(hash1, hash2).unwrap(); assert_eq!(distance, 1); } #[test] fn test_hamming_distance_invalid_length() { let hash1 = "e0e0e0e0e0fcfefe"; let hash2 = "short"; let result = hamming_distance(hash1, hash2); assert!(result.is_err()); } #[test] fn test_hamming_distance_invalid_hex() { let hash1 = "e0e0e0e0e0fcfefe"; let hash2 = "gggggggggggggggg"; let result = hamming_distance(hash1, hash2); assert!(result.is_err()); } #[test] fn test_phash_format() { // Create a simple test image (1x1 black pixel) let img = DynamicImage::new_luma8(1, 1); let hash = compute_phash_from_image(&img).unwrap(); // Should be 16 hex characters assert_eq!(hash.len(), 16); // Should be valid hex u64::from_str_radix(&hash, 16).unwrap(); } }