···1+# Test data (downloaded images for benchmarking)
2+testdata/
3+4+# Go build artifacts
5+*.exe
6+*.exe~
7+*.dll
8+*.so
9+*.dylib
10+11+# Go test cache
12+*.test
13+*.out
14+15+# Binaries
16+/pdqhasher
17+/helper
18+19+# IDE
20+.idea/
21+.vscode/
22+*.swp
23+*.swo
24+*~
25+26+# OS
27+.DS_Store
28+Thumbs.db
+21
LICENSE
···000000000000000000000
···1+MIT License
2+3+Copyright (c) 2026 me@haileyok.com
4+5+Permission is hereby granted, free of charge, to any person obtaining a copy
6+of this software and associated documentation files (the "Software"), to deal
7+in the Software without restriction, including without limitation the rights
8+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+copies of the Software, and to permit persons to whom the Software is
10+furnished to do so, subject to the following conditions:
11+12+The above copyright notice and this permission notice shall be included in all
13+copies or substantial portions of the Software.
14+15+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+SOFTWARE.
···1+# gopdq
2+3+A Go implementation of [Meta's PDQ](https://github.com/facebook/ThreatExchange/tree/main/pdq) perceptual hashing algorithm.
4+5+PDQ is a perceptual hashing algorithm designed to identify visually similar images. It generates a compact 256-bit hash that remains stable across common image transformations like resizing, compression, and minor edits.
6+7+8+## Installation
9+10+```bash
11+go get github.com/haileyok/gopdq
12+```
13+14+## Usage
This package provides two hashing functions: `HashFromFile` and `HashFromImage`. Either will work, but you should ensure that the input image has already been resized to no larger than 512x512 before hashing. As
[the PDQ paper](https://github.com/facebook/ThreatExchange/blob/main/hashing/hashing.pdf) explains:
18+19+> Using two-pass Jarosz filters (i.e. tent convolutions), compute a weighted average of 64x64 subblocks of
20+the luminance image. (This is prohibitively time-consuming for megapixel input so we recommend using an
21+off-the-shelf technique to first resize to 512x512 before converting from RGB to luminance.)
For convenience, there is a helper function `helpers.ResizeIfNeeded(img image.Image)` which returns a resized `image.Image` that can be passed to `HashFromImage`.
24+25+26+```go
27+package main
28+29+import (
30+ "fmt"
31+ "log"
32+33+ "github.com/haileyok/gopdq"
34+)
35+36+func main() {
37+ // Hash an image file, assuming it has already been resized.
38+ // NOTE: There is no logic that _guarantees_ an image has been resized, this is up to you to ensure.
39+ result, err := pdq.HashFromFile("image.jpg")
40+ if err != nil {
41+ log.Fatal(err)
42+ }
43+44+ fmt.Printf("Hash: %s\n", result.Hash)
45+ fmt.Printf("Quality: %d\n", result.Quality)
46+}
47+```
48+49+### Using with pre-loaded images
50+51+```go
import (
	"fmt"
	"image"
	_ "image/jpeg"
	"os"

	"github.com/haileyok/gopdq"
	"github.com/haileyok/gopdq/helpers"
)
59+60+func main() {
61+ // Open the image and decode it
62+ file, _ := os.Open("image.jpg")
63+ img, _, _ := image.Decode(file)
64+65+ // Resize if needed
66+ img = helpers.ResizeIfNeeded(img)
67+68+ // Generate hash
69+ result, _ := pdq.HashFromImage(img)
70+ fmt.Println(result.Hash)
71+}
72+```
73+74+### HashResult
75+76+Both of the above functions will return a `HashResult`, which includes both the hash and the quality score.
77+78+```go
79+type HashResult struct {
80+ Hash string
81+ Quality int // Results with a quality score < 50 should be discarded
82+ ImageHeightTimesWidth int
83+ HashDuration time.Duration
84+}
85+```
86+87+## Command Line Tools
88+89+### PDQ Hasher
90+91+```bash
92+# Build the hasher
93+go build ./cmd/pdqhasher
94+95+# Hash an image
96+./pdqhasher path/to/image.jpg
97+98+# Output:
99+# Hash: e77b19ca5399466258c656bc4666a7853939a567a9193939e667199856ccc6c6
100+# Quality: 100
101+# Binary: 1110011110110001000110011010010100110011100110010100011001100010...
102+```
103+104+### Hamming Distance Helper
105+106+```bash
107+# Build the helper
108+go build ./cmd/helper
109+110+# Calculate hamming distance
111+./helper hamming <hash1> <hash2>
112+113+# Output:
114+# 8
115+```
116+117+## About Distance
118+119+Please see https://github.com/facebook/ThreatExchange/tree/main/pdq#matching
120+121+Note that outputs from the C++ implementation's example binary and the `pdqhasher` binary provided here may not return hashes that are exactly the same due to
122+differences in resizing libraries. This is expected, see https://github.com/facebook/ThreatExchange/tree/main/pdq#hashing.
123+124+## References
125+126+- [PDQ Algorithm (C++ Reference)](https://github.com/facebook/ThreatExchange/tree/main/pdq)
127+- [PDQ Hashing Paper](https://github.com/facebook/ThreatExchange/blob/main/hashing/hashing.pdf)
128+- [ThreatExchange](https://github.com/facebook/ThreatExchange)
129+130+## Acknowledgments
131+132+This is a Go implementation of Meta's PDQ algorithm. All credit for the algorithm design goes to the original authors.
···1+package helpers
2+3+import (
4+ "encoding/hex"
5+ "fmt"
6+ "math/bits"
7+)
// PdqHashToBinary expands a hexadecimal PDQ hash into its binary string form,
// emitting one '0' or '1' character per bit with the most significant bit of
// each byte first. A 64-character hexadecimal hash therefore yields a
// 256-character string, which is useful for inserting into some vector stores.
//
// The decoding error from encoding/hex is returned unchanged when input is not
// valid hexadecimal.
func PdqHashToBinary(input string) (string, error) {
	raw, err := hex.DecodeString(input)
	if err != nil {
		return "", err
	}

	// Eight output characters per decoded byte.
	out := make([]byte, 0, len(raw)*8)
	for _, octet := range raw {
		for shift := 7; shift >= 0; shift-- {
			// Isolate the bit at position shift and map it onto '0' or '1'.
			out = append(out, '0'+((octet>>shift)&1))
		}
	}

	return string(out), nil
}
// HammingDistance reports how many bits differ between two PDQ hashes.
//
// Both hashOne and hashTwo must be 64-character hexadecimal strings, i.e. 32
// bytes once decoded. The result ranges from 0 (identical) to 256 (every bit
// differs).
//
// An error is returned if either hash is not valid hexadecimal or does not
// decode to exactly 32 bytes. Both hashes are decoded before either length is
// checked, so a decoding failure in hashTwo is reported ahead of a length
// problem in hashOne.
func HammingDistance(hashOne, hashTwo string) (int, error) {
	first, err := hex.DecodeString(hashOne)
	if err != nil {
		return 0, fmt.Errorf("invalid hash1: %w", err)
	}
	second, err := hex.DecodeString(hashTwo)
	if err != nil {
		return 0, fmt.Errorf("invalid hash2: %w", err)
	}

	switch {
	case len(first) != 32:
		return 0, fmt.Errorf("first hash has invalid length: expected 32 bytes, got %d", len(first))
	case len(second) != 32:
		return 0, fmt.Errorf("second hash has invalid length: expected 32 bytes, got %d", len(second))
	}

	// XOR leaves a 1 in every position where the two bytes disagree; counting
	// those ones across all 32 byte pairs gives the distance.
	var diff int
	for i, b := range first {
		diff += bits.OnesCount8(b ^ second[i])
	}

	return diff, nil
}
+29
helpers/resize.go
···00000000000000000000000000000
···1+package helpers
2+3+import (
4+ "image"
5+ _ "image/gif"
6+ _ "image/jpeg"
7+ _ "image/png"
8+9+ pdq "github.com/haileyok/gopdq"
10+ "github.com/nfnt/resize"
11+ _ "golang.org/x/image/bmp"
12+ _ "golang.org/x/image/tiff"
13+ _ "golang.org/x/image/webp"
14+)
15+16+func ResizeIfNeeded(img image.Image) image.Image {
17+ size := img.Bounds().Size()
18+19+ if size.X > pdq.DownsampleDims || size.Y > pdq.DownsampleDims {
20+ // SEE: https://github.com/facebook/ThreatExchange/blob/main/pdq/cpp/io/pdqio.cpp#L103
21+ // we use NearestNeighbor here as the PDQ uses that algo as well (unspecified parameter
22+ // which defaults to nearest neighbor, see https://cimg.eu/reference/structcimg__library_1_1CImg.html)
23+ // even still, because the two libraries have different implementations, we'll still see
24+ // minor differences in output. that is expected. see "More on Downsampling" in hashing.pdf
25+ return resize.Resize(pdq.DownsampleDims, pdq.DownsampleDims, img, resize.NearestNeighbor)
26+ }
27+28+ return img
29+}