Weighs the soul of incoming HTTP requests to stop AI crawlers
1package internal
2
3import (
4 "errors"
5 "fmt"
6 "log/slog"
7 "net"
8 "net/http"
9 "net/netip"
10 "strings"
11
12 "github.com/TecharoHQ/anubis"
13 "github.com/sebest/xff"
14)
15
// XFFComputePreferences selects how computeXFFHeader filters and shapes the
// X-Forwarded-For chain it produces.
//
// TODO: move into config
type XFFComputePreferences struct {
	// Each Strip* flag drops chain segments of one address class:
	//   - StripPrivate:  addresses for which netip.Addr.IsPrivate is true
	//   - StripLoopback: loopback addresses
	//   - StripCGNAT:    addresses inside the CGNat prefix
	//   - StripLLU:      link-local unicast addresses
	StripPrivate, StripLoopback, StripCGNAT, StripLLU bool

	// Flatten reduces the result to the last surviving segment instead of
	// the comma-joined chain.
	Flatten bool
}
24
// CGNat is the carrier-grade NAT shared address space, 100.64.0.0/10
// (RFC 6598).
var CGNat = netip.PrefixFrom(netip.AddrFrom4([4]byte{100, 64, 0, 0}), 10)
26
27// UnchangingCache sets the Cache-Control header to cache a response for 1 year if
28// and only if the application is compiled in "release" mode by Docker.
29func UnchangingCache(next http.Handler) http.Handler {
30 //goland:noinspection GoBoolExpressions
31 if anubis.Version == "devel" {
32 return next
33 }
34
35 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
36 w.Header().Set("Cache-Control", "public, max-age=31536000")
37 next.ServeHTTP(w, r)
38 })
39}
40
// RemoteXRealIP overwrites the X-Real-Ip request header with the address of
// the directly connected peer when useRemoteAddress is true. When it is false,
// next is returned unchanged.
//
// If bindNetwork is "unix" there is no meaningful peer address, so the header
// is set to 127.0.0.1. For any other network the host part of r.RemoteAddr is
// used.
//
// If r.RemoteAddr cannot be split into host and port, the failure is logged,
// the request is answered with 500 Internal Server Error and next is not
// called. This avoids both panicking inside the handler and forwarding a
// client-supplied X-Real-Ip that was never validated.
func RemoteXRealIP(useRemoteAddress bool, bindNetwork string, next http.Handler) http.Handler {
	if !useRemoteAddress {
		slog.Debug("skipping middleware, useRemoteAddress is empty")
		return next
	}

	if bindNetwork == "unix" {
		// For local sockets there is no real remote address but the localhost
		// address should be sensible.
		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			r.Header.Set("X-Real-Ip", "127.0.0.1")
			next.ServeHTTP(w, r)
		})
	}

	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		host, _, err := net.SplitHostPort(r.RemoteAddr)
		if err != nil {
			// Not expected for TCP listeners, but a custom listener or a
			// hand-built request can carry anything here.
			slog.Error("can't determine remote address", "remoteAddr", r.RemoteAddr, "err", err)
			http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
			return
		}
		r.Header.Set("X-Real-Ip", host)
		next.ServeHTTP(w, r)
	})
}
67
68// XForwardedForToXRealIP sets the X-Real-Ip header based on the contents
69// of the X-Forwarded-For header.
70func XForwardedForToXRealIP(next http.Handler) http.Handler {
71 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
72 if xffHeader := r.Header.Get("X-Forwarded-For"); r.Header.Get("X-Real-Ip") == "" && xffHeader != "" {
73 ip := xff.Parse(xffHeader)
74 slog.Debug("setting x-real-ip", "val", ip)
75 r.Header.Set("X-Real-Ip", ip)
76 }
77
78 next.ServeHTTP(w, r)
79 })
80}
81
82// XForwardedForUpdate sets or updates the X-Forwarded-For header, adding
83// the known remote address to an existing chain if present
84func XForwardedForUpdate(stripPrivate bool, next http.Handler) http.Handler {
85 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
86 defer next.ServeHTTP(w, r)
87
88 pref := XFFComputePreferences{
89 StripPrivate: stripPrivate,
90 StripLoopback: true,
91 StripCGNAT: true,
92 Flatten: true,
93 StripLLU: true,
94 }
95
96 remoteAddr := r.RemoteAddr
97 origXFFHeader := r.Header.Get("X-Forwarded-For")
98
99 if remoteAddr == "@" {
100 // remote is a unix socket
101 // do not touch chain
102 return
103 }
104
105 xffHeaderString, err := computeXFFHeader(remoteAddr, origXFFHeader, pref)
106 if err != nil {
107 slog.Debug("computing X-Forwarded-For header failed", "err", err)
108 return
109 }
110
111 if len(xffHeaderString) == 0 {
112 r.Header.Del("X-Forwarded-For")
113 } else {
114 r.Header.Set("X-Forwarded-For", xffHeaderString)
115 }
116 })
117}
118
// Sentinel errors returned (wrapped) by computeXFFHeader; match them with
// errors.Is.
var (
	// ErrCantSplitHostParse reports that the remote address could not be
	// split into host and port by net.SplitHostPort.
	ErrCantSplitHostParse = errors.New("internal: unable to net.SplitHostPort")
	// ErrCantParseRemoteIP reports that the host part of the remote address
	// is not a parseable IP address.
	ErrCantParseRemoteIP = errors.New("internal: unable to parse remote IP")
)
123
124func computeXFFHeader(remoteAddr string, origXFFHeader string, pref XFFComputePreferences) (string, error) {
125 remoteIP, _, err := net.SplitHostPort(remoteAddr)
126 if err != nil {
127 return "", fmt.Errorf("%w: %w", ErrCantSplitHostParse, err)
128 }
129 parsedRemoteIP, err := netip.ParseAddr(remoteIP)
130 if err != nil {
131 return "", fmt.Errorf("%w: %w", ErrCantParseRemoteIP, err)
132 }
133
134 origForwardedList := make([]string, 0, 4)
135 if origXFFHeader != "" {
136 origForwardedList = strings.Split(origXFFHeader, ",")
137 for i := range origForwardedList {
138 origForwardedList[i] = strings.TrimSpace(origForwardedList[i])
139 }
140 }
141 origForwardedList = append(origForwardedList, parsedRemoteIP.String())
142 forwardedList := make([]string, 0, len(origForwardedList))
143 // this behavior is equivalent to
144 // ingress-nginx "compute-full-forwarded-for"
145 // https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#compute-full-forwarded-for
146 //
147 // this would be the correct place to strip and/or flatten this list
148 //
149 // strip - iterate backwards and eliminate configured trusted IPs
150 // flatten - only return the last element to avoid spoofing confusion
151 //
152 // many applications handle this in different ways, but
153 // generally they'd be expected to do these two things on
154 // their own end to find the first non-spoofed IP
155 for i := len(origForwardedList) - 1; i >= 0; i-- {
156 segmentIP, err := netip.ParseAddr(origForwardedList[i])
157 if err != nil {
158 // can't assess this element, so the remainder of the chain
159 // can't be trusted. not a fatal error, since anyone can
160 // spoof an XFF header
161 slog.Debug("failed to parse XFF segment", "err", err)
162 break
163 }
164 if pref.StripPrivate && segmentIP.IsPrivate() {
165 continue
166 }
167 if pref.StripLoopback && segmentIP.IsLoopback() {
168 continue
169 }
170 if pref.StripLLU && segmentIP.IsLinkLocalUnicast() {
171 continue
172 }
173 if pref.StripCGNAT && CGNat.Contains(segmentIP) {
174 continue
175 }
176 forwardedList = append([]string{segmentIP.String()}, forwardedList...)
177 }
178 var xffHeaderString string
179 if len(forwardedList) == 0 {
180 xffHeaderString = ""
181 return xffHeaderString, nil
182 }
183 if pref.Flatten {
184 xffHeaderString = forwardedList[len(forwardedList)-1]
185 } else {
186 xffHeaderString = strings.Join(forwardedList, ",")
187 }
188 return xffHeaderString, nil
189}
190
// NoStoreCache wraps next so that the response's Cache-Control header is set
// to "no-store" before next runs.
func NoStoreCache(next http.Handler) http.Handler {
	noStore := func(w http.ResponseWriter, r *http.Request) {
		headers := w.Header()
		headers.Set("Cache-Control", "no-store")
		next.ServeHTTP(w, r)
	}
	return http.HandlerFunc(noStore)
}
198
// NoBrowsing prevents directory browsing: any request whose URL path ends in
// "/" is answered with 404 Not Found, and every other request is passed to
// next.
func NoBrowsing(next http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if looksLikeDir := strings.HasSuffix(r.URL.Path, "/"); !looksLikeDir {
			next.ServeHTTP(w, r)
			return
		}
		http.NotFound(w, r)
	})
}