Weighs the soul of incoming HTTP requests to stop AI crawlers
at main 208 lines 6.2 kB view raw
1package internal 2 3import ( 4 "errors" 5 "fmt" 6 "log/slog" 7 "net" 8 "net/http" 9 "net/netip" 10 "strings" 11 12 "github.com/TecharoHQ/anubis" 13 "github.com/sebest/xff" 14) 15 16// TODO: move into config 17type XFFComputePreferences struct { 18 StripPrivate bool 19 StripLoopback bool 20 StripCGNAT bool 21 StripLLU bool 22 Flatten bool 23} 24 25var CGNat = netip.MustParsePrefix("100.64.0.0/10") 26 27// UnchangingCache sets the Cache-Control header to cache a response for 1 year if 28// and only if the application is compiled in "release" mode by Docker. 29func UnchangingCache(next http.Handler) http.Handler { 30 //goland:noinspection GoBoolExpressions 31 if anubis.Version == "devel" { 32 return next 33 } 34 35 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 36 w.Header().Set("Cache-Control", "public, max-age=31536000") 37 next.ServeHTTP(w, r) 38 }) 39} 40 41// RemoteXRealIP sets the X-Real-Ip header to the request's real IP if 42// the setting is enabled by the user. 43func RemoteXRealIP(useRemoteAddress bool, bindNetwork string, next http.Handler) http.Handler { 44 if !useRemoteAddress { 45 slog.Debug("skipping middleware, useRemoteAddress is empty") 46 return next 47 } 48 49 if bindNetwork == "unix" { 50 // For local sockets there is no real remote address but the localhost 51 // address should be sensible. 52 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 53 r.Header.Set("X-Real-Ip", "127.0.0.1") 54 next.ServeHTTP(w, r) 55 }) 56 } 57 58 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 59 host, _, err := net.SplitHostPort(r.RemoteAddr) 60 if err != nil { 61 panic(err) // this should never happen 62 } 63 r.Header.Set("X-Real-Ip", host) 64 next.ServeHTTP(w, r) 65 }) 66} 67 68// XForwardedForToXRealIP sets the X-Real-Ip header based on the contents 69// of the X-Forwarded-For header. 70func XForwardedForToXRealIP(next http.Handler) http.Handler { 71 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 72 if xffHeader := r.Header.Get("X-Forwarded-For"); r.Header.Get("X-Real-Ip") == "" && xffHeader != "" { 73 ip := xff.Parse(xffHeader) 74 slog.Debug("setting x-real-ip", "val", ip) 75 r.Header.Set("X-Real-Ip", ip) 76 } 77 78 next.ServeHTTP(w, r) 79 }) 80} 81 82// XForwardedForUpdate sets or updates the X-Forwarded-For header, adding 83// the known remote address to an existing chain if present 84func XForwardedForUpdate(stripPrivate bool, next http.Handler) http.Handler { 85 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 86 defer next.ServeHTTP(w, r) 87 88 pref := XFFComputePreferences{ 89 StripPrivate: stripPrivate, 90 StripLoopback: true, 91 StripCGNAT: true, 92 Flatten: true, 93 StripLLU: true, 94 } 95 96 remoteAddr := r.RemoteAddr 97 origXFFHeader := r.Header.Get("X-Forwarded-For") 98 99 if remoteAddr == "@" { 100 // remote is a unix socket 101 // do not touch chain 102 return 103 } 104 105 xffHeaderString, err := computeXFFHeader(remoteAddr, origXFFHeader, pref) 106 if err != nil { 107 slog.Debug("computing X-Forwarded-For header failed", "err", err) 108 return 109 } 110 111 if len(xffHeaderString) == 0 { 112 r.Header.Del("X-Forwarded-For") 113 } else { 114 r.Header.Set("X-Forwarded-For", xffHeaderString) 115 } 116 }) 117} 118 119var ( 120 ErrCantSplitHostParse = errors.New("internal: unable to net.SplitHostParse") 121 ErrCantParseRemoteIP = errors.New("internal: unable to parse remote IP") 122) 123 124func computeXFFHeader(remoteAddr string, origXFFHeader string, pref XFFComputePreferences) (string, error) { 125 remoteIP, _, err := net.SplitHostPort(remoteAddr) 126 if err != nil { 127 return "", fmt.Errorf("%w: %w", ErrCantSplitHostParse, err) 128 } 129 parsedRemoteIP, err := netip.ParseAddr(remoteIP) 130 if err != nil { 131 return "", fmt.Errorf("%w: %w", ErrCantParseRemoteIP, err) 132 } 133 134 origForwardedList := make([]string, 0, 4) 135 if origXFFHeader != "" { 136 origForwardedList = strings.Split(origXFFHeader, ",") 137 for i := range origForwardedList { 138 origForwardedList[i] = strings.TrimSpace(origForwardedList[i]) 139 } 140 } 141 origForwardedList = append(origForwardedList, parsedRemoteIP.String()) 142 forwardedList := make([]string, 0, len(origForwardedList)) 143 // this behavior is equivalent to 144 // ingress-nginx "compute-full-forwarded-for" 145 // https://kubernetes.github.io/ingress-nginx/user-guide/nginx-configuration/configmap/#compute-full-forwarded-for 146 // 147 // this would be the correct place to strip and/or flatten this list 148 // 149 // strip - iterate backwards and eliminate configured trusted IPs 150 // flatten - only return the last element to avoid spoofing confusion 151 // 152 // many applications handle this in different ways, but 153 // generally they'd be expected to do these two things on 154 // their own end to find the first non-spoofed IP 155 for i := len(origForwardedList) - 1; i >= 0; i-- { 156 segmentIP, err := netip.ParseAddr(origForwardedList[i]) 157 if err != nil { 158 // can't assess this element, so the remainder of the chain 159 // can't be trusted. not a fatal error, since anyone can 160 // spoof an XFF header 161 slog.Debug("failed to parse XFF segment", "err", err) 162 break 163 } 164 if pref.StripPrivate && segmentIP.IsPrivate() { 165 continue 166 } 167 if pref.StripLoopback && segmentIP.IsLoopback() { 168 continue 169 } 170 if pref.StripLLU && segmentIP.IsLinkLocalUnicast() { 171 continue 172 } 173 if pref.StripCGNAT && CGNat.Contains(segmentIP) { 174 continue 175 } 176 forwardedList = append([]string{segmentIP.String()}, forwardedList...) 177 } 178 var xffHeaderString string 179 if len(forwardedList) == 0 { 180 xffHeaderString = "" 181 return xffHeaderString, nil 182 } 183 if pref.Flatten { 184 xffHeaderString = forwardedList[len(forwardedList)-1] 185 } else { 186 xffHeaderString = strings.Join(forwardedList, ",") 187 } 188 return xffHeaderString, nil 189} 190 191// NoStoreCache sets the Cache-Control header to no-store for the response. 192func NoStoreCache(next http.Handler) http.Handler { 193 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 194 w.Header().Set("Cache-Control", "no-store") 195 next.ServeHTTP(w, r) 196 }) 197} 198 199// NoBrowsing prevents directory browsing by returning a 404 for any request that ends with a "/". 200func NoBrowsing(next http.Handler) http.Handler { 201 return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { 202 if strings.HasSuffix(r.URL.Path, "/") { 203 http.NotFound(w, r) 204 return 205 } 206 next.ServeHTTP(w, r) 207 }) 208}