A batteries included HTTP/1.1 client in OCaml
at main 508 lines 19 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6(** HTTP header field handling per 7 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5}RFC 9110 Section 8 5} 9 10 This module provides an efficient implementation of HTTP headers with 11 case-insensitive field names per 12 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5.1}RFC 9110 13 Section 5.1}. Headers can have multiple values for the same field name 14 (e.g., Set-Cookie). 15 16 {2 Type-Safe Header Names} 17 18 Header names use the {!Header_name.t} type, providing compile-time safety 19 for standard headers while allowing custom headers via [`Other]: 20 21 {[ 22 let headers = 23 Headers.empty 24 |> Headers.set `Content_type "application/json" 25 |> Headers.set `Authorization "Bearer token" 26 |> Headers.set (`Other "X-Custom") "value" 27 ]} 28 29 {2 Security} 30 31 Header names and values are validated to prevent HTTP header injection 32 attacks. CR and LF characters are rejected per 33 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5.5}RFC 9110 34 Section 5.5}. *) 35 36val src : Logs.Src.t 37(** Log source for header operations. *) 38 39type t 40(** Abstract header collection type. Headers are stored with case-insensitive 41 keys and maintain insertion order. *) 42 43(** {1 Creation and Conversion} *) 44 45val empty : t 46(** [empty] creates an empty header collection. *) 47 48val of_list : (string * string) list -> t 49(** [of_list pairs] creates headers from an association list of string pairs. 50 This is useful when parsing headers from the wire format. Later entries 51 override earlier ones for the same key. *) 52 53val to_list : t -> (string * string) list 54(** [to_list headers] converts headers to an association list. The order of 55 headers is preserved. *) 56 57(** {1 Header Injection Prevention} *) 58 59exception Invalid_header of { name : string; reason : string } 60(** Raised when a header name or value contains invalid characters (CR/LF) that 61 could enable HTTP request smuggling attacks. *) 62 63exception Invalid_basic_auth of { reason : string } 64(** Raised when Basic auth credentials contain invalid characters. Per 65 {{:https://datatracker.ietf.org/doc/html/rfc7617#section-2}RFC 7617 Section 66 2}: 67 - Username must not contain colon characters 68 - Username and password must not contain control characters (0x00-0x1F, 69 0x7F) *) 70 71(** {1 Type-Safe Header Operations} 72 73 These functions use {!Header_name.t} for compile-time type safety. *) 74 75val add : Header_name.t -> string -> t -> t 76(** [add name value headers] adds a header value. Multiple values for the same 77 header name are allowed (e.g., for Set-Cookie). 78 79 @raise Invalid_header 80 if the header value contains CR/LF characters (to prevent HTTP header 81 injection attacks). *) 82 83val set : Header_name.t -> string -> t -> t 84(** [set name value headers] sets a header value, replacing any existing values 85 for that header name. 86 87 @raise Invalid_header 88 if the header value contains CR/LF characters (to prevent HTTP header 89 injection attacks). *) 90 91val find : Header_name.t -> t -> string option 92(** [find name headers] returns the first value for a header name, or [None] if 93 the header doesn't exist. *) 94 95val all : Header_name.t -> t -> string list 96(** [all name headers] returns all values for a header name. Returns an empty 97 list if the header doesn't exist. *) 98 99val remove : Header_name.t -> t -> t 100(** [remove name headers] removes all values for a header name. *) 101 102val mem : Header_name.t -> t -> bool 103(** [mem name headers] checks if a header name exists. *) 104 105(** {1 String-Based Header Operations} 106 107 These functions accept string header names for wire format compatibility. 108 Use these when parsing HTTP messages where header names arrive as strings. 109*) 110 111val add_string : string -> string -> t -> t 112(** [add_string name value headers] adds a header using a string name. Use this 113 when parsing headers from the wire. 114 115 @raise Invalid_header if the header name or value contains CR/LF characters. 116*) 117 118val set_string : string -> string -> t -> t 119(** [set_string name value headers] sets a header using a string name. 120 121 @raise Invalid_header if the header name or value contains CR/LF characters. 122*) 123 124val string : string -> t -> string option 125(** [string name headers] gets a header using a string name. *) 126 127val all_string : string -> t -> string list 128(** [all_string name headers] gets all values for a string header name. *) 129 130val remove_string : string -> t -> t 131(** [remove_string name headers] removes a header using a string name. *) 132 133val mem_string : string -> t -> bool 134(** [mem_string name headers] checks if a header exists using a string name. *) 135 136(** {1 Merging} *) 137 138val merge : t -> t -> t 139(** [merge base override] merges two header collections. Headers from [override] 140 replace those in [base]. *) 141 142(** {1 Common Header Builders} 143 144 Convenience functions for setting common HTTP headers. *) 145 146val content_type : Mime.t -> t -> t 147(** [content_type mime headers] sets the Content-Type header. *) 148 149val content_length : int64 -> t -> t 150(** [content_length length headers] sets the Content-Length header. *) 151 152val accept : Mime.t -> t -> t 153(** [accept mime headers] sets the Accept header. *) 154 155val accept_language : string -> t -> t 156(** [accept_language lang headers] sets the Accept-Language header. Per 157 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-12.5.4}RFC 9110 158 Section 12.5.4}. 159 160 Examples: 161 {[ 162 headers 163 |> Headers.accept_language "en-US" headers 164 |> Headers.accept_language "en-US, en;q=0.9, de;q=0.8" headers 165 |> Headers.accept_language "*" 166 ]} *) 167 168val authorization : string -> t -> t 169(** [authorization value headers] sets the Authorization header with a raw 170 value. *) 171 172val bearer : string -> t -> t 173(** [bearer token headers] sets the Authorization header with a Bearer token. 174 Example: [bearer "abc123"] sets ["Authorization: Bearer abc123"]. *) 175 176val basic : username:string -> password:string -> t -> t 177(** [basic ~username ~password headers] sets the Authorization header with HTTP 178 Basic authentication (base64-encoded username:password). 179 180 @raise Invalid_basic_auth 181 if the username contains a colon character or if either username or 182 password contains control characters (RFC 7617 Section 2). *) 183 184val user_agent : string -> t -> t 185(** [user_agent ua headers] sets the User-Agent header. *) 186 187val host : string -> t -> t 188(** [host hostname headers] sets the Host header. *) 189 190val cookie : string -> string -> t -> t 191(** [cookie name value headers] adds a cookie to the Cookie header. Multiple 192 cookies can be added by calling this function multiple times. *) 193 194val range : start:int64 -> ?end_:int64 -> unit -> t -> t 195(** [range ~start ?end_ () headers] sets the Range header for partial content. 196 Example: [range ~start:0L ~end_:999L ()] requests the first 1000 bytes. *) 197 198(** {1 HTTP 100-Continue Support} 199 200 Per Recommendation #7: Expect: 100-continue protocol for large uploads. RFC 201 9110 Section 10.1.1 (Expect) *) 202 203val expect : string -> t -> t 204(** [expect value headers] sets the Expect header. Example: 205 [expect "100-continue"] for large request bodies. *) 206 207val expect_100_continue : t -> t 208(** [expect_100_continue headers] sets [Expect: 100-continue]. Use this for 209 large uploads to allow the server to reject the request before the body is 210 sent, saving bandwidth. *) 211 212(** {1 TE Header Support} 213 214 Per RFC 9110 Section 10.1.4: The TE header indicates what transfer codings 215 the client is willing to accept in the response, and whether the client is 216 willing to accept trailer fields in a chunked transfer coding. *) 217 218val te : string -> t -> t 219(** [te value headers] sets the TE header to indicate accepted transfer codings. 220 Example: [te "trailers, deflate"]. *) 221 222val te_trailers : t -> t 223(** [te_trailers headers] sets [TE: trailers] to indicate the client accepts 224 trailer fields in chunked transfer coding. Per RFC 9110 Section 10.1.4, a 225 client MUST send this if it wishes to receive trailers. *) 226 227(** {1 Cache Control Headers} 228 229 Per Recommendation #17 and #19: Response caching and conditional requests. 230 RFC 9111 (HTTP Caching), RFC 9110 Section 8.8.2-8.8.3 (Last-Modified, ETag) 231*) 232 233val if_none_match : string -> t -> t 234(** [if_none_match etag headers] sets the If-None-Match header for conditional 235 requests. The request succeeds only if the resource's ETag does NOT match. 236 Used with GET/HEAD to implement efficient caching (returns 304 Not Modified 237 if matches). *) 238 239val if_match : string -> t -> t 240(** [if_match etag headers] sets the If-Match header for conditional requests. 241 The request succeeds only if the resource's ETag matches. Used with 242 PUT/DELETE for optimistic concurrency (prevents lost updates). *) 243 244val if_modified_since : string -> t -> t 245(** [if_modified_since date headers] sets the If-Modified-Since header. The date 246 should be in HTTP-date format (RFC 9110 Section 5.6.7). Example: 247 ["Sun, 06 Nov 1994 08:49:37 GMT"]. *) 248 249val if_unmodified_since : string -> t -> t 250(** [if_unmodified_since date headers] sets the If-Unmodified-Since header. The 251 request succeeds only if the resource has NOT been modified since the date. 252*) 253 254val http_date_of_ptime : Ptime.t -> string 255(** [http_date_of_ptime time] formats a Ptime.t as an HTTP-date. Format: "Sun, 256 06 Nov 1994 08:49:37 GMT" (RFC 9110 Section 5.6.7). *) 257 258val if_modified_since_ptime : Ptime.t -> t -> t 259(** [if_modified_since_ptime time headers] sets If-Modified-Since using a 260 Ptime.t value. *) 261 262val if_unmodified_since_ptime : Ptime.t -> t -> t 263(** [if_unmodified_since_ptime time headers] sets If-Unmodified-Since using a 264 Ptime.t value. *) 265 266val cache_control : string -> t -> t 267(** [cache_control directives headers] sets the Cache-Control header with a raw 268 directive string. Example: [cache_control "no-cache, max-age=3600"]. *) 269 270val cache_control_directives : 271 ?max_age:int -> 272 ?max_stale:int option option -> 273 ?min_fresh:int -> 274 ?no_cache:bool -> 275 ?no_store:bool -> 276 ?no_transform:bool -> 277 ?only_if_cached:bool -> 278 unit -> 279 t -> 280 t 281(** [cache_control_directives ?max_age ?max_stale ?min_fresh ~no_cache ~no_store 282 ~no_transform ~only_if_cached () headers] builds a Cache-Control header 283 from individual directives (RFC 9111 request directives). 284 285 - [max_age]: Maximum age in seconds the client is willing to accept 286 - [max_stale]: Accept stale responses: 287 - [None]: omit max_stale entirely 288 - [Some None]: "max-stale" (accept any staleness) 289 - [Some (Some n)]: "max-stale=N" (accept n seconds staleness) 290 - [min_fresh]: Response must be fresh for at least n more seconds 291 - [no_cache]: Force revalidation with origin server 292 - [no_store]: Response must not be stored in cache 293 - [no_transform]: Intermediaries must not transform the response 294 - [only_if_cached]: Only return cached response, 504 if not available. *) 295 296val etag : string -> t -> t 297(** [etag value headers] sets the ETag header (for responses). Example: 298 [etag "\"abc123\""]. *) 299 300val last_modified : string -> t -> t 301(** [last_modified date headers] sets the Last-Modified header (for responses). 302 The date should be in HTTP-date format. *) 303 304val last_modified_ptime : Ptime.t -> t -> t 305(** [last_modified_ptime time headers] sets Last-Modified using a Ptime.t value. 306*) 307 308(** {1 Connection Header Handling} 309 310 Per 311 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1}RFC 9110 312 Section 7.6.1}: The Connection header field lists hop-by-hop header fields 313 that MUST be removed before forwarding the message. *) 314 315val parse_connection_header : t -> Header_name.t list 316(** [parse_connection_header headers] parses the Connection header value into a 317 list of header names. *) 318 319val hop_by_hop_headers : t -> Header_name.t list 320(** [hop_by_hop_headers headers] returns all hop-by-hop headers. This is the 321 union of {!Header_name.hop_by_hop_headers} and any headers listed in the 322 Connection header. *) 323 324val remove_hop_by_hop : t -> t 325(** [remove_hop_by_hop headers] removes all hop-by-hop headers. This should be 326 called before caching or forwarding a response. Per RFC 9110 Section 7.6.1. 327*) 328 329val connection_close : t -> bool 330(** [connection_close headers] returns [true] if Connection: close is present. 331 This indicates the connection should be closed after the current message. *) 332 333val connection_keep_alive : t -> bool 334(** [connection_keep_alive headers] returns [true] if Connection: keep-alive is 335 present. This is primarily used with HTTP/1.0 to request a persistent 336 connection. *) 337 338(** {1 Aliases} *) 339 340val multi : Header_name.t -> t -> string list 341(** [multi] is an alias for {!all}. *) 342 343val pp : Format.formatter -> t -> unit 344(** Pretty printer for headers. *) 345 346val pp_brief : Format.formatter -> t -> unit 347(** Brief pretty printer showing count only. *) 348 349(** {1 HTTP/2 Pseudo-Header Support} 350 351 HTTP/2 uses pseudo-header fields to convey information that was previously 352 carried in the request line (HTTP/1.1) or status line. Pseudo-headers start 353 with a colon character ([:]). 354 355 Per 356 {{:https://datatracker.ietf.org/doc/html/rfc9113#section-8.3}RFC 9113 357 Section 8.3}: 358 - Pseudo-headers MUST appear before regular headers 359 - Pseudo-headers MUST NOT appear in trailers 360 - Unknown pseudo-headers MUST be treated as malformed 361 362 {2 Request Pseudo-Headers} 363 364 - [:method] - HTTP method (required) 365 - [:scheme] - URI scheme (required for non-CONNECT) 366 - [:authority] - Authority portion of URI (host:port) 367 - [:path] - Path and query (required for non-CONNECT) 368 369 {2 Response Pseudo-Headers} 370 371 - [:status] - HTTP status code (required) *) 372 373val is_pseudo_header : string -> bool 374(** [is_pseudo_header name] returns [true] if the header name starts with [:]. 375 Per RFC 9113 Section 8.3, pseudo-headers are identified by a colon prefix. 376*) 377 378val pseudo : string -> t -> string option 379(** [pseudo name headers] retrieves a pseudo-header value. The [name] should NOT 380 include the colon prefix. Example: [pseudo "method" headers] retrieves 381 [:method]. *) 382 383val set_pseudo : string -> string -> t -> t 384(** [set_pseudo name value headers] sets a pseudo-header value. The [name] 385 should NOT include the colon prefix. Pseudo-headers are stored with the 386 colon prefix internally. Example: [set_pseudo "method" "GET" headers] sets 387 [:method: GET]. 388 389 @raise Invalid_header if the value contains CR/LF characters. *) 390 391val remove_pseudo : string -> t -> t 392(** [remove_pseudo name headers] removes a pseudo-header. The [name] should NOT 393 include the colon prefix. *) 394 395val mem_pseudo : string -> t -> bool 396(** [mem_pseudo name headers] returns [true] if the pseudo-header exists. The 397 [name] should NOT include the colon prefix. *) 398 399val has_pseudo_headers : t -> bool 400(** [has_pseudo_headers headers] returns [true] if any pseudo-headers are 401 present. *) 402 403val pseudo_headers : t -> (string * string) list 404(** [pseudo_headers headers] returns all pseudo-headers as [(name, value)] 405 pairs. Names are returned WITHOUT the colon prefix. *) 406 407val regular_headers : t -> (string * string) list 408(** [regular_headers headers] returns all non-pseudo headers as [(name, value)] 409 pairs. *) 410 411val to_list_ordered : t -> (string * string) list 412(** [to_list_ordered headers] returns all headers with pseudo-headers first, 413 followed by regular headers, as required by RFC 9113 Section 8.3. *) 414 415(** {2 HTTP/2 Request Header Construction} *) 416 417val h2_request : 418 meth:string -> scheme:string -> ?authority:string -> path:string -> t -> t 419(** [h2_request ~meth ~scheme ?authority ~path headers] sets the required HTTP/2 420 request pseudo-headers. 421 422 Per RFC 9113 Section 8.3.1: 423 - [:method] is required 424 - [:scheme] is required (except for CONNECT) 425 - [:path] is required (except for CONNECT, OPTIONS with empty path) 426 - [:authority] is optional but recommended 427 428 Example: 429 {[ 430 Headers.empty 431 |> Headers.h2_request ~meth:"GET" ~scheme:"https" ~authority:"example.com" 432 ~path:"/" 433 |> Headers.set `Accept "application/json" 434 ]} *) 435 436(** {2 HTTP/2 Header Validation} 437 438 Per 439 {{:https://datatracker.ietf.org/doc/html/rfc9113#section-8.2}RFC 9113 440 Section 8.2}. *) 441 442type h2_validation_error = 443 | Missing_pseudo of string (** Required pseudo-header is missing *) 444 | Invalid_pseudo of string (** Unknown or misplaced pseudo-header *) 445 | Pseudo_after_regular (** Pseudo-header appeared after regular header *) 446 | Invalid_header_name of string 447 (** Header name contains invalid characters *) 448 | Uppercase_header_name of string 449 (** Header name contains uppercase (forbidden in HTTP/2) *) 450 | Connection_header_forbidden 451 (** Connection-specific headers are forbidden in HTTP/2 *) 452 | Te_header_invalid (** TE header with value other than "trailers" *) 453 454val pp_h2_validation_error : Format.formatter -> h2_validation_error -> unit 455(** Pretty printer for validation errors. *) 456 457val validate_h2_request : t -> (unit, h2_validation_error) result 458(** [validate_h2_request headers] validates headers for HTTP/2 request 459 constraints. 460 461 Per RFC 9113 Section 8.3.1, validates: 462 - Required pseudo-headers are present ([:method], [:scheme], [:path]) 463 - No unknown pseudo-headers 464 - Pseudo-headers appear before regular headers 465 - No uppercase letters in header names 466 - No connection-specific headers (Connection, Keep-Alive, etc.) 467 - TE header only contains "trailers" if present. *) 468 469val validate_h2_response : t -> (unit, h2_validation_error) result 470(** [validate_h2_response headers] validates headers for HTTP/2 response 471 constraints. 472 473 Per RFC 9113 Section 8.3.2, validates: 474 - [:status] pseudo-header is present 475 - No other pseudo-headers 476 - Pseudo-headers appear before regular headers 477 - No uppercase letters in header names 478 - No connection-specific headers. *) 479 480val validate_h2_user_headers : t -> (unit, h2_validation_error) result 481(** [validate_h2_user_headers headers] validates user-provided headers for 482 HTTP/2. 483 484 Unlike {!validate_h2_request}, this validates headers {i before} 485 pseudo-headers are added by the HTTP/2 layer. Use this in the HTTP adapter. 486 487 Per RFC 9113 Section 8.2.2 and 8.3, validates: 488 - No pseudo-headers (user should not provide them) 489 - No uppercase letters in header names 490 - No connection-specific headers (Connection, Keep-Alive, etc.) 491 - TE header only contains "trailers" if present. *) 492 493(** {2 HTTP/2 Forbidden Headers} 494 495 Per RFC 9113 Section 8.2.2, certain headers are connection-specific and MUST 496 NOT appear in HTTP/2. *) 497 498val h2_forbidden_headers : Header_name.t list 499(** Headers that MUST NOT appear in HTTP/2 messages: 500 - Connection 501 - Keep-Alive 502 - Proxy-Connection 503 - Transfer-Encoding 504 - Upgrade. *) 505 506val remove_h2_forbidden : t -> t 507(** [remove_h2_forbidden headers] removes all HTTP/2 forbidden headers. Use this 508 when converting HTTP/1.1 headers for use with HTTP/2. *)