A batteries included HTTP/1.1 client in OCaml
at main 665 lines 22 kB view raw
1(*--------------------------------------------------------------------------- 2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved. 3 SPDX-License-Identifier: ISC 4 ---------------------------------------------------------------------------*) 5 6let src = Logs.Src.create "requests.headers" ~doc:"HTTP Headers" 7 8module Log = (val Logs.src_log src : Logs.LOG) 9 10(* Use a map with lowercase keys for case-insensitive lookup *) 11module String_map = Map.Make (String) 12 13type t = (string * string list) String_map.t 14(** The internal representation stores: (canonical_name, values) *) 15 16let empty = String_map.empty 17 18(** {1 Header Injection Prevention} 19 20 Per Recommendation #3: Validate that header names and values do not contain 21 newlines (CR/LF) which could enable HTTP request smuggling attacks. 22 23 Note: We use Invalid_argument here to avoid a dependency cycle with Error 24 module. The error will be caught and wrapped appropriately by higher-level 25 code. *) 26 27exception Invalid_header of { name : string; reason : string } 28 29(** {1 Basic Auth Credential Validation} 30 31 Per RFC 7617 Section 2: 32 - Username must not contain a colon character 33 - Neither username nor password may contain control characters (0x00-0x1F, 34 0x7F) *) 35 36exception Invalid_basic_auth of { reason : string } 37 38let contains_control_chars s = 39 String.exists 40 (fun c -> 41 let code = Char.code c in 42 code <= 0x1F || code = 0x7F) 43 s 44 45let validate_basic_auth_credentials ~username ~password = 46 (* RFC 7617 Section 2: "a user-id containing a colon character is invalid" *) 47 if String.contains username ':' then 48 raise 49 (Invalid_basic_auth 50 { reason = "Username contains colon character (RFC 7617 Section 2)" }); 51 (* RFC 7617 Section 2: "The user-id and password MUST NOT contain any control characters" *) 52 if contains_control_chars username then 53 raise 54 (Invalid_basic_auth 55 { 56 reason = "Username contains control characters (RFC 7617 Section 2)"; 57 }); 58 if contains_control_chars password then 59 raise 60 (Invalid_basic_auth 61 { 62 reason = "Password contains control characters (RFC 7617 Section 2)"; 63 }) 64 65let validate_header_name_str name = 66 if String.contains name '\r' || String.contains name '\n' then 67 raise 68 (Invalid_header 69 { 70 name; 71 reason = 72 "Header name contains CR/LF characters (potential HTTP smuggling)"; 73 }) 74 75let validate_header_value name value = 76 if String.contains value '\r' || String.contains value '\n' then 77 raise 78 (Invalid_header 79 { 80 name; 81 reason = 82 "Header value contains CR/LF characters (potential HTTP smuggling)"; 83 }) 84 85(** {1 Core Operations with Typed Header Names} *) 86 87let add (name : Header_name.t) value t = 88 (* Store header names in lowercase for HTTP/2 compatibility. 89 HTTP/1.x headers are case-insensitive per RFC 9110. *) 90 let canonical = Header_name.to_lowercase_string name in 91 let nkey = canonical in 92 validate_header_value canonical value; 93 let existing = 94 match String_map.find_opt nkey t with 95 | Some (_, values) -> values 96 | None -> [] 97 in 98 (* Append to maintain order, avoiding reversal on retrieval *) 99 String_map.add nkey (canonical, existing @ [ value ]) t 100 101let set (name : Header_name.t) value t = 102 (* Store header names in lowercase for HTTP/2 compatibility. 103 HTTP/1.x headers are case-insensitive per RFC 9110. *) 104 let canonical = Header_name.to_lowercase_string name in 105 let nkey = canonical in 106 validate_header_value canonical value; 107 String_map.add nkey (canonical, [ value ]) t 108 109let find (name : Header_name.t) t = 110 let nkey = Header_name.to_lowercase_string name in 111 match String_map.find_opt nkey t with 112 | Some (_, values) -> List.nth_opt values 0 113 | None -> None 114 115let all (name : Header_name.t) t = 116 let nkey = Header_name.to_lowercase_string name in 117 match String_map.find_opt nkey t with 118 | Some (_, values) -> values 119 | None -> [] 120 121let remove (name : Header_name.t) t = 122 let nkey = Header_name.to_lowercase_string name in 123 String_map.remove nkey t 124 125let mem (name : Header_name.t) t = 126 let nkey = Header_name.to_lowercase_string name in 127 String_map.mem nkey t 128 129(** {1 String-based Operations for Wire Format Compatibility} 130 131 These are used internally when parsing HTTP messages from the wire, where 132 header names come as strings. *) 133 134let add_string key value t = 135 validate_header_name_str key; 136 validate_header_value key value; 137 let nkey = String.lowercase_ascii key in 138 let existing = 139 match String_map.find_opt nkey t with 140 | Some (_, values) -> values 141 | None -> [] 142 in 143 String_map.add nkey (key, existing @ [ value ]) t 144 145let set_string key value t = 146 validate_header_name_str key; 147 validate_header_value key value; 148 let nkey = String.lowercase_ascii key in 149 String_map.add nkey (key, [ value ]) t 150 151let string key t = 152 let nkey = String.lowercase_ascii key in 153 match String_map.find_opt nkey t with 154 | Some (_, values) -> List.nth_opt values 0 155 | None -> None 156 157let all_string key t = 158 let nkey = String.lowercase_ascii key in 159 match String_map.find_opt nkey t with 160 | Some (_, values) -> values 161 | None -> [] 162 163let remove_string key t = 164 let nkey = String.lowercase_ascii key in 165 String_map.remove nkey t 166 167let mem_string key t = 168 let nkey = String.lowercase_ascii key in 169 String_map.mem nkey t 170 171(** {1 Conversion} *) 172 173let of_list lst = 174 List.fold_left (fun acc (k, v) -> add_string k v acc) empty lst 175 176let to_list t = 177 String_map.fold 178 (fun _ (orig_key, values) acc -> 179 (* Values are already in correct order, build list in reverse then reverse at end *) 180 List.fold_left (fun acc v -> (orig_key, v) :: acc) acc values) 181 t [] 182 |> List.rev 183 184let merge t1 t2 = String_map.union (fun _ _ v2 -> Some v2) t1 t2 185 186(** {1 Common Header Builders} *) 187 188let content_type mime t = set `Content_type (Mime.to_string mime) t 189let content_length len t = set `Content_length (Int64.to_string len) t 190let accept mime t = set `Accept (Mime.to_string mime) t 191let accept_language lang t = set `Accept_language lang t 192let authorization value t = set `Authorization value t 193let bearer token t = set `Authorization (Fmt.str "Bearer %s" token) t 194 195let basic ~username ~password t = 196 validate_basic_auth_credentials ~username ~password; 197 let credentials = Fmt.str "%s:%s" username password in 198 let encoded = Base64.encode_exn credentials in 199 set `Authorization (Fmt.str "Basic %s" encoded) t 200 201let user_agent ua t = set `User_agent ua t 202let host h t = set `Host h t 203let cookie name value t = add `Cookie (Fmt.str "%s=%s" name value) t 204 205let range ~start ?end_ () t = 206 let range_value = 207 match end_ with 208 | None -> Fmt.str "bytes=%Ld-" start 209 | Some e -> Fmt.str "bytes=%Ld-%Ld" start e 210 in 211 set `Range range_value t 212 213(** {1 HTTP 100-Continue Support} 214 215 Per Recommendation #7: Expect: 100-continue protocol for large uploads. RFC 216 9110 Section 10.1.1 (Expect) *) 217 218let expect value t = set `Expect value t 219let expect_100_continue t = set `Expect "100-continue" t 220 221(** {1 TE Header Support} 222 223 Per RFC 9110 Section 10.1.4: The TE header indicates what transfer codings 224 the client is willing to accept in the response, and whether the client is 225 willing to accept trailer fields in a chunked transfer coding. *) 226 227let te value t = set `Te value t 228let te_trailers t = set `Te "trailers" t 229 230(** {1 Cache Control Headers} 231 232 Per Recommendation #17 and #19: Response caching and conditional requests. 233 RFC 9111 (HTTP Caching), RFC 9110 Section 8.8.2-8.8.3 (Last-Modified, ETag) 234*) 235 236let if_none_match etag t = set `If_none_match etag t 237let if_match etag t = set `If_match etag t 238let if_modified_since date t = set `If_modified_since date t 239let if_unmodified_since date t = set `If_unmodified_since date t 240 241(** Format a Ptime.t as an HTTP-date (RFC 9110 Section 5.6.7) *) 242let http_date_of_ptime time = 243 (* HTTP-date format: "Sun, 06 Nov 1994 08:49:37 GMT" *) 244 let (year, month, day), ((hour, min, sec), _tz_offset) = 245 Ptime.to_date_time time 246 in 247 let weekday = 248 match Ptime.weekday time with 249 | `Sun -> "Sun" 250 | `Mon -> "Mon" 251 | `Tue -> "Tue" 252 | `Wed -> "Wed" 253 | `Thu -> "Thu" 254 | `Fri -> "Fri" 255 | `Sat -> "Sat" 256 in 257 let month_name = 258 [| 259 ""; 260 "Jan"; 261 "Feb"; 262 "Mar"; 263 "Apr"; 264 "May"; 265 "Jun"; 266 "Jul"; 267 "Aug"; 268 "Sep"; 269 "Oct"; 270 "Nov"; 271 "Dec"; 272 |].(month) 273 in 274 Fmt.str "%s, %02d %s %04d %02d:%02d:%02d GMT" weekday day month_name year hour 275 min sec 276 277let if_modified_since_ptime time t = 278 if_modified_since (http_date_of_ptime time) t 279 280let if_unmodified_since_ptime time t = 281 if_unmodified_since (http_date_of_ptime time) t 282 283let cache_control directives t = set `Cache_control directives t 284 285(** Build Cache-Control header from common directive components. For max_stale: 286 [None] = not present, [Some None] = any staleness, [Some (Some n)] = n 287 seconds *) 288let cache_control_directives : 289 ?max_age:int -> 290 ?max_stale:int option option -> 291 ?min_fresh:int -> 292 ?no_cache:bool -> 293 ?no_store:bool -> 294 ?no_transform:bool -> 295 ?only_if_cached:bool -> 296 unit -> 297 t -> 298 t = 299 fun ?max_age ?max_stale ?min_fresh ?(no_cache = false) ?(no_store = false) 300 ?(no_transform = false) ?(only_if_cached = false) () t -> 301 let directives = [] in 302 let directives = 303 match max_age with 304 | Some age -> Fmt.str "max-age=%d" age :: directives 305 | None -> directives 306 in 307 let directives = 308 match max_stale with 309 | Some (Some None) -> "max-stale" :: directives 310 | Some (Some (Some secs)) -> Fmt.str "max-stale=%d" secs :: directives 311 | Some None | None -> directives 312 in 313 let directives = 314 match min_fresh with 315 | Some secs -> Fmt.str "min-fresh=%d" secs :: directives 316 | None -> directives 317 in 318 let directives = if no_cache then "no-cache" :: directives else directives in 319 let directives = if no_store then "no-store" :: directives else directives in 320 let directives = 321 if no_transform then "no-transform" :: directives else directives 322 in 323 let directives = 324 if only_if_cached then "only-if-cached" :: directives else directives 325 in 326 match directives with 327 | [] -> t 328 | _ -> set `Cache_control (String.concat ", " (List.rev directives)) t 329 330let etag value t = set `Etag value t 331let last_modified date t = set `Last_modified date t 332let last_modified_ptime time t = last_modified (http_date_of_ptime time) t 333 334(* Additional helper for getting multiple header values *) 335let multi name t = all name t 336 337(** {1 Connection Header Handling} 338 339 Per RFC 9110 Section 7.6.1: The Connection header field lists hop-by-hop 340 header fields that MUST be removed before forwarding the message. *) 341 342(** Parse Connection header value into list of header names. The Connection 343 header lists additional hop-by-hop headers. *) 344let parse_connection_header t = 345 match find `Connection t with 346 | None -> [] 347 | Some value -> 348 String.split_on_char ',' value 349 |> List.map (fun s -> Header_name.of_string (String.trim s)) 350 |> List.filter (fun n -> not (Header_name.equal n (`Other ""))) 351 352(** Get all hop-by-hop headers from a response. Returns the union of default 353 hop-by-hop headers and any headers listed in the Connection header. *) 354let hop_by_hop_headers t = 355 let connection_headers = parse_connection_header t in 356 Header_name.hop_by_hop_headers @ connection_headers 357 |> List.sort_uniq Header_name.compare 358 359(** Remove hop-by-hop headers from a header collection. This should be called 360 before caching or forwarding a response. Per RFC 9110 Section 7.6.1. *) 361let remove_hop_by_hop t = 362 let hop_by_hop = hop_by_hop_headers t in 363 List.fold_left (fun headers name -> remove name headers) t hop_by_hop 364 365(** Check if a response indicates the connection should be closed. Returns true 366 if Connection: close is present. *) 367let connection_close t = 368 match find `Connection t with 369 | Some value -> 370 String.split_on_char ',' value 371 |> List.exists (fun s -> String.trim (String.lowercase_ascii s) = "close") 372 | None -> false 373 374(** Check if a response indicates the connection should be kept alive. Returns 375 true if Connection: keep-alive is present (HTTP/1.0 behavior). *) 376let connection_keep_alive t = 377 match find `Connection t with 378 | Some value -> 379 String.split_on_char ',' value 380 |> List.exists (fun s -> 381 String.trim (String.lowercase_ascii s) = "keep-alive") 382 | None -> false 383 384(* Pretty printer for headers *) 385let pp ppf t = 386 Fmt.pf ppf "@[<v>Headers:@,"; 387 let headers = to_list t in 388 List.iter (fun (k, v) -> Fmt.pf ppf " %s: %s@," k v) headers; 389 Fmt.pf ppf "@]" 390 391let pp_brief ppf t = 392 let headers = to_list t in 393 let count = List.length headers in 394 Fmt.pf ppf "Headers(%d entries)" count 395 396(** {1 HTTP/2 Pseudo-Header Support} 397 398 Per 399 {{:https://datatracker.ietf.org/doc/html/rfc9113#section-8.3}RFC 9113 400 Section 8.3}. *) 401 402let is_pseudo_header name = String.length name > 0 && name.[0] = ':' 403 404let pseudo name t = 405 let key = ":" ^ name in 406 string key t 407 408let set_pseudo name value t = 409 let key = ":" ^ name in 410 set_string key value t 411 412let remove_pseudo name t = 413 let key = ":" ^ name in 414 remove_string key t 415 416let mem_pseudo name t = 417 let key = ":" ^ name in 418 mem_string key t 419 420let has_pseudo_headers t = 421 String_map.exists (fun key _ -> String.length key > 0 && key.[0] = ':') t 422 423let pseudo_headers t = 424 String_map.fold 425 (fun key (orig_key, values) acc -> 426 if is_pseudo_header key then 427 (* Remove the colon prefix for the returned name *) 428 let name = String.sub orig_key 1 (String.length orig_key - 1) in 429 List.fold_left (fun acc v -> (name, v) :: acc) acc values 430 else acc) 431 t [] 432 |> List.rev 433 434let regular_headers t = 435 String_map.fold 436 (fun key (orig_key, values) acc -> 437 if not (is_pseudo_header key) then 438 List.fold_left (fun acc v -> (orig_key, v) :: acc) acc values 439 else acc) 440 t [] 441 |> List.rev 442 443let to_list_ordered t = 444 (* RFC 9113 Section 8.3: pseudo-headers MUST appear before regular headers *) 445 let pseudos = 446 String_map.fold 447 (fun key (orig_key, values) acc -> 448 if is_pseudo_header key then 449 List.fold_left (fun acc v -> (orig_key, v) :: acc) acc values 450 else acc) 451 t [] 452 |> List.rev 453 in 454 let regulars = 455 String_map.fold 456 (fun key (orig_key, values) acc -> 457 if not (is_pseudo_header key) then 458 List.fold_left (fun acc v -> (orig_key, v) :: acc) acc values 459 else acc) 460 t [] 461 |> List.rev 462 in 463 pseudos @ regulars 464 465let h2_request ~meth ~scheme ?authority ~path t = 466 let t = set_pseudo "method" meth t in 467 let t = set_pseudo "scheme" scheme t in 468 let t = 469 match authority with 470 | Some auth -> set_pseudo "authority" auth t 471 | None -> t 472 in 473 set_pseudo "path" path t 474 475(** {2 HTTP/2 Header Validation} *) 476 477type h2_validation_error = 478 | Missing_pseudo of string 479 | Invalid_pseudo of string 480 | Pseudo_after_regular 481 | Invalid_header_name of string 482 | Uppercase_header_name of string 483 | Connection_header_forbidden 484 | Te_header_invalid 485 486let pp_h2_validation_error ppf = function 487 | Missing_pseudo name -> Fmt.pf ppf "Missing required pseudo-header: :%s" name 488 | Invalid_pseudo name -> 489 Fmt.pf ppf "Invalid or unknown pseudo-header: :%s" name 490 | Pseudo_after_regular -> 491 Fmt.pf ppf "Pseudo-header appeared after regular header" 492 | Invalid_header_name name -> Fmt.pf ppf "Invalid header name: %s" name 493 | Uppercase_header_name name -> 494 Fmt.pf ppf "Header name contains uppercase (forbidden in HTTP/2): %s" name 495 | Connection_header_forbidden -> 496 Fmt.pf ppf "Connection-specific header forbidden in HTTP/2" 497 | Te_header_invalid -> 498 Fmt.pf ppf "TE header must only contain 'trailers' in HTTP/2" 499 500(** HTTP/2 forbidden headers per RFC 9113 Section 8.2.2 *) 501let h2_forbidden_headers : Header_name.t list = 502 [ 503 `Connection; 504 `Keep_alive; 505 `Other "Proxy-Connection"; 506 `Transfer_encoding; 507 `Upgrade; 508 ] 509 510let remove_h2_forbidden t = 511 List.fold_left 512 (fun headers name -> remove name headers) 513 t h2_forbidden_headers 514 515(** Check if a string contains uppercase ASCII letters *) 516let contains_uppercase s = String.exists (fun c -> c >= 'A' && c <= 'Z') s 517 518(** Valid request pseudo-headers per RFC 9113 Section 8.3.1 *) 519let valid_request_pseudos = 520 [ ":method"; ":scheme"; ":authority"; ":path"; ":protocol" ] 521 522(** Valid response pseudo-headers per RFC 9113 Section 8.3.2 *) 523let valid_response_pseudos = [ ":status" ] 524 525let rec check_pseudo_order seen_regular = function 526 | [] -> Ok () 527 | (name, _) :: rest -> 528 if is_pseudo_header name then 529 if seen_regular then Error Pseudo_after_regular 530 else check_pseudo_order false rest 531 else check_pseudo_order true rest 532 533let check_h2_request_pseudos t headers_list is_connect = 534 let has_protocol = mem_pseudo "protocol" t in 535 if not (mem_pseudo "method" t) then Error (Missing_pseudo "method") 536 else if has_protocol && not is_connect then 537 Error (Invalid_pseudo "protocol (requires CONNECT method)") 538 else if (not is_connect) && not (mem_pseudo "scheme" t) then 539 Error (Missing_pseudo "scheme") 540 else if (not is_connect) && not (mem_pseudo "path" t) then 541 Error (Missing_pseudo "path") 542 else 543 match 544 List.find_opt 545 (fun (name, _) -> 546 is_pseudo_header name && not (List.mem name valid_request_pseudos)) 547 headers_list 548 with 549 | Some (name, _) -> 550 Error (Invalid_pseudo (String.sub name 1 (String.length name - 1))) 551 | None -> Ok () 552 553let check_h2_regular_headers t headers_list = 554 match 555 List.find_opt 556 (fun (name, _) -> 557 (not (is_pseudo_header name)) && contains_uppercase name) 558 headers_list 559 with 560 | Some (name, _) -> Error (Uppercase_header_name name) 561 | None -> ( 562 if List.exists (fun h -> mem h t) h2_forbidden_headers then 563 Error Connection_header_forbidden 564 else 565 match find `Te t with 566 | Some te when String.lowercase_ascii (String.trim te) <> "trailers" -> 567 Error Te_header_invalid 568 | _ -> Ok ()) 569 570let validate_h2_request t = 571 let headers_list = to_list t in 572 match check_pseudo_order false headers_list with 573 | Error e -> Error e 574 | Ok () -> ( 575 let is_connect = pseudo "method" t = Some "CONNECT" in 576 match check_h2_request_pseudos t headers_list is_connect with 577 | Error e -> Error e 578 | Ok () -> check_h2_regular_headers t headers_list) 579 580let validate_h2_response t = 581 let headers_list = to_list t in 582 583 (* Check ordering: pseudo-headers must come before regular headers *) 584 let rec check_order seen_regular = function 585 | [] -> Ok () 586 | (name, _) :: rest -> 587 if is_pseudo_header name then 588 if seen_regular then Error Pseudo_after_regular 589 else check_order false rest 590 else check_order true rest 591 in 592 593 match check_order false headers_list with 594 | Error e -> Error e 595 | Ok () -> ( 596 if 597 (* Check for required :status pseudo-header *) 598 not (mem_pseudo "status" t) 599 then Error (Missing_pseudo "status") 600 else 601 (* Check all pseudo-headers are valid (only :status allowed) *) 602 let invalid_pseudo = 603 List.find_opt 604 (fun (name, _) -> 605 is_pseudo_header name 606 && not (List.mem name valid_response_pseudos)) 607 headers_list 608 in 609 match invalid_pseudo with 610 | Some (name, _) -> 611 let name_without_colon = 612 String.sub name 1 (String.length name - 1) 613 in 614 Error (Invalid_pseudo name_without_colon) 615 | None -> ( 616 (* Check for uppercase in regular header names *) 617 let uppercase_header = 618 List.find_opt 619 (fun (name, _) -> 620 (not (is_pseudo_header name)) && contains_uppercase name) 621 headers_list 622 in 623 match uppercase_header with 624 | Some (name, _) -> Error (Uppercase_header_name name) 625 | None -> 626 (* Check for forbidden connection-specific headers *) 627 let has_forbidden = 628 List.exists (fun h -> mem h t) h2_forbidden_headers 629 in 630 if has_forbidden then Error Connection_header_forbidden 631 else Ok ())) 632 633let validate_h2_user_headers t = 634 (* Validate user-provided headers for HTTP/2 (before pseudo-headers are added). 635 Per RFC 9113 Section 8.2.2 and 8.3, validates: 636 - No pseudo-headers (user should not provide them) 637 - No connection-specific headers 638 - TE header only contains "trailers" if present 639 640 Note: We don't reject uppercase header names here because the library 641 internally stores headers with canonical HTTP/1.x names (e.g., "Accept-Encoding"). 642 The h2_adapter lowercases all header names before sending to HTTP/2. *) 643 let headers_list = to_list t in 644 645 (* Check for any pseudo-headers (user should not provide them) *) 646 let pseudo = 647 List.find_opt (fun (name, _) -> is_pseudo_header name) headers_list 648 in 649 match pseudo with 650 | Some (name, _) -> 651 let name_without_colon = String.sub name 1 (String.length name - 1) in 652 Error 653 (Invalid_pseudo 654 (name_without_colon 655 ^ " (user-provided headers must not contain pseudo-headers)")) 656 | None -> ( 657 (* Check for forbidden connection-specific headers *) 658 let has_forbidden = List.exists (fun h -> mem h t) h2_forbidden_headers in 659 if has_forbidden then Error Connection_header_forbidden 660 else 661 (* Check TE header - only "trailers" is allowed *) 662 match find `Te t with 663 | Some te when String.lowercase_ascii (String.trim te) <> "trailers" -> 664 Error Te_header_invalid 665 | _ -> Ok ())