forked from
anil.recoil.org/ocaml-requests
A batteries included HTTP/1.1 client in OCaml
1(*---------------------------------------------------------------------------
2 Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>. All rights reserved.
3 SPDX-License-Identifier: ISC
4 ---------------------------------------------------------------------------*)
5
6(** HTTP header field handling per
7 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5}RFC 9110 Section
8 5}
9
10 This module provides an efficient implementation of HTTP headers with
11 case-insensitive field names per
12 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5.1}RFC 9110
13 Section 5.1}. Headers can have multiple values for the same field name
14 (e.g., Set-Cookie).
15
16 {2 Type-Safe Header Names}
17
18 Header names use the {!Header_name.t} type, providing compile-time safety
19 for standard headers while allowing custom headers via [`Other]:
20
21 {[
22 let headers =
23 Headers.empty
24 |> Headers.set `Content_type "application/json"
25 |> Headers.set `Authorization "Bearer token"
26 |> Headers.set (`Other "X-Custom") "value"
27 ]}
28
29 {2 Security}
30
31 Header names and values are validated to prevent HTTP header injection
32 attacks. CR and LF characters are rejected per
33 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-5.5}RFC 9110
34 Section 5.5}. *)
35
36val src : Logs.Src.t
37(** Log source for header operations. *)
38
39type t
40(** Abstract header collection type. Headers are stored with case-insensitive
41 keys and maintain insertion order. *)
42
43(** {1 Creation and Conversion} *)
44
45val empty : t
46(** [empty] creates an empty header collection. *)
47
48val of_list : (string * string) list -> t
49(** [of_list pairs] creates headers from an association list of string pairs.
50 This is useful when parsing headers from the wire format. Later entries
51 override earlier ones for the same key. *)
52
53val to_list : t -> (string * string) list
54(** [to_list headers] converts headers to an association list. The order of
55 headers is preserved. *)
56
57(** {1 Header Injection Prevention} *)
58
59exception Invalid_header of { name : string; reason : string }
60(** Raised when a header name or value contains invalid characters (CR/LF) that
61 could enable HTTP request smuggling attacks. *)
62
63exception Invalid_basic_auth of { reason : string }
64(** Raised when Basic auth credentials contain invalid characters. Per
65 {{:https://datatracker.ietf.org/doc/html/rfc7617#section-2}RFC 7617 Section
66 2}:
67 - Username must not contain colon characters
68 - Username and password must not contain control characters (0x00-0x1F,
69 0x7F) *)
70
71(** {1 Type-Safe Header Operations}
72
73 These functions use {!Header_name.t} for compile-time type safety. *)
74
75val add : Header_name.t -> string -> t -> t
76(** [add name value headers] adds a header value. Multiple values for the same
77 header name are allowed (e.g., for Set-Cookie).
78
79 @raise Invalid_header
80 if the header value contains CR/LF characters (to prevent HTTP header
81 injection attacks). *)
82
83val set : Header_name.t -> string -> t -> t
84(** [set name value headers] sets a header value, replacing any existing values
85 for that header name.
86
87 @raise Invalid_header
88 if the header value contains CR/LF characters (to prevent HTTP header
89 injection attacks). *)
90
91val find : Header_name.t -> t -> string option
92(** [find name headers] returns the first value for a header name, or [None] if
93 the header doesn't exist. *)
94
95val all : Header_name.t -> t -> string list
96(** [all name headers] returns all values for a header name. Returns an empty
97 list if the header doesn't exist. *)
98
99val remove : Header_name.t -> t -> t
100(** [remove name headers] removes all values for a header name. *)
101
102val mem : Header_name.t -> t -> bool
103(** [mem name headers] checks if a header name exists. *)
104
105(** {1 String-Based Header Operations}
106
107 These functions accept string header names for wire format compatibility.
108 Use these when parsing HTTP messages where header names arrive as strings.
109*)
110
111val add_string : string -> string -> t -> t
112(** [add_string name value headers] adds a header using a string name. Use this
113 when parsing headers from the wire.
114
115 @raise Invalid_header if the header name or value contains CR/LF characters.
116*)
117
118val set_string : string -> string -> t -> t
119(** [set_string name value headers] sets a header using a string name.
120
121 @raise Invalid_header if the header name or value contains CR/LF characters.
122*)
123
124val string : string -> t -> string option
125(** [string name headers] gets a header using a string name. *)
126
127val all_string : string -> t -> string list
128(** [all_string name headers] gets all values for a string header name. *)
129
130val remove_string : string -> t -> t
131(** [remove_string name headers] removes a header using a string name. *)
132
133val mem_string : string -> t -> bool
134(** [mem_string name headers] checks if a header exists using a string name. *)
135
136(** {1 Merging} *)
137
138val merge : t -> t -> t
139(** [merge base override] merges two header collections. Headers from [override]
140 replace those in [base]. *)
141
142(** {1 Common Header Builders}
143
144 Convenience functions for setting common HTTP headers. *)
145
146val content_type : Mime.t -> t -> t
147(** [content_type mime headers] sets the Content-Type header. *)
148
149val content_length : int64 -> t -> t
150(** [content_length length headers] sets the Content-Length header. *)
151
152val accept : Mime.t -> t -> t
153(** [accept mime headers] sets the Accept header. *)
154
155val accept_language : string -> t -> t
156(** [accept_language lang headers] sets the Accept-Language header. Per
157 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-12.5.4}RFC 9110
158 Section 12.5.4}.
159
160 Examples:
161 {[
162 headers
163 |> Headers.accept_language "en-US" headers
164 |> Headers.accept_language "en-US, en;q=0.9, de;q=0.8" headers
165 |> Headers.accept_language "*"
166 ]} *)
167
168val authorization : string -> t -> t
169(** [authorization value headers] sets the Authorization header with a raw
170 value. *)
171
172val bearer : string -> t -> t
173(** [bearer token headers] sets the Authorization header with a Bearer token.
174 Example: [bearer "abc123"] sets ["Authorization: Bearer abc123"]. *)
175
176val basic : username:string -> password:string -> t -> t
177(** [basic ~username ~password headers] sets the Authorization header with HTTP
178 Basic authentication (base64-encoded username:password).
179
180 @raise Invalid_basic_auth
181 if the username contains a colon character or if either username or
182 password contains control characters (RFC 7617 Section 2). *)
183
184val user_agent : string -> t -> t
185(** [user_agent ua headers] sets the User-Agent header. *)
186
187val host : string -> t -> t
188(** [host hostname headers] sets the Host header. *)
189
190val cookie : string -> string -> t -> t
191(** [cookie name value headers] adds a cookie to the Cookie header. Multiple
192 cookies can be added by calling this function multiple times. *)
193
194val range : start:int64 -> ?end_:int64 -> unit -> t -> t
195(** [range ~start ?end_ () headers] sets the Range header for partial content.
196 Example: [range ~start:0L ~end_:999L ()] requests the first 1000 bytes. *)
197
198(** {1 HTTP 100-Continue Support}
199
200 Per Recommendation #7: Expect: 100-continue protocol for large uploads. RFC
201 9110 Section 10.1.1 (Expect) *)
202
203val expect : string -> t -> t
204(** [expect value headers] sets the Expect header. Example:
205 [expect "100-continue"] for large request bodies. *)
206
207val expect_100_continue : t -> t
208(** [expect_100_continue headers] sets [Expect: 100-continue]. Use this for
209 large uploads to allow the server to reject the request before the body is
210 sent, saving bandwidth. *)
211
212(** {1 TE Header Support}
213
214 Per RFC 9110 Section 10.1.4: The TE header indicates what transfer codings
215 the client is willing to accept in the response, and whether the client is
216 willing to accept trailer fields in a chunked transfer coding. *)
217
218val te : string -> t -> t
219(** [te value headers] sets the TE header to indicate accepted transfer codings.
220 Example: [te "trailers, deflate"]. *)
221
222val te_trailers : t -> t
223(** [te_trailers headers] sets [TE: trailers] to indicate the client accepts
224 trailer fields in chunked transfer coding. Per RFC 9110 Section 10.1.4, a
225 client MUST send this if it wishes to receive trailers. *)
226
227(** {1 Cache Control Headers}
228
229 Per Recommendation #17 and #19: Response caching and conditional requests.
230 RFC 9111 (HTTP Caching), RFC 9110 Section 8.8.2-8.8.3 (Last-Modified, ETag)
231*)
232
233val if_none_match : string -> t -> t
234(** [if_none_match etag headers] sets the If-None-Match header for conditional
235 requests. The request succeeds only if the resource's ETag does NOT match.
236 Used with GET/HEAD to implement efficient caching (returns 304 Not Modified
237 if matches). *)
238
239val if_match : string -> t -> t
240(** [if_match etag headers] sets the If-Match header for conditional requests.
241 The request succeeds only if the resource's ETag matches. Used with
242 PUT/DELETE for optimistic concurrency (prevents lost updates). *)
243
244val if_modified_since : string -> t -> t
245(** [if_modified_since date headers] sets the If-Modified-Since header. The date
246 should be in HTTP-date format (RFC 9110 Section 5.6.7). Example:
247 ["Sun, 06 Nov 1994 08:49:37 GMT"]. *)
248
249val if_unmodified_since : string -> t -> t
250(** [if_unmodified_since date headers] sets the If-Unmodified-Since header. The
251 request succeeds only if the resource has NOT been modified since the date.
252*)
253
254val http_date_of_ptime : Ptime.t -> string
255(** [http_date_of_ptime time] formats a Ptime.t as an HTTP-date. Format: "Sun,
256 06 Nov 1994 08:49:37 GMT" (RFC 9110 Section 5.6.7). *)
257
258val if_modified_since_ptime : Ptime.t -> t -> t
259(** [if_modified_since_ptime time headers] sets If-Modified-Since using a
260 Ptime.t value. *)
261
262val if_unmodified_since_ptime : Ptime.t -> t -> t
263(** [if_unmodified_since_ptime time headers] sets If-Unmodified-Since using a
264 Ptime.t value. *)
265
266val cache_control : string -> t -> t
267(** [cache_control directives headers] sets the Cache-Control header with a raw
268 directive string. Example: [cache_control "no-cache, max-age=3600"]. *)
269
270val cache_control_directives :
271 ?max_age:int ->
272 ?max_stale:int option option ->
273 ?min_fresh:int ->
274 ?no_cache:bool ->
275 ?no_store:bool ->
276 ?no_transform:bool ->
277 ?only_if_cached:bool ->
278 unit ->
279 t ->
280 t
281(** [cache_control_directives ?max_age ?max_stale ?min_fresh ~no_cache ~no_store
282 ~no_transform ~only_if_cached () headers] builds a Cache-Control header
283 from individual directives (RFC 9111 request directives).
284
285 - [max_age]: Maximum age in seconds the client is willing to accept
286 - [max_stale]: Accept stale responses:
287 - [None]: omit max_stale entirely
288 - [Some None]: "max-stale" (accept any staleness)
289 - [Some (Some n)]: "max-stale=N" (accept n seconds staleness)
290 - [min_fresh]: Response must be fresh for at least n more seconds
291 - [no_cache]: Force revalidation with origin server
292 - [no_store]: Response must not be stored in cache
293 - [no_transform]: Intermediaries must not transform the response
294 - [only_if_cached]: Only return cached response, 504 if not available. *)
295
296val etag : string -> t -> t
297(** [etag value headers] sets the ETag header (for responses). Example:
298 [etag "\"abc123\""]. *)
299
300val last_modified : string -> t -> t
301(** [last_modified date headers] sets the Last-Modified header (for responses).
302 The date should be in HTTP-date format. *)
303
304val last_modified_ptime : Ptime.t -> t -> t
305(** [last_modified_ptime time headers] sets Last-Modified using a Ptime.t value.
306*)
307
308(** {1 Connection Header Handling}
309
310 Per
311 {{:https://datatracker.ietf.org/doc/html/rfc9110#section-7.6.1}RFC 9110
312 Section 7.6.1}: The Connection header field lists hop-by-hop header fields
313 that MUST be removed before forwarding the message. *)
314
315val parse_connection_header : t -> Header_name.t list
316(** [parse_connection_header headers] parses the Connection header value into a
317 list of header names. *)
318
319val hop_by_hop_headers : t -> Header_name.t list
320(** [hop_by_hop_headers headers] returns all hop-by-hop headers. This is the
321 union of {!Header_name.hop_by_hop_headers} and any headers listed in the
322 Connection header. *)
323
324val remove_hop_by_hop : t -> t
325(** [remove_hop_by_hop headers] removes all hop-by-hop headers. This should be
326 called before caching or forwarding a response. Per RFC 9110 Section 7.6.1.
327*)
328
329val connection_close : t -> bool
330(** [connection_close headers] returns [true] if Connection: close is present.
331 This indicates the connection should be closed after the current message. *)
332
333val connection_keep_alive : t -> bool
334(** [connection_keep_alive headers] returns [true] if Connection: keep-alive is
335 present. This is primarily used with HTTP/1.0 to request a persistent
336 connection. *)
337
338(** {1 Aliases} *)
339
340val multi : Header_name.t -> t -> string list
341(** [multi] is an alias for {!all}. *)
342
343val pp : Format.formatter -> t -> unit
344(** Pretty printer for headers. *)
345
346val pp_brief : Format.formatter -> t -> unit
347(** Brief pretty printer showing count only. *)
348
349(** {1 HTTP/2 Pseudo-Header Support}
350
351 HTTP/2 uses pseudo-header fields to convey information that was previously
352 carried in the request line (HTTP/1.1) or status line. Pseudo-headers start
353 with a colon character ([:]).
354
355 Per
356 {{:https://datatracker.ietf.org/doc/html/rfc9113#section-8.3}RFC 9113
357 Section 8.3}:
358 - Pseudo-headers MUST appear before regular headers
359 - Pseudo-headers MUST NOT appear in trailers
360 - Unknown pseudo-headers MUST be treated as malformed
361
362 {2 Request Pseudo-Headers}
363
364 - [:method] - HTTP method (required)
365 - [:scheme] - URI scheme (required for non-CONNECT)
366 - [:authority] - Authority portion of URI (host:port)
367 - [:path] - Path and query (required for non-CONNECT)
368
369 {2 Response Pseudo-Headers}
370
371 - [:status] - HTTP status code (required) *)
372
373val is_pseudo_header : string -> bool
374(** [is_pseudo_header name] returns [true] if the header name starts with [:].
375 Per RFC 9113 Section 8.3, pseudo-headers are identified by a colon prefix.
376*)
377
378val pseudo : string -> t -> string option
379(** [pseudo name headers] retrieves a pseudo-header value. The [name] should NOT
380 include the colon prefix. Example: [pseudo "method" headers] retrieves
381 [:method]. *)
382
383val set_pseudo : string -> string -> t -> t
384(** [set_pseudo name value headers] sets a pseudo-header value. The [name]
385 should NOT include the colon prefix. Pseudo-headers are stored with the
386 colon prefix internally. Example: [set_pseudo "method" "GET" headers] sets
387 [:method: GET].
388
389 @raise Invalid_header if the value contains CR/LF characters. *)
390
391val remove_pseudo : string -> t -> t
392(** [remove_pseudo name headers] removes a pseudo-header. The [name] should NOT
393 include the colon prefix. *)
394
395val mem_pseudo : string -> t -> bool
396(** [mem_pseudo name headers] returns [true] if the pseudo-header exists. The
397 [name] should NOT include the colon prefix. *)
398
399val has_pseudo_headers : t -> bool
400(** [has_pseudo_headers headers] returns [true] if any pseudo-headers are
401 present. *)
402
403val pseudo_headers : t -> (string * string) list
404(** [pseudo_headers headers] returns all pseudo-headers as [(name, value)]
405 pairs. Names are returned WITHOUT the colon prefix. *)
406
407val regular_headers : t -> (string * string) list
408(** [regular_headers headers] returns all non-pseudo headers as [(name, value)]
409 pairs. *)
410
411val to_list_ordered : t -> (string * string) list
412(** [to_list_ordered headers] returns all headers with pseudo-headers first,
413 followed by regular headers, as required by RFC 9113 Section 8.3. *)
414
415(** {2 HTTP/2 Request Header Construction} *)
416
417val h2_request :
418 meth:string -> scheme:string -> ?authority:string -> path:string -> t -> t
419(** [h2_request ~meth ~scheme ?authority ~path headers] sets the required HTTP/2
420 request pseudo-headers.
421
422 Per RFC 9113 Section 8.3.1:
423 - [:method] is required
424 - [:scheme] is required (except for CONNECT)
425 - [:path] is required (except for CONNECT, OPTIONS with empty path)
426 - [:authority] is optional but recommended
427
428 Example:
429 {[
430 Headers.empty
431 |> Headers.h2_request ~meth:"GET" ~scheme:"https" ~authority:"example.com"
432 ~path:"/"
433 |> Headers.set `Accept "application/json"
434 ]} *)
435
436(** {2 HTTP/2 Header Validation}
437
438 Per
439 {{:https://datatracker.ietf.org/doc/html/rfc9113#section-8.2}RFC 9113
440 Section 8.2}. *)
441
442type h2_validation_error =
443 | Missing_pseudo of string (** Required pseudo-header is missing *)
444 | Invalid_pseudo of string (** Unknown or misplaced pseudo-header *)
445 | Pseudo_after_regular (** Pseudo-header appeared after regular header *)
446 | Invalid_header_name of string
447 (** Header name contains invalid characters *)
448 | Uppercase_header_name of string
449 (** Header name contains uppercase (forbidden in HTTP/2) *)
450 | Connection_header_forbidden
451 (** Connection-specific headers are forbidden in HTTP/2 *)
452 | Te_header_invalid (** TE header with value other than "trailers" *)
453
454val pp_h2_validation_error : Format.formatter -> h2_validation_error -> unit
455(** Pretty printer for validation errors. *)
456
457val validate_h2_request : t -> (unit, h2_validation_error) result
458(** [validate_h2_request headers] validates headers for HTTP/2 request
459 constraints.
460
461 Per RFC 9113 Section 8.3.1, validates:
462 - Required pseudo-headers are present ([:method], [:scheme], [:path])
463 - No unknown pseudo-headers
464 - Pseudo-headers appear before regular headers
465 - No uppercase letters in header names
466 - No connection-specific headers (Connection, Keep-Alive, etc.)
467 - TE header only contains "trailers" if present. *)
468
469val validate_h2_response : t -> (unit, h2_validation_error) result
470(** [validate_h2_response headers] validates headers for HTTP/2 response
471 constraints.
472
473 Per RFC 9113 Section 8.3.2, validates:
474 - [:status] pseudo-header is present
475 - No other pseudo-headers
476 - Pseudo-headers appear before regular headers
477 - No uppercase letters in header names
478 - No connection-specific headers. *)
479
480val validate_h2_user_headers : t -> (unit, h2_validation_error) result
481(** [validate_h2_user_headers headers] validates user-provided headers for
482 HTTP/2.
483
484 Unlike {!validate_h2_request}, this validates headers {i before}
485 pseudo-headers are added by the HTTP/2 layer. Use this in the HTTP adapter.
486
487 Per RFC 9113 Section 8.2.2 and 8.3, validates:
488 - No pseudo-headers (user should not provide them)
489 - No uppercase letters in header names
490 - No connection-specific headers (Connection, Keep-Alive, etc.)
491 - TE header only contains "trailers" if present. *)
492
493(** {2 HTTP/2 Forbidden Headers}
494
495 Per RFC 9113 Section 8.2.2, certain headers are connection-specific and MUST
496 NOT appear in HTTP/2. *)
497
498val h2_forbidden_headers : Header_name.t list
499(** Headers that MUST NOT appear in HTTP/2 messages:
500 - Connection
501 - Keep-Alive
502 - Proxy-Connection
503 - Transfer-Encoding
504 - Upgrade. *)
505
506val remove_h2_forbidden : t -> t
507(** [remove_h2_forbidden headers] removes all HTTP/2 forbidden headers. Use this
508 when converting HTTP/1.1 headers for use with HTTP/2. *)