(* buf_read.ml - Buffer type and utilities for HTTP parsing *) type status = | Complete | Partial | Invalid_method | Invalid_target | Invalid_version | Invalid_header | Headers_too_large | Malformed | Content_length_overflow (* Content-Length value too large or invalid *) | Ambiguous_framing (* Both Content-Length and Transfer-Encoding present *) | Bare_cr_detected (* CR without LF - HTTP smuggling attempt *) | Missing_host_header (* HTTP/1.1 requires Host header *) | Unsupported_transfer_encoding (* Transfer-Encoding other than chunked/identity *) let status_to_string = function | Complete -> "Complete" | Partial -> "Partial" | Invalid_method -> "Invalid_method" | Invalid_target -> "Invalid_target" | Invalid_version -> "Invalid_version" | Invalid_header -> "Invalid_header" | Headers_too_large -> "Headers_too_large" | Malformed -> "Malformed" | Content_length_overflow -> "Content_length_overflow" | Ambiguous_framing -> "Ambiguous_framing" | Bare_cr_detected -> "Bare_cr_detected" | Missing_host_header -> "Missing_host_header" | Unsupported_transfer_encoding -> "Unsupported_transfer_encoding" ;; let pp_status fmt t = Stdlib.Format.fprintf fmt "%s" (status_to_string t) open Base let buffer_size = 32768 let max_headers = 32 let create () = Base_bigstring.create buffer_size let peek buf pos = Base_bigstring.unsafe_get buf pos let is_token_char c = match c with | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' -> true | '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '.' -> true | '^' | '_' | '`' | '|' | '~' -> true | _ -> false ;; let is_space c = match c with | ' ' | '\t' -> true | _ -> false ;; let to_lower c = match c with | 'A' .. 'Z' -> Char.of_int_exn (Char.to_int c + 32) | _ -> c ;; let find_crlf buf ~pos ~len = if len - pos < 2 then -1 else ( let p = ref pos in let found = ref false in while (not !found) && !p + 1 < len do let search_pos = !p in let search_len = len - !p in let cr_pos = Base_bigstring.unsafe_find buf '\r' ~pos:search_pos ~len:search_len in if cr_pos < 0 || cr_pos >= len - 1 then p := len else if Char.equal (Base_bigstring.unsafe_get buf (cr_pos + 1)) '\n' then ( p := cr_pos; found := true) else p := cr_pos + 1 done; if !found then !p else -1) ;; let pp fmt _t = Stdlib.Format.fprintf fmt "" buffer_size (* Security limits - configurable per-server *) type limits = { max_content_length : int64 (* Default: 100MB *) ; max_header_size : int (* Default: 16KB - size of all headers combined *) ; max_header_count : int (* Default: 100 *) ; max_chunk_size : int (* Default: 16MB *) } let default_limits = { max_content_length = 104857600L (* 100MB *) ; max_header_size = 16384 (* 16KB *) ; max_header_count = 100 ; max_chunk_size = 16777216 (* 16MB *) } (* Detect bare CR (CR not followed by LF) - RFC 7230 Section 3.5 Used to prevent HTTP request smuggling attacks *) let has_bare_cr buf ~pos ~len = let end_pos = pos + len in let p = ref pos in let found = ref false in while (not !found) && !p < end_pos do if Char.equal (peek buf !p) '\r' then ( if !p + 1 >= end_pos || not (Char.equal (peek buf (!p + 1)) '\n') then found := true else p := !p + 2 (* Skip past valid CRLF *) ) else Int.incr p done; !found ;; (* Check if a value contains CRLF injection attempt *) let has_crlf_injection buf ~pos ~len = let end_pos = pos + len in let p = ref pos in let found = ref false in while (not !found) && !p < end_pos do match peek buf !p with | '\r' | '\n' -> found := true | _ -> Int.incr p done; !found ;;