The unpac monorepo manager self-hosting as a monorepo using unpac

Merge pull request #34 from tiash/ensure-progress-on-decode

[Base64_rfc2045.decode] should always progresses

authored by dinosaure.tngl.sh and committed by

GitHub 185cb8e1 f8744314

+139 -67
+45 -66
src/base64_rfc2045.ml
··· 24 24 let invalid_bounds off len = 25 25 invalid_arg "Invalid bounds (off: %d, len: %d)" off len 26 26 27 - let malformed source off pos len = 28 - `Malformed (Bytes.sub_string source (off + pos) len) 27 + let malformed chr = 28 + `Malformed (String.make 1 chr) 29 29 30 30 let unsafe_byte source off pos = Bytes.unsafe_get source (off + pos) 31 31 let unsafe_blit = Bytes.unsafe_blit ··· 54 54 unsafe_set_chr state.buffer 2 55 55 (unsafe_chr ((quantum lsl 6) lor code land 255)) ; 56 56 flush state 57 - | _ -> malformed (Bytes.make 1 chr) 0 0 1 58 - 59 - let r_crlf source off len = 60 - (* assert (0 <= off && 0 <= len && off + len <= String.length source); *) 61 - (* assert (len = 2); *) 62 - match Bytes.sub_string source off len with 63 - | "\r\n" -> `Line_break 64 - | _ -> malformed source off 0 len 57 + | _ -> malformed chr 65 58 66 59 type src = [`Channel of in_channel | `String of string | `Manual] 67 60 ··· 78 71 ; mutable i_pos: int 79 72 ; mutable i_len: int 80 73 ; mutable s: state 81 - ; h: Bytes.t 82 - ; mutable h_len: int 83 - ; mutable h_need: int 84 74 ; mutable padding: int 85 75 ; mutable unsafe: bool 86 76 ; mutable byte_count: int ··· 127 117 if decoder.limit_count > 78 then dangerous decoder true ; 128 118 decoder.pp decoder v 129 119 130 - [@@@warning "-32"] 131 - 132 - let is_b64 = function 133 - | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '+' | '/' -> true 134 - | _ -> false 135 - 136 - let t_need decoder need = 137 - decoder.h_len <- 0 ; 138 - decoder.h_need <- need 139 - 140 - let rec t_fill k decoder = 141 - let blit decoder len = 142 - unsafe_blit decoder.i 143 - (decoder.i_off + decoder.i_pos) 144 - decoder.h decoder.h_len len ; 145 - decoder.i_pos <- decoder.i_pos + len ; 146 - decoder.h_len <- decoder.h_len + len 147 - in 148 - let rem = i_rem decoder in 149 - if rem < 0 (* end of input *) then k decoder 150 - else 151 - let need = decoder.h_need - decoder.h_len in 152 - if rem < need then ( 153 - blit decoder rem ; 154 - refill (t_fill k) decoder ) 155 - else ( blit decoder need ; k decoder ) 156 - 157 120 type flush_and_malformed = [`Flush of state | `Malformed of string] 158 121 159 122 let padding {size; _} padding = ··· 164 127 | 3, 1 -> true 165 128 | _ -> false 166 129 167 - let rec t_crlf decoder = 168 - if decoder.h_len < decoder.h_need then 169 - ret decode_base64 170 - (malformed decoder.h 0 0 decoder.h_len) 171 - decoder.h_len decoder 172 - else 173 - ret decode_base64 (r_crlf decoder.h 0 decoder.h_len) decoder.h_len decoder 174 - 175 - and t_flush {quantum; size; buffer} = 130 + let t_flush {quantum; size; buffer} = 176 131 match size with 177 132 | 0 | 1 -> `Flush {quantum; size; buffer= Bytes.empty} 178 133 | 2 -> ··· 186 141 unsafe_set_chr buffer 0 (unsafe_chr ((quantum lsr 8) land 255)) ; 187 142 unsafe_set_chr buffer 1 (unsafe_chr (quantum land 255)) ; 188 143 `Flush {quantum; size; buffer= Bytes.sub buffer 0 2} 189 - | _ -> malformed buffer 0 0 3 144 + | _ -> assert false (* this branch is impossible, size can only ever be in the range [0..3]. *) 145 + 146 + let wrong_padding decoder = 147 + let k _ = `End in 148 + decoder.k <- k ; `Wrong_padding 190 149 191 - and t_decode_base64 chr decoder = 150 + let rec t_decode_base64 chr decoder = 192 151 if decoder.padding = 0 then 193 152 let rec go pos = function 194 153 | `Continue state -> ··· 197 156 match unsafe_byte decoder.i decoder.i_off (decoder.i_pos + pos) with 198 157 | ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '+' | '/') as chr -> go (succ pos) (r_repr state chr) 199 158 | '=' -> 200 - decoder.i_pos <- decoder.i_pos + pos ; 159 + decoder.padding <- decoder.padding + 1 ; 160 + decoder.i_pos <- decoder.i_pos + pos + 1 ; 201 161 decoder.s <- state ; 202 - ret decode_base64 `Padding pos decoder 162 + ret decode_base64 `Padding (pos+1) decoder 203 163 | ' ' | '\t' -> 204 - decoder.i_pos <- decoder.i_pos + pos ; 164 + decoder.i_pos <- decoder.i_pos + pos + 1 ; 205 165 decoder.s <- state ; 206 - ret decode_base64 `Wsp pos decoder 166 + ret decode_base64 `Wsp (pos + 1) decoder 207 167 | '\r' -> 208 - decoder.i_pos <- decoder.i_pos + pos ; 168 + decoder.i_pos <- decoder.i_pos + pos + 1 ; 209 169 decoder.s <- state ; 210 - t_need decoder 2 ; 211 - t_fill t_crlf decoder 212 - | chr -> malformed (Bytes.make 1 chr) 0 0 1 170 + decode_base64_lf_after_cr decoder 171 + | chr -> 172 + decoder.i_pos <- decoder.i_pos + pos + 1 ; 173 + decoder.s <- state ; 174 + ret decode_base64 (malformed chr) (pos+1) decoder 213 175 ) else ( 214 176 decoder.i_pos <- decoder.i_pos + pos ; 215 177 decoder.byte_count <- decoder.byte_count + pos ; 178 + decoder.limit_count <- decoder.limit_count + pos ; 216 179 decoder.s <- state ; 217 180 refill decode_base64 decoder ) 218 181 | #flush_and_malformed as v -> ··· 220 183 ret decode_base64 v pos decoder 221 184 in 222 185 go 1 (r_repr decoder.s chr) 223 - else malformed (Bytes.make 1 chr) 0 0 1 186 + else ( 187 + decoder.i_pos <- decoder.i_pos + 1 ; 188 + ret decode_base64 (malformed chr) 1 decoder) 189 + 190 + and decode_base64_lf_after_cr decoder = 191 + let rem = i_rem decoder in 192 + if rem < 0 then 193 + ret decode_base64 (malformed '\r') 1 decoder 194 + else if rem = 0 then refill decode_base64_lf_after_cr decoder 195 + else 196 + match unsafe_byte decoder.i decoder.i_off decoder.i_pos with 197 + | '\n' -> 198 + decoder.i_pos <- decoder.i_pos + 1 ; 199 + ret decode_base64 `Line_break 2 decoder 200 + | _ -> 201 + ret decode_base64 (malformed '\r') 1 decoder 224 202 225 203 and decode_base64 decoder = 226 204 let rem = i_rem decoder in ··· 228 206 if rem < 0 then 229 207 ret 230 208 (fun decoder -> 231 - if padding decoder.s decoder.padding then `End else `Wrong_padding ) 209 + if padding decoder.s decoder.padding then `End else wrong_padding decoder ) 232 210 (t_flush decoder.s) 0 decoder 233 211 else refill decode_base64 decoder 234 212 else ··· 242 220 | ' ' | '\t' -> 243 221 decoder.i_pos <- decoder.i_pos + 1 ; 244 222 ret decode_base64 `Wsp 1 decoder 245 - | '\r' -> t_need decoder 2 ; t_fill t_crlf decoder 246 - | chr -> malformed (Bytes.make 1 chr) 0 0 1 223 + | '\r' -> 224 + decoder.i_pos <- decoder.i_pos + 1 ; 225 + decode_base64_lf_after_cr decoder 226 + | chr -> 227 + decoder.i_pos <- decoder.i_pos + 1 ; 228 + ret decode_base64 (malformed chr) 1 decoder 247 229 248 230 let pp_base64 decoder = function 249 231 | `Line_break -> reset decoder ; decoder.k decoder ··· 268 250 ; i_len 269 251 ; i 270 252 ; s= {quantum= 0; size= 0; buffer= Bytes.create 3} 271 - ; h= Bytes.create 2 272 - ; h_len= 0 273 - ; h_need= 0 274 253 ; padding= 0 275 254 ; unsafe= false 276 255 ; byte_count= 0
+1 -1
test/dune
··· 1 1 (executable 2 2 (name test) 3 - (libraries base64 rresult alcotest bos)) 3 + (libraries base64 base64.rfc2045 rresult alcotest bos)) 4 4 5 5 (alias 6 6 (name runtest)
+93
test/test.ml
··· 120 120 Alcotest.(check string) (sprintf "decode %s" r) c (Base64.decode_exn ~pad:false ~off ~len r); 121 121 ) cfcs_tests 122 122 123 + exception Malformed 124 + exception Wrong_padding 123 125 126 + let strict_base64_rfc2045_of_string x = 127 + let decoder = Base64_rfc2045.decoder (`String x) in 128 + let res = Buffer.create 16 in 129 + 130 + let rec go () = match Base64_rfc2045.decode decoder with 131 + | `End -> () 132 + | `Wrong_padding -> raise Wrong_padding 133 + | `Malformed _ -> raise Malformed 134 + | `Flush x -> Buffer.add_string res x ; go () 135 + | `Await -> Alcotest.failf "Retrieve impossible case: `Await" in 136 + 137 + Base64_rfc2045.src decoder (Bytes.unsafe_of_string x) 0 (String.length x) ; 138 + go () ; Buffer.contents res 139 + 140 + let relaxed_base64_rfc2045_of_string x = 141 + let decoder = Base64_rfc2045.decoder (`String x) in 142 + let res = Buffer.create 16 in 143 + 144 + let rec go () = match Base64_rfc2045.decode decoder with 145 + | `End -> () 146 + | `Wrong_padding -> go () 147 + | `Malformed _ -> go () 148 + | `Flush x -> Buffer.add_string res x ; go () 149 + | `Await -> Alcotest.failf "Retrieve impossible case: `Await" in 150 + 151 + Base64_rfc2045.src decoder (Bytes.unsafe_of_string x) 0 (String.length x) ; 152 + go () ; Buffer.contents res 153 + 154 + let test_strict_rfc2045 = 155 + [ "c2FsdXQgbGVzIGNvcGFpbnMgZmF1dCBhYnNvbHVtZW50IHF1ZSBqZSBkw6lwYXNzZSBsZXMgODAg\r\n\ 156 + Y2hhcmFjdGVycyBwb3VyIHZvaXIgc2kgbW9uIGVuY29kZXIgZml0cyBiaWVuIGRhbnMgbGVzIGxp\r\n\ 157 + bWl0ZXMgZGUgbGEgUkZDIDIwNDUgLi4u", 158 + "salut les copains faut absolument que je dépasse les 80 characters pour voir si \ 159 + mon encoder fits bien dans les limites de la RFC 2045 ..." 160 + ; "", "" 161 + ; "Zg==", "f" 162 + ; "Zm8=", "fo" 163 + ; "Zm9v", "foo" 164 + ; "Zm9vYg==", "foob" 165 + ; "Zm9vYmE=", "fooba" 166 + ; "Zm9vYmFy", "foobar" ] 167 + 168 + let test_relaxed_rfc2045 = 169 + [ "Zg", "f" 170 + ; "Zm\n8", "fo" 171 + ; "Zm\r9v", "foo" 172 + ; "Zm9 vYg", "foob" 173 + ; "Zm9\r\n vYmE", "fooba" 174 + ; "Zm9évYmFy", "foobar" ] 175 + 176 + let strict_base64_rfc2045_to_string x = 177 + let res = Buffer.create 16 in 178 + let encoder = Base64_rfc2045.encoder (`Buffer res) in 179 + String.iter 180 + (fun chr -> match Base64_rfc2045.encode encoder (`Char chr) with 181 + | `Ok -> () 182 + | `Partial -> Alcotest.failf "Retrieve impossible case for (`Char %02x): `Partial" (Char.code chr)) 183 + x ; 184 + match Base64_rfc2045.encode encoder `End with 185 + | `Ok -> Buffer.contents res 186 + | `Partial -> Alcotest.fail "Retrieve impossible case for `End: `Partial" 187 + 188 + let test_strict_with_malformed_input_rfc2045 = 189 + List.mapi (fun i (has, _) -> 190 + Alcotest.test_case (Fmt.strf "strict rfc2045 - %02d" i) `Quick @@ fun () -> 191 + try 192 + let _ = strict_base64_rfc2045_of_string has in 193 + Alcotest.failf "Strict parser valids malformed input: %S" has 194 + with Malformed | Wrong_padding -> () ) 195 + test_relaxed_rfc2045 196 + 197 + let test_strict_rfc2045 = 198 + List.mapi (fun i (has, expect) -> 199 + Alcotest.test_case (Fmt.strf "strict rfc2045 - %02d" i) `Quick @@ fun () -> 200 + try 201 + let res0 = strict_base64_rfc2045_of_string has in 202 + let res1 = strict_base64_rfc2045_to_string res0 in 203 + Alcotest.(check string) "encode(decode(x)) = x" res1 has ; 204 + Alcotest.(check string) "decode(x)" res0 expect 205 + with Malformed | Wrong_padding -> Alcotest.failf "Invalid input %S" has) 206 + test_strict_rfc2045 207 + 208 + let test_relaxed_rfc2045 = 209 + List.mapi (fun i (has, expect) -> 210 + Alcotest.test_case (Fmt.strf "relaxed rfc2045 - %02d" i) `Quick @@ fun () -> 211 + let res0 = relaxed_base64_rfc2045_of_string has in 212 + Alcotest.(check string) "decode(x)" res0 expect) 213 + test_relaxed_rfc2045 124 214 125 215 let test_invariants = [ "Alphabet size", `Quick, alphabet_size ] 126 216 let test_codec = [ "RFC4648 test vectors", `Quick, test_rfc4648 ··· 133 223 Alcotest.run "Base64" [ 134 224 "invariants", test_invariants; 135 225 "codec", test_codec; 226 + "rfc2045", test_strict_rfc2045; 227 + "rfc2045", test_strict_with_malformed_input_rfc2045; 228 + "rfc2045", test_relaxed_rfc2045; 136 229 ] 137 230