(*---------------------------------------------------------------------------
  Copyright (c) 2025 Anil Madhavapeddy <anil@recoil.org>
  SPDX-License-Identifier: MIT
 ---------------------------------------------------------------------------*)
(** [detect_language input_text] runs a default-configured [Langdetect]
    detector over [input_text] and writes one line per result to standard
    output, formatted as the language code, a space, and the probability
    with four decimal places. Results are printed in the order the detector
    returns them. *)
let detect_language input_text =
  let print_result (res : Langdetect.result) =
    Printf.printf "%s %.4f\n" res.lang res.prob
  in
  input_text
  |> Langdetect.detect (Langdetect.create_default ())
  |> List.iter print_result
(** [read_all_stdin ()] reads standard input until end of file and returns
    everything that was read as a single string. *)
let read_all_stdin () =
  let chunk_size = 4096 in
  let acc = Buffer.create chunk_size in
  (* [Buffer.add_channel] raises [End_of_file] when fewer than [chunk_size]
     characters remain; whatever it did read has already been appended to
     [acc], so the exception only signals that draining is complete. *)
  let rec drain () =
    match Buffer.add_channel acc stdin chunk_size with
    | () -> drain ()
    | exception End_of_file -> ()
  in
  drain ();
  Buffer.contents acc
(** [read_file path] returns the entire contents of the file at [path].

    The file is opened in binary mode so that the length reported by
    [in_channel_length] is exactly the number of characters
    [really_input_string] will read; in text mode, platforms that translate
    line endings can deliver fewer characters than the byte length and
    trigger a spurious [End_of_file].

    @raise Sys_error if [path] cannot be opened or read.
    @raise End_of_file if fewer characters are available than the length
    measured when the file was opened. *)
let read_file path =
  let ic = open_in_bin path in
  (* Release the channel on every exit path, including when reading raises. *)
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () -> really_input_string ic (in_channel_length ic))
(** [run file_opt] is the body of the command.

    Input is read from [path] when [file_opt] is [Some path], and from
    standard input when it is [None]. The result is shaped for
    [Cmdliner.Term.ret]:
    - [`Ok ()] after the detection results have been printed;
    - [`Error (false, msg)] when the input is empty or whitespace-only, or
      when reading the input raises [Sys_error] (in which case [msg] is the
      message carried by the exception). *)
let run file_opt =
  let read_input () =
    match file_opt with
    | Some path -> read_file path
    | None -> read_all_stdin ()
  in
  match read_input () with
  (* Report I/O failures through the command's own error channel rather than
     letting them escape as an uncaught exception. *)
  | exception Sys_error msg -> `Error (false, msg)
  | text ->
    if String.trim text = "" then `Error (false, "No input text provided")
    else begin
      detect_language text;
      `Ok ()
    end
4141+4242+open Cmdliner
(** Optional positional argument at index 0, shown as [FILE] in the help
    output and parsed with Cmdliner's [file] converter. Evaluates to [None]
    when the argument is absent. *)
let file_arg =
  let open Arg in
  let doc = "Input file to detect language from. If not provided, reads from stdin." in
  let meta = info [] ~docv:"FILE" ~doc in
  value (pos 0 (some file) None meta)
(** The [langdetect] command: manual page, version string, and the term that
    applies [run] to the parsed [file_arg]. *)
let cmd =
  let description =
    [
      `S Manpage.s_description;
      `P "Detects the natural language of input text using n-gram frequency analysis.";
      `P "Outputs detected language codes and their probabilities as space-separated values, one per line, sorted by probability (highest first).";
    ]
  in
  let examples =
    [
      `S Manpage.s_examples;
      `P "Detect language from a file:";
      `Pre "  langdetect document.txt";
      `P "Detect language from stdin:";
      `Pre "  echo 'Hello world' | langdetect";
    ]
  in
  let term = Term.(ret (const run $ file_arg)) in
  Cmd.v
    (Cmd.info "langdetect" ~version:"%%VERSION%%"
       ~doc:"Detect the language of text"
       ~man:(description @ examples))
    term
(* Program entry point: evaluate the command line against [cmd] and exit with
   the status code that [Cmd.eval] returns. *)
let () = Cmd.eval cmd |> exit
···4848 (targets langdetect-tests.wasm.js)
4949 (deps langdetect_js_tests.bc.wasm.js)
5050 (action (copy %{deps} %{targets})))
; Install the web assets for the langdetect-js package under
; share/langdetect-js/.
; Covers the HTML demo, the JS files, the WASM loaders, and the WASM assets
; with source maps.
; NOTE(review): dune's `share` section already installs beneath
; share/<package>/, so the `langdetect-js/` prefix on every destination below
; may yield share/langdetect-js/langdetect-js/... - confirm the intended
; layout (the alternative would be `share_root`).
(install
 (package langdetect-js)
 (section share)
 (files
  ; HTML demo page
  (langdetect.html as langdetect-js/langdetect.html)
  ; JS files (these work standalone in browsers)
  (langdetect.js as langdetect-js/langdetect.js)
  (langdetect-tests.js as langdetect-js/langdetect-tests.js)
  ; WASM loaders, placed in the same directory so that their relative asset
  ; paths resolve
  (langdetect_js_main.bc.wasm.js
   as
   langdetect-js/langdetect_js_main.bc.wasm.js)
  (langdetect_js_tests.bc.wasm.js
   as
   langdetect-js/langdetect_js_tests.bc.wasm.js)
  ; WASM assets: these must live under langdetect-js/ so the relative paths
  ; used by the loaders above keep working
  (glob_files_rec
   (langdetect_js_main.bc.wasm.assets/*
    with_prefix
    langdetect-js/langdetect_js_main.bc.wasm.assets))
  (glob_files_rec
   (langdetect_js_tests.bc.wasm.assets/*
    with_prefix
    langdetect-js/langdetect_js_tests.bc.wasm.assets))))