Detect which human language a document uses from OCaml, from the Nu Html validator
languages
unicode
ocaml
1<!DOCTYPE html>
2<html lang="en">
3<head>
4 <meta charset="UTF-8">
5 <meta name="viewport" content="width=device-width, initial-scale=1.0">
6 <title>Langdetect - Language Detection Demo</title>
7 <style>
8 * {
9 box-sizing: border-box;
10 }
11 body {
12 font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
13 line-height: 1.6;
14 max-width: 1000px;
15 margin: 0 auto;
16 padding: 2rem;
17 background: #f5f5f5;
18 color: #333;
19 }
20 h1 {
21 color: #2563eb;
22 border-bottom: 3px solid #3b82f6;
23 padding-bottom: 0.5rem;
24 margin-bottom: 0.5rem;
25 }
26 .subtitle {
27 color: #666;
28 margin-top: 0;
29 margin-bottom: 2rem;
30 }
31 .section {
32 background: white;
33 border: 1px solid #e0e0e0;
34 border-radius: 12px;
35 padding: 1.5rem;
36 margin: 1.5rem 0;
37 box-shadow: 0 2px 4px rgba(0,0,0,0.05);
38 }
39 .section h2 {
40 margin-top: 0;
41 color: #1e40af;
42 }
43 .demo-area {
44 display: flex;
45 flex-direction: column;
46 gap: 1rem;
47 }
48 textarea {
49 width: 100%;
50 padding: 1rem;
51 font-size: 1rem;
52 font-family: inherit;
53 border: 2px solid #e0e0e0;
54 border-radius: 8px;
55 resize: vertical;
56 transition: border-color 0.2s;
57 }
/* Focus state for the text area: the border is recoloured as the visible cue.
   The outline is kept but made transparent instead of being removed, so that
   forced-colors (high-contrast) mode, which overrides border colours, still
   has an outline to paint as the focus indicator. */
textarea:focus {
  outline: 2px solid transparent;
  border-color: #3b82f6;
}
62 button {
63 padding: 0.75rem 2rem;
64 font-size: 1rem;
65 font-weight: 600;
66 cursor: pointer;
67 border: none;
68 border-radius: 8px;
69 background: #2563eb;
70 color: white;
71 transition: all 0.2s;
72 align-self: flex-start;
73 }
74 button:hover {
75 background: #1d4ed8;
76 transform: translateY(-1px);
77 }
78 button:active {
79 transform: translateY(0);
80 }
81 #demo-result {
82 padding: 1rem;
83 background: #f8fafc;
84 border-radius: 8px;
85 border: 1px solid #e2e8f0;
86 }
87 #demo-result ul {
88 margin: 0.5rem 0;
89 padding-left: 1.5rem;
90 }
91 #demo-result li {
92 margin: 0.25rem 0;
93 }
94 .summary {
95 background: #eff6ff;
96 padding: 1rem 1.5rem;
97 border-radius: 8px;
98 margin-bottom: 1rem;
99 }
100 .summary h2 {
101 margin: 0 0 0.5rem 0;
102 }
103 .summary p {
104 margin: 0.25rem 0;
105 }
106 .results-table {
107 width: 100%;
108 border-collapse: collapse;
109 font-size: 0.9rem;
110 }
111 .results-table th,
112 .results-table td {
113 padding: 0.75rem 1rem;
114 text-align: left;
115 border-bottom: 1px solid #e0e0e0;
116 }
117 .results-table th {
118 background: #f8fafc;
119 font-weight: 600;
120 color: #475569;
121 }
122 .results-table tr:hover {
123 background: #f8fafc;
124 }
125 .results-table .code {
126 font-family: 'SF Mono', Monaco, 'Cascadia Code', monospace;
127 font-size: 0.85rem;
128 background: #f1f5f9;
129 padding: 0.2rem 0.4rem;
130 border-radius: 4px;
131 }
132 .results-table .corpus-text {
133 font-size: 0.8rem;
134 max-width: 300px;
135 overflow: hidden;
136 text-overflow: ellipsis;
137 white-space: nowrap;
138 color: #666;
139 }
140 .results-table .pass {
141 color: #16a34a;
142 font-weight: bold;
143 font-size: 1.1rem;
144 }
145 .results-table .fail {
146 color: #dc2626;
147 font-weight: bold;
148 font-size: 1.1rem;
149 }
150 .loading {
151 text-align: center;
152 padding: 2rem;
153 color: #666;
154 }
155 .api-docs {
156 background: #1e293b;
157 color: #e2e8f0;
158 padding: 1rem;
159 border-radius: 8px;
160 overflow-x: auto;
161 }
162 .api-docs code {
163 color: #7dd3fc;
164 }
165 .api-docs .comment {
166 color: #94a3b8;
167 }
168 .sample-texts {
169 display: grid;
170 grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
171 gap: 0.5rem;
172 margin-top: 1rem;
173 }
174 .sample-text {
175 padding: 0.5rem 1rem;
176 font-size: 0.85rem;
177 background: #f1f5f9;
178 border: 1px solid #e2e8f0;
179 border-radius: 6px;
180 cursor: pointer;
181 transition: all 0.2s;
182 }
183 .sample-text:hover {
184 background: #e2e8f0;
185 border-color: #cbd5e1;
186 }
187 .sample-text .lang {
188 font-weight: 600;
189 color: #1e40af;
190 }
191 </style>
192</head>
193<body>
194
195 <h1>🌍 Langdetect</h1>
196 <p class="subtitle">Language detection for the browser using n-gram frequency analysis</p>
197
<!-- Mode Selector: pick the runtime build, then press Reload to apply it -->
<div class="section" style="padding: 1rem 1.5rem; display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
  <label for="mode-select" style="font-weight: 600;">Runtime:</label>
  <select id="mode-select" style="padding: 0.5rem 1rem; border-radius: 6px; border: 2px solid #e0e0e0; font-size: 1rem;">
    <option value="js">JavaScript (js_of_ocaml)</option>
    <option value="wasm">WebAssembly (wasm_of_ocaml)</option>
  </select>
  <!-- Explicit type so the button never acts as a submit button if it ends up inside a form -->
  <button type="button" onclick="reloadWithMode()" style="padding: 0.5rem 1.5rem;">Reload</button>
  <!-- Filled in by the page script with loading / loaded / failed text;
       role="status" makes those updates a polite live region for assistive technology -->
  <span id="mode-status" role="status" style="color: #666; font-size: 0.9rem;"></span>
</div>
208
209 <!-- Interactive Demo (created by OCaml code, but we add sample texts) -->
210 <div class="section">
211 <div id="demo">
212 <!-- Demo UI created by langdetect_js_tests.ml -->
213 <p class="loading">Loading demo...</p>
214 </div>
215
216 <p style="margin-top: 1rem; margin-bottom: 0.5rem; color: #666; font-size: 0.9rem;">
217 Click a sample to try:
218 </p>
<!-- Sample chips. Each one is a native button so it is focusable and can be
     activated with Enter/Space; the page script attaches a click handler to
     every .sample-text element and reads the text to detect from data-text.
     The .sample-text class rules take precedence over the global button rule
     for padding, font size, background, border and radius. -->
<div class="sample-texts" id="sample-texts">
  <button class="sample-text" data-text="The quick brown fox jumps over the lazy dog." type="button">
    <span class="lang">🇬🇧 English</span>
  </button>
  <button class="sample-text" data-text="Bonjour le monde! Comment allez-vous aujourd'hui?" type="button">
    <span class="lang">🇫🇷 French</span>
  </button>
  <button class="sample-text" data-text="Guten Tag! Wie geht es Ihnen heute?" type="button">
    <span class="lang">🇩🇪 German</span>
  </button>
  <button class="sample-text" data-text="¡Hola mundo! ¿Cómo estás hoy?" type="button">
    <span class="lang">🇪🇸 Spanish</span>
  </button>
  <button class="sample-text" data-text="你好世界!今天你好吗?" type="button">
    <span class="lang">🇨🇳 Chinese</span>
  </button>
  <button class="sample-text" data-text="こんにちは世界!今日はお元気ですか?" type="button">
    <span class="lang">🇯🇵 Japanese</span>
  </button>
  <button class="sample-text" data-text="مرحبا بالعالم! كيف حالك اليوم؟" type="button">
    <span class="lang">🇸🇦 Arabic</span>
  </button>
  <button class="sample-text" data-text="Привет мир! Как дела сегодня?" type="button">
    <span class="lang">🇷🇺 Russian</span>
  </button>
</div>
245 </div>
246
247 <!-- Test Results -->
248 <div class="section">
249 <div id="test-results">
250 <p class="loading">Loading tests...</p>
251 </div>
252 </div>
253
254 <!-- API Documentation -->
255 <div class="section">
256 <h2>JavaScript API</h2>
257 <div class="api-docs">
258<pre><span class="comment">// Detect the most likely language</span>
259<code>langdetect.detect</code>("Hello, world!")
260<span class="comment">// → "en"</span>
261
262<span class="comment">// Get detection with confidence score</span>
263<code>langdetect.detectWithProb</code>("Bonjour le monde!")
264<span class="comment">// → { lang: "fr", prob: 0.9999 }</span>
265
266<span class="comment">// Get all matching languages</span>
267<code>langdetect.detectAll</code>("Hello world")
268<span class="comment">// → [{ lang: "en", prob: 0.85 }, { lang: "de", prob: 0.10 }, ...]</span>
269
270<span class="comment">// List supported languages</span>
271<code>langdetect.languages</code>()
272<span class="comment">// → ["ar", "bg", "bn", "ca", "cs", "da", "de", "el", ...]</span></pre>
273 </div>
274 </div>
275
276 <!-- Supported Languages -->
277 <div class="section">
278 <h2>Supported Languages (47)</h2>
279 <p>
280 Arabic, Bengali, Bulgarian, Catalan, Chinese (Simplified), Chinese (Traditional),
281 Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek,
282 Gujarati, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian,
283 Lithuanian, Macedonian, Malayalam, Norwegian, Panjabi, Persian, Polish, Portuguese,
284 Romanian, Russian, Sinhala, Slovak, Spanish, Swedish, Tagalog, Tamil, Telugu, Thai,
285 Turkish, Ukrainian, Urdu, Vietnamese
286 </p>
287 </div>
288
289 <script>
/**
 * Resolve which runtime build the page should load.
 *
 * Lookup order: the `mode` URL query parameter, then the value stored in
 * localStorage under `langdetect-mode`, then the default `'js'`.
 * A candidate is accepted only when it is `'js'` or `'wasm'` (the two values
 * offered by the #mode-select dropdown). Anything else is skipped, so a stale
 * or hand-edited value cannot blank the dropdown or make the status line
 * report a mode that was never loaded.
 *
 * @returns {string} `'js'` or `'wasm'`.
 */
function getMode() {
  const isKnownMode = (value) => value === 'js' || value === 'wasm';

  const fromUrl = new URLSearchParams(window.location.search).get('mode');
  if (isKnownMode(fromUrl)) {
    return fromUrl;
  }

  let fromStorage = null;
  try {
    fromStorage = localStorage.getItem('langdetect-mode');
  } catch (err) {
    // Storage access can throw when the browser blocks it; fall through to
    // the default instead of aborting the rest of the page script.
  }
  return isKnownMode(fromStorage) ? fromStorage : 'js';
}
295
/**
 * Apply the runtime chosen in #mode-select by reloading the page with it.
 *
 * The choice is saved to localStorage under `langdetect-mode` and written to
 * the `mode` query parameter of the current URL; assigning the updated URL to
 * `window.location.href` performs the navigation.
 */
function reloadWithMode() {
  const mode = document.getElementById('mode-select').value;
  try {
    localStorage.setItem('langdetect-mode', mode);
  } catch (err) {
    // Saving the preference is best-effort: if storage is unavailable the
    // query parameter set below still carries the choice through the reload.
  }
  const url = new URL(window.location.href);
  url.searchParams.set('mode', mode);
  window.location.href = url.toString();
}
304
// Resolve the runtime once and show it in the dropdown.
const currentMode = getMode();
document.getElementById('mode-select').value = currentMode;

// Choose the bundle for that runtime and announce that loading has started.
// Note: the WASM build keeps its original filename because the loader
// references the original assets directory.
const scriptName = currentMode !== 'wasm'
  ? 'langdetect-tests.js'
  : 'langdetect_js_tests.bc.wasm.js';
document.getElementById('mode-status').textContent = `Loading ${currentMode.toUpperCase()}...`;

// Inject the bundle through a <script> element so success and failure can
// both be reported in the status line.
const script = document.createElement('script');
script.src = `_build/default/lib/js/${scriptName}`;
script.onload = () => {
  const status = document.getElementById('mode-status');
  status.textContent = `Loaded: ${currentMode.toUpperCase()}`;
  status.style.color = '#16a34a';
  // Wire the sample chips shortly after the bundle has been evaluated.
  setTimeout(setupSampleTexts, 100);
};
script.onerror = () => {
  const status = document.getElementById('mode-status');
  status.textContent = `Failed to load ${scriptName}`;
  status.style.color = '#dc2626';
  showLoadError();
};
document.head.appendChild(script);
327
/**
 * Attach a click handler to every `.sample-text` element.
 *
 * On click, the element's `data-text` value is copied into #demo-input and
 * #demo-button is clicked. Both elements are looked up at click time; if
 * either one is missing the click does nothing.
 */
function setupSampleTexts() {
  for (const sample of document.querySelectorAll('.sample-text')) {
    sample.addEventListener('click', () => {
      const text = sample.getAttribute('data-text');
      // These two controls are the ones created by the OCaml demo code.
      const input = document.getElementById('demo-input');
      const button = document.getElementById('demo-button');
      if (!input || !button) {
        return;
      }
      input.value = text;
      button.click();
    });
  }
}
343
/**
 * Replace the demo and test-result placeholders with build instructions.
 *
 * Called when the runtime bundle could not be loaded. The message names the
 * bundle file that corresponds to the current mode and the dune command that
 * builds it. Either container is skipped if it is not present in the page.
 */
function showLoadError() {
  const mode = getMode();
  const scriptName = mode === 'wasm' ? 'langdetect_js_tests.bc.wasm.js' : 'langdetect-tests.js';
  // Single source for the build command shown in both messages.
  const buildCmd = 'opam exec -- dune build lib/js/';

  const demo = document.getElementById('demo');
  if (demo) {
    demo.innerHTML =
      `<p style="color: #dc2626;"><strong>Library not loaded</strong><br>` +
      `Run <code>${buildCmd}</code> first.</p>`;
  }

  const testResults = document.getElementById('test-results');
  if (testResults) {
    testResults.innerHTML =
      `<p style="color: #dc2626;"><strong>Tests cannot run:</strong> ${scriptName} not found.<br>` +
      `Build with: <code>${buildCmd}</code></p>`;
  }
}
359
// Safety net: once the window's load event has fired, wait a further 500 ms
// and show the build instructions if no `langdetect` global exists by then.
window.addEventListener('load', function () {
  setTimeout(function () {
    if (typeof langdetect !== 'undefined') {
      return;
    }
    showLoadError();
  }, 500);
});
368 </script>
369
370</body>
371</html>