Detect which human language a document uses from OCaml, from the Nu Html validator
languages
unicode
ocaml
1<!DOCTYPE html>
2<html lang="en">
3<head>
4 <meta charset="UTF-8">
5 <meta name="viewport" content="width=device-width, initial-scale=1.0">
6 <title>Langdetect - Language Detection Demo</title>
7 <style>
8 * {
9 box-sizing: border-box;
10 }
11 body {
12 font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
13 line-height: 1.6;
14 max-width: 1000px;
15 margin: 0 auto;
16 padding: 2rem;
17 background: #f5f5f5;
18 color: #333;
19 }
20 h1 {
21 color: #2563eb;
22 border-bottom: 3px solid #3b82f6;
23 padding-bottom: 0.5rem;
24 margin-bottom: 0.5rem;
25 }
26 .subtitle {
27 color: #666;
28 margin-top: 0;
29 margin-bottom: 2rem;
30 }
31 .section {
32 background: white;
33 border: 1px solid #e0e0e0;
34 border-radius: 12px;
35 padding: 1.5rem;
36 margin: 1.5rem 0;
37 box-shadow: 0 2px 4px rgba(0,0,0,0.05);
38 }
39 .section h2 {
40 margin-top: 0;
41 color: #1e40af;
42 }
43 .demo-area {
44 display: flex;
45 flex-direction: column;
46 gap: 1rem;
47 }
48 textarea {
49 width: 100%;
50 padding: 1rem;
51 font-size: 1rem;
52 font-family: inherit;
53 border: 2px solid #e0e0e0;
54 border-radius: 8px;
55 resize: vertical;
56 transition: border-color 0.2s;
57 }
58 textarea:focus {
59 outline: none;
60 border-color: #3b82f6;
61 }
62 button {
63 padding: 0.75rem 2rem;
64 font-size: 1rem;
65 font-weight: 600;
66 cursor: pointer;
67 border: none;
68 border-radius: 8px;
69 background: #2563eb;
70 color: white;
71 transition: all 0.2s;
72 align-self: flex-start;
73 }
74 button:hover {
75 background: #1d4ed8;
76 transform: translateY(-1px);
77 }
78 button:active {
79 transform: translateY(0);
80 }
81 #demo-result {
82 padding: 1rem;
83 background: #f8fafc;
84 border-radius: 8px;
85 border: 1px solid #e2e8f0;
86 }
87 #demo-result ul {
88 margin: 0.5rem 0;
89 padding-left: 1.5rem;
90 }
91 #demo-result li {
92 margin: 0.25rem 0;
93 }
94 .summary {
95 background: #eff6ff;
96 padding: 1rem 1.5rem;
97 border-radius: 8px;
98 margin-bottom: 1rem;
99 }
100 .summary h2 {
101 margin: 0 0 0.5rem 0;
102 }
103 .summary p {
104 margin: 0.25rem 0;
105 }
106 .results-table {
107 width: 100%;
108 border-collapse: collapse;
109 font-size: 0.9rem;
110 }
111 .results-table th,
112 .results-table td {
113 padding: 0.75rem 1rem;
114 text-align: left;
115 border-bottom: 1px solid #e0e0e0;
116 }
117 .results-table th {
118 background: #f8fafc;
119 font-weight: 600;
120 color: #475569;
121 }
122 .results-table tr:hover {
123 background: #f8fafc;
124 }
125 .results-table .code {
126 font-family: 'SF Mono', Monaco, 'Cascadia Code', monospace;
127 font-size: 0.85rem;
128 background: #f1f5f9;
129 padding: 0.2rem 0.4rem;
130 border-radius: 4px;
131 }
132 .results-table .corpus-text {
133 font-size: 0.8rem;
134 max-width: 300px;
135 overflow: hidden;
136 text-overflow: ellipsis;
137 white-space: nowrap;
138 color: #666;
139 }
140 .results-table .pass {
141 color: #16a34a;
142 font-weight: bold;
143 font-size: 1.1rem;
144 }
145 .results-table .fail {
146 color: #dc2626;
147 font-weight: bold;
148 font-size: 1.1rem;
149 }
150 .loading {
151 text-align: center;
152 padding: 2rem;
153 color: #666;
154 }
155 .api-docs {
156 background: #1e293b;
157 color: #e2e8f0;
158 padding: 1rem;
159 border-radius: 8px;
160 overflow-x: auto;
161 }
162 .api-docs code {
163 color: #7dd3fc;
164 }
165 .api-docs .comment {
166 color: #94a3b8;
167 }
168 .sample-texts {
169 display: grid;
170 grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
171 gap: 0.5rem;
172 margin-top: 1rem;
173 }
174 .sample-text {
175 padding: 0.5rem 1rem;
176 font-size: 0.85rem;
177 background: #f1f5f9;
178 border: 1px solid #e2e8f0;
179 border-radius: 6px;
180 cursor: pointer;
181 transition: all 0.2s;
182 }
183 .sample-text:hover {
184 background: #e2e8f0;
185 border-color: #cbd5e1;
186 }
187 .sample-text .lang {
188 font-weight: 600;
189 color: #1e40af;
190 }
191 </style>
192</head>
193<body>
194
195 <h1>🌍 Langdetect</h1>
196 <p class="subtitle">Language detection for the browser using n-gram frequency analysis</p>
197
<!-- Mode selector: chooses which compiled build (js_of_ocaml or wasm_of_ocaml) the page loads.
     The choice only takes effect after the Reload button re-navigates with ?mode=. -->
<div class="section" style="padding: 1rem 1.5rem; display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
  <label for="mode-select" style="font-weight: 600;">Runtime:</label>
  <select id="mode-select" style="padding: 0.5rem 1rem; border-radius: 6px; border: 2px solid #e0e0e0; font-size: 1rem;">
    <option value="js">JavaScript (js_of_ocaml)</option>
    <option value="wasm">WebAssembly (wasm_of_ocaml)</option>
  </select>
  <!-- Explicit type so the button never acts as a submit button if it ends up inside a form. -->
  <button type="button" onclick="reloadWithMode()" style="padding: 0.5rem 1.5rem;">Reload</button>
  <!-- The inline script writes load progress and failures here; role="status" makes it a polite
       live region, and it is present in the DOM before any text is injected. -->
  <span id="mode-status" role="status" style="color: #666; font-size: 0.9rem;"></span>
</div>
208
209 <!-- Interactive Demo (created by OCaml code, but we add sample texts) -->
210 <div class="section">
211 <div id="demo">
212 <!-- Demo UI created by langdetect_js_tests.ml -->
213 <p class="loading">Loading demo...</p>
214 </div>
215
216 <p style="margin-top: 1rem; margin-bottom: 0.5rem; color: #666; font-size: 0.9rem;">
217 Click a sample to try:
218 </p>
219 <div class="sample-texts" id="sample-texts">
220 <div class="sample-text" data-text="The quick brown fox jumps over the lazy dog.">
221 <span class="lang">🇬🇧 English</span>
222 </div>
223 <div class="sample-text" data-text="Bonjour le monde! Comment allez-vous aujourd'hui?">
224 <span class="lang">🇫🇷 French</span>
225 </div>
226 <div class="sample-text" data-text="Guten Tag! Wie geht es Ihnen heute?">
227 <span class="lang">🇩🇪 German</span>
228 </div>
229 <div class="sample-text" data-text="¡Hola mundo! ¿Cómo estás hoy?">
230 <span class="lang">🇪🇸 Spanish</span>
231 </div>
232 <div class="sample-text" data-text="你好世界!今天你好吗?">
233 <span class="lang">🇨🇳 Chinese</span>
234 </div>
235 <div class="sample-text" data-text="こんにちは世界!今日はお元気ですか?">
236 <span class="lang">🇯🇵 Japanese</span>
237 </div>
238 <div class="sample-text" data-text="مرحبا بالعالم! كيف حالك اليوم؟">
239 <span class="lang">🇸🇦 Arabic</span>
240 </div>
241 <div class="sample-text" data-text="Привет мир! Как дела сегодня?">
242 <span class="lang">🇷🇺 Russian</span>
243 </div>
244 </div>
245 </div>
246
247 <!-- Test Results -->
248 <div class="section">
249 <div id="test-results">
250 <p class="loading">Loading tests...</p>
251 </div>
252 </div>
253
254 <!-- API Documentation -->
255 <div class="section">
256 <h2>JavaScript API</h2>
257 <div class="api-docs">
258<pre><span class="comment">// Detect the most likely language</span>
259<code>langdetect.detect</code>("Hello, world!")
260<span class="comment">// → "en"</span>
261
262<span class="comment">// Get detection with confidence score</span>
263<code>langdetect.detectWithProb</code>("Bonjour le monde!")
264<span class="comment">// → { lang: "fr", prob: 0.9999 }</span>
265
266<span class="comment">// Get all matching languages</span>
267<code>langdetect.detectAll</code>("Hello world")
268<span class="comment">// → [{ lang: "en", prob: 0.85 }, { lang: "de", prob: 0.10 }, ...]</span>
269
270<span class="comment">// List supported languages</span>
271<code>langdetect.languages</code>()
272<span class="comment">// → ["ar", "bg", "bn", "ca", "cs", "da", "de", "el", ...]</span></pre>
273 </div>
274 </div>
275
276 <!-- Supported Languages -->
277 <div class="section">
278 <h2>Supported Languages (47)</h2>
279 <p>
280 Arabic, Bengali, Bulgarian, Catalan, Chinese (Simplified), Chinese (Traditional),
281 Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, German, Greek,
282 Gujarati, Hebrew, Hindi, Hungarian, Indonesian, Italian, Japanese, Korean, Latvian,
283 Lithuanian, Macedonian, Malayalam, Norwegian, Panjabi, Persian, Polish, Portuguese,
284 Romanian, Russian, Sinhala, Slovak, Spanish, Swedish, Tagalog, Tamil, Telugu, Thai,
285 Turkish, Ukrainian, Urdu, Vietnamese
286 </p>
287 </div>
288
289 <script>
// Resolve which runtime build to load.
// Precedence: the ?mode= URL parameter, then the value remembered in
// localStorage under 'langdetect-mode', then the 'js' default.
// Only 'js' and 'wasm' are accepted; any other value is ignored so the
// <select> and the status text never show a mode that does not exist.
// Returns: 'js' or 'wasm'.
function getMode() {
  const validModes = ['js', 'wasm'];
  const params = new URLSearchParams(window.location.search);

  let stored = null;
  try {
    stored = localStorage.getItem('langdetect-mode');
  } catch (err) {
    // Storage access can throw when the browser blocks it; treat the
    // remembered mode as unset instead of aborting the whole script.
  }

  const chosen = [params.get('mode'), stored].find(mode => validModes.includes(mode));
  return chosen || 'js';
}
295
// Apply the mode currently chosen in #mode-select: remember it in
// localStorage (best effort) and re-navigate to this page with ?mode=<mode>.
// Invoked by the Reload button's onclick handler.
function reloadWithMode() {
  const mode = document.getElementById('mode-select').value;

  try {
    localStorage.setItem('langdetect-mode', mode);
  } catch (err) {
    // Persisting can fail (storage blocked or full). The URL parameter set
    // below still carries the choice, so the reload must go ahead anyway.
  }

  // Build from the href string explicitly rather than relying on Location
  // being stringified implicitly by the URL constructor.
  const url = new URL(window.location.href);
  url.searchParams.set('mode', mode);
  window.location.href = url.toString();
}
304
// Show the active mode in the selector.
const currentMode = getMode();
document.getElementById('mode-select').value = currentMode;

// Choose the build that matches the mode. Both names are relative paths, so
// the script must sit next to this page in the installed layout.
const scriptName = currentMode === 'wasm'
  ? 'langdetect_js_tests.bc.wasm.js'
  : 'langdetect-tests.js';
document.getElementById('mode-status').textContent = `Loading ${currentMode.toUpperCase()}...`;

// Inject the build as a <script> element and report the outcome in #mode-status.
const script = document.createElement('script');
script.src = scriptName;
script.onload = () => {
  const status = document.getElementById('mode-status');
  status.textContent = `Loaded: ${currentMode.toUpperCase()}`;
  status.style.color = '#16a34a';
  // Wire the sample tiles shortly after the script has executed.
  setTimeout(setupSampleTexts, 100);
};
script.onerror = () => {
  const status = document.getElementById('mode-status');
  status.textContent = `Failed to load ${scriptName}`;
  status.style.color = '#dc2626';
  showLoadError();
};
document.head.appendChild(script);
326
// Wire every .sample-text tile so that activating it copies its data-text
// into #demo-input and presses #demo-button.
// The input and button are looked up at activation time, not at setup time,
// so a tile simply does nothing while either element is absent.
// Tiles that are not native <button> elements are also exposed as buttons
// (role + tab stop + Enter/Space handling) so they work from the keyboard.
function setupSampleTexts() {
  document.querySelectorAll('.sample-text').forEach(el => {
    const activate = () => {
      const text = el.getAttribute('data-text');
      const input = document.getElementById('demo-input');
      const button = document.getElementById('demo-button');
      if (input && button) {
        input.value = text;
        button.click();
      }
    };

    el.addEventListener('click', activate);

    // A native <button> is already focusable and turns Enter/Space into a
    // click, so it needs none of the emulation below.
    if (el.tagName === 'BUTTON') {
      return;
    }

    el.setAttribute('role', 'button');
    el.setAttribute('tabindex', '0');
    el.addEventListener('keydown', event => {
      if (event.key === 'Enter' || event.key === ' ') {
        // Stop Space from scrolling the page while the tile has focus.
        event.preventDefault();
        activate();
      }
    });
  });
}
342
// Replace the demo and test-result placeholders with a "could not load"
// message naming the script that the current mode expects.
// Either container may be missing; each one is updated only if it exists,
// so the error path itself cannot throw on a null element.
function showLoadError() {
  const mode = getMode();
  const scriptName = mode === 'wasm' ? 'langdetect_js_tests.bc.wasm.js' : 'langdetect-tests.js';

  // scriptName is one of the two fixed literals above, never user input,
  // so interpolating it into innerHTML is safe.
  const demo = document.getElementById('demo');
  if (demo) {
    demo.innerHTML =
      `<p style="color: #dc2626;"><strong>Library not loaded</strong><br>` +
      `Could not load <code>${scriptName}</code>.</p>`;
  }

  const testResults = document.getElementById('test-results');
  if (testResults) {
    testResults.innerHTML =
      `<p style="color: #dc2626;"><strong>Tests cannot run:</strong> ${scriptName} not found.</p>`;
  }
}
356
// Safety net: half a second after the window 'load' event, show the load
// error if no global named langdetect exists.
window.addEventListener('load', function () {
  const reportIfMissing = function () {
    // typeof is used so an undeclared global does not raise a ReferenceError.
    const libraryMissing = typeof langdetect === 'undefined';
    if (libraryMissing) {
      showLoadError();
    }
  };
  setTimeout(reportIfMissing, 500);
});
365 </script>
366
367</body>
368</html>