Pop-up dictionary browser extension for language learning. Successor to Yomichan. (PERSONAL FORK)
1/*
2 * Copyright (C) 2023-2025 Yomitan Authors
3 * Copyright (C) 2020-2022 Yomichan Authors
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19import {describe, expect, test} from 'vitest';
20import * as jpw from '../ext/js/language/ja/japanese-wanakana.js';
21import * as jp from '../ext/js/language/ja/japanese.js';
22
23describe('Japanese utility functions', () => {
24 describe('isCodePointKanji', () => {
25 /** @type {[characters: string, expected: boolean][]} */
26 const data = [
27 ['力方', true],
28 ['\u53f1\u{20b9f}', true],
29 ['かたカタ々kata、。?,.?', false],
30 ['逸逸', true],
31 ];
32
33 test.each(data)('%s -> %o', (characters, expected) => {
34 for (const character of characters) {
35 const codePoint = /** @type {number} */ (character.codePointAt(0));
36 const actual = jp.isCodePointKanji(codePoint);
37 expect(actual).toStrictEqual(expected); // `isCodePointKanji failed for ${character} (\\u{${codePoint.toString(16)}})`
38 }
39 });
40 });
41
42 describe('isCodePointKana', () => {
43 /** @type {[characters: string, expected: boolean][]} */
44 const data = [
45 ['かたカタ', true],
46 ['力方々kata、。?,.?', false],
47 ['\u53f1\u{20b9f}', false],
48 ];
49
50 test.each(data)('%s -> %o', (characters, expected) => {
51 for (const character of characters) {
52 const codePoint = /** @type {number} */ (character.codePointAt(0));
53 const actual = jp.isCodePointKana(codePoint);
54 expect(actual).toStrictEqual(expected); // `isCodePointKana failed for ${character} (\\u{${codePoint.toString(16)}})`
55 }
56 });
57 });
58
59 describe('isCodePointJapanese', () => {
60 /** @type {[characters: string, expected: boolean][]} */
61 const data = [
62 ['かたカタ力方々、。?', true],
63 ['\u53f1\u{20b9f}', true],
64 ['kata,.?', false],
65 ['逸逸', true],
66 ];
67
68 test.each(data)('%s -> %o', (characters, expected) => {
69 for (const character of characters) {
70 const codePoint = /** @type {number} */ (character.codePointAt(0));
71 const actual = jp.isCodePointJapanese(codePoint);
72 expect(actual).toStrictEqual(expected); // `isCodePointJapanese failed for ${character} (\\u{${codePoint.toString(16)}})`
73 }
74 });
75 });
76
77 describe('isStringEntirelyKana', () => {
78 /** @type {[string: string, expected: boolean][]} */
79 const data = [
80 ['かたかな', true],
81 ['カタカナ', true],
82 ['ひらがな', true],
83 ['ヒラガナ', true],
84 ['カタカナひらがな', true],
85 ['かたカタ力方々、。?', false],
86 ['\u53f1\u{20b9f}', false],
87 ['kata,.?', false],
88 ['かたカタ力方々、。?invalid', false],
89 ['\u53f1\u{20b9f}invalid', false],
90 ['kata,.?かた', false],
91 ];
92
93 test.each(data)('%s -> %o', (string, expected) => {
94 expect(jp.isStringEntirelyKana(string)).toStrictEqual(expected);
95 });
96 });
97
98 describe('isStringPartiallyJapanese', () => {
99 /** @type {[string: string, expected: boolean][]} */
100 const data = [
101 ['かたかな', true],
102 ['カタカナ', true],
103 ['ひらがな', true],
104 ['ヒラガナ', true],
105 ['カタカナひらがな', true],
106 ['かたカタ力方々、。?', true],
107 ['\u53f1\u{20b9f}', true],
108 ['kata,.?', false],
109 ['かたカタ力方々、。?invalid', true],
110 ['\u53f1\u{20b9f}invalid', true],
111 ['kata,.?かた', true],
112 ['逸逸', true],
113 ];
114
115 test.each(data)('%s -> %o', (string, expected) => {
116 expect(jp.isStringPartiallyJapanese(string)).toStrictEqual(expected);
117 });
118 });
119
120 describe('convertKatakanaToHiragana', () => {
121 /** @type {[string: string, expected: string, keepProlongedSoundMarks?: boolean][]} */
122 const data = [
123 ['かたかな', 'かたかな'],
124 ['ひらがな', 'ひらがな'],
125 ['カタカナ', 'かたかな'],
126 ['ヒラガナ', 'ひらがな'],
127 ['カタカナかたかな', 'かたかなかたかな'],
128 ['ヒラガナひらがな', 'ひらがなひらがな'],
129 ['chikaraちからチカラ力', 'chikaraちからちから力'],
130 ['katakana', 'katakana'],
131 ['hiragana', 'hiragana'],
132 ['カーナー', 'かあなあ'],
133 ['カーナー', 'かーなー', true],
134 ];
135
136 for (const [string, expected, keepProlongedSoundMarks = false] of data) {
137 test(`${string}${keepProlongedSoundMarks ? ' keeping prolonged sound marks' : ''} -> ${expected}`, () => {
138 expect(jp.convertKatakanaToHiragana(string, keepProlongedSoundMarks)).toStrictEqual(expected);
139 });
140 }
141 });
142
143 describe('convertHiraganaToKatakana', () => {
144 /** @type {[string: string, expected: string][]} */
145 const data = [
146 ['かたかな', 'カタカナ'],
147 ['ひらがな', 'ヒラガナ'],
148 ['カタカナ', 'カタカナ'],
149 ['ヒラガナ', 'ヒラガナ'],
150 ['カタカナかたかな', 'カタカナカタカナ'],
151 ['ヒラガナひらがな', 'ヒラガナヒラガナ'],
152 ['chikaraちからチカラ力', 'chikaraチカラチカラ力'],
153 ['katakana', 'katakana'],
154 ['hiragana', 'hiragana'],
155 ];
156
157 test.each(data)('%s -> %o', (string, expected) => {
158 expect(jp.convertHiraganaToKatakana(string)).toStrictEqual(expected);
159 });
160 });
161
162 describe('convertToKanaIME', () => {
163 /** @type {[input: [string, number], expected: import('language.js').KanaIMEOutput][]} */
164 const data = [
165 // Note: `|` represents the text cursor (newSelectionStart) position in the following comments
166 // hiragana
167 [['hiragana', 8], {kanaString: 'ひらがな', newSelectionStart: 4}], // hiragana| -> ひらがな|
168 [['n', 1], {kanaString: 'n', newSelectionStart: 1}], // n| -> n|
169 [['nn', 2], {kanaString: 'ん', newSelectionStart: 1}], // nn| -> ん|
170 [['nn', 1], {kanaString: 'nん', newSelectionStart: 1}], // n|n -> n|ん
171 [['nの', 1], {kanaString: 'nの', newSelectionStart: 1}], // n|の -> n|の
172 [['nnn', 3], {kanaString: 'んn', newSelectionStart: 2}], // nnn| -> んn|
173 [['nnnnano', 7], {kanaString: 'んんあの', newSelectionStart: 4}], // nnnnano| -> んんあの|
174 [['ny', 2], {kanaString: 'ny', newSelectionStart: 2}], // ny| -> ny|
175 [['nya', 3], {kanaString: 'にゃ', newSelectionStart: 2}], // nya| -> にゃ|
176 [['ttttttttttsu', 12], {kanaString: 'っっっっっっっっっつ', newSelectionStart: 10}], // ttttttttttsu| -> っっっっっっっっっつ|
177 [['tt', 2], {kanaString: 'っt', newSelectionStart: 2}], // tt| -> っt|
178 // Katakana
179 [['KATAKANA', 8], {kanaString: 'カタカナ', newSelectionStart: 4}], // KATAKANA| -> カタカナ|
180 [['N', 1], {kanaString: 'N', newSelectionStart: 1}], // N| -> N|
181 [['NN', 2], {kanaString: 'ン', newSelectionStart: 1}], // NN| -> ン|
182 [['NN', 1], {kanaString: 'Nン', newSelectionStart: 1}], // N|N -> N|ン
183 [['Nノ', 1], {kanaString: 'Nノ', newSelectionStart: 1}], // N|ノ -> N|ノ
184 [['NNN', 3], {kanaString: 'ンN', newSelectionStart: 2}], // NNN| -> ンN|
185 [['NNNNANO', 7], {kanaString: 'ンンアノ', newSelectionStart: 4}], // NNNNANO| -> ンンアノ|
186 [['NY', 2], {kanaString: 'NY', newSelectionStart: 2}], // NY| -> NY|
187 [['NYA', 3], {kanaString: 'ニャ', newSelectionStart: 2}], // NYA| -> ニャ|
188 [['TTTTTTTTTTSU', 12], {kanaString: 'ッッッッッッッッッツ', newSelectionStart: 10}], // TTTTTTTTTTSU| -> ッッッッッッッッッツ|
189 [['TT', 2], {kanaString: 'ッT', newSelectionStart: 2}], // TT| -> ッT|
190 ];
191
192 test.each(data)('%s -> %o', (dataValue, expected) => {
193 expect(jpw.convertToKanaIME(dataValue[0], dataValue[1])).toStrictEqual(expected);
194 });
195 });
196
197 describe('convertToRomaji', () => {
198 /** @type {[string: string, expected: string][]} */
199 const data = [
200 ['かたかな', 'katakana'],
201 ['ひらがな', 'hiragana'],
202 ['カタカナ', 'katakana'],
203 ['ヒラガナ', 'hiragana'],
204 ['カタカナかたかな', 'katakanakatakana'],
205 ['ヒラガナひらがな', 'hiraganahiragana'],
206 ['っかっきっくっけっこ', 'kkakkikkukkekko'],
207 ['ッカッキックッケッコ', 'kkakkikkukkekko'],
208 ['chikaraちからチカラ力', 'chikarachikarachikara力'],
209 ['katakana', 'katakana'],
210 ['hiragana', 'hiragana'],
211 ['っつ', 'ttsu'],
212 ['っっっっっっっっっつ', 'ttsu'],
213 ];
214
215 test.each(data)('%s -> %o', (string, expected) => {
216 expect(jpw.convertToRomaji(string)).toStrictEqual(expected);
217 });
218 });
219
220 describe('convertAlphanumericToFullWidth', () => {
221 /** @type {[string: string, expected: string][]} */
222 const data = [
223 ['0123456789', '0123456789'],
224 ['abcdefghij', 'abcdefghij'],
225 ['カタカナ', 'カタカナ'],
226 ['ひらがな', 'ひらがな'],
227 ];
228
229 test.each(data)('%s -> %o', (string, expected) => {
230 expect(jp.convertAlphanumericToFullWidth(string)).toStrictEqual(expected);
231 });
232 });
233
234 describe('convertHalfWidthKanaToFullWidth', () => {
235 /** @type {[string: string, expected: string][]} */
236 const data = [
237 ['0123456789', '0123456789'],
238 ['abcdefghij', 'abcdefghij'],
239 ['カタカナ', 'カタカナ'],
240 ['ひらがな', 'ひらがな'],
241 ['カキ', 'カキ'],
242 ['ガキ', 'ガキ'],
243 ['ニホン', 'ニホン'],
244 ['ニッポン', 'ニッポン'],
245 ];
246
247 for (const [string, expected] of data) {
248 test(`${string} -> ${expected}`, () => {
249 const actual1 = jp.convertHalfWidthKanaToFullWidth(string);
250 const actual2 = jp.convertHalfWidthKanaToFullWidth(string);
251 expect(actual1).toStrictEqual(expected);
252 expect(actual2).toStrictEqual(expected);
253 });
254 }
255 });
256
257 describe('convertAlphabeticToKana', () => {
258 /** @type {[string: string, expected: string][]} */
259 const data = [
260 ['0123456789', '0123456789'],
261 ['abcdefghij', 'あbcでfgひj'],
262 ['ABCDEFGHIJ', 'あbcでfgひj'], // wanakana.toHiragana converts text to lower case
263 ['カタカナ', 'カタカナ'],
264 ['ひらがな', 'ひらがな'],
265 ['chikara', 'ちから'],
266 ['CHIKARA', 'ちから'],
267 ];
268
269 for (const [string, expected] of data) {
270 test(`${string} -> ${string}`, () => {
271 const actual1 = jpw.convertAlphabeticToKana(string);
272 const actual2 = jpw.convertAlphabeticToKana(string);
273 expect(actual1).toStrictEqual(expected);
274 expect(actual2).toStrictEqual(expected);
275 });
276 }
277 });
278
279 describe('distributeFurigana', () => {
280 /** @type {[input: [term: string, reading: string], expected: {text: string, reading: string}[]][]} */
281 const data = [
282 [
283 ['有り難う', 'ありがとう'],
284 [
285 {text: '有', reading: 'あ'},
286 {text: 'り', reading: ''},
287 {text: '難', reading: 'がと'},
288 {text: 'う', reading: ''},
289 ],
290 ],
291 [
292 ['方々', 'かたがた'],
293 [
294 {text: '方々', reading: 'かたがた'},
295 ],
296 ],
297 [
298 ['お祝い', 'おいわい'],
299 [
300 {text: 'お', reading: ''},
301 {text: '祝', reading: 'いわ'},
302 {text: 'い', reading: ''},
303 ],
304 ],
305 [
306 ['美味しい', 'おいしい'],
307 [
308 {text: '美味', reading: 'おい'},
309 {text: 'しい', reading: ''},
310 ],
311 ],
312 [
313 ['食べ物', 'たべもの'],
314 [
315 {text: '食', reading: 'た'},
316 {text: 'べ', reading: ''},
317 {text: '物', reading: 'もの'},
318 ],
319 ],
320 [
321 ['試し切り', 'ためしぎり'],
322 [
323 {text: '試', reading: 'ため'},
324 {text: 'し', reading: ''},
325 {text: '切', reading: 'ぎ'},
326 {text: 'り', reading: ''},
327 ],
328 ],
329 // Ambiguous
330 [
331 ['飼い犬', 'かいいぬ'],
332 [
333 {text: '飼い犬', reading: 'かいいぬ'},
334 ],
335 ],
336 [
337 ['長い間', 'ながいあいだ'],
338 [
339 {text: '長い間', reading: 'ながいあいだ'},
340 ],
341 ],
342 // Same/empty reading
343 [
344 ['飼い犬', ''],
345 [
346 {text: '飼い犬', reading: ''},
347 ],
348 ],
349 [
350 ['かいいぬ', 'かいいぬ'],
351 [
352 {text: 'かいいぬ', reading: ''},
353 ],
354 ],
355 [
356 ['かいぬ', 'かいぬ'],
357 [
358 {text: 'かいぬ', reading: ''},
359 ],
360 ],
361 // Misc
362 [
363 ['月', 'か'],
364 [
365 {text: '月', reading: 'か'},
366 ],
367 ],
368 [
369 ['月', 'カ'],
370 [
371 {text: '月', reading: 'カ'},
372 ],
373 ],
374 // Mismatched kana readings
375 [
376 ['有り難う', 'アリガトウ'],
377 [
378 {text: '有', reading: 'ア'},
379 {text: 'り', reading: 'リ'},
380 {text: '難', reading: 'ガト'},
381 {text: 'う', reading: 'ウ'},
382 ],
383 ],
384 [
385 ['ありがとう', 'アリガトウ'],
386 [
387 {text: 'ありがとう', reading: 'アリガトウ'},
388 ],
389 ],
390 // Mismatched kana readings (real examples)
391 [
392 ['カ月', 'かげつ'],
393 [
394 {text: 'カ', reading: 'か'},
395 {text: '月', reading: 'げつ'},
396 ],
397 ],
398 [
399 ['序ノ口', 'じょのくち'],
400 [
401 {text: '序', reading: 'じょ'},
402 {text: 'ノ', reading: 'の'},
403 {text: '口', reading: 'くち'},
404 ],
405 ],
406 [
407 ['スズメの涙', 'すずめのなみだ'],
408 [
409 {text: 'スズメ', reading: 'すずめ'},
410 {text: 'の', reading: ''},
411 {text: '涙', reading: 'なみだ'},
412 ],
413 ],
414 [
415 ['二カ所', 'にかしょ'],
416 [
417 {text: '二', reading: 'に'},
418 {text: 'カ', reading: 'か'},
419 {text: '所', reading: 'しょ'},
420 ],
421 ],
422 [
423 ['八ツ橋', 'やつはし'],
424 [
425 {text: '八', reading: 'や'},
426 {text: 'ツ', reading: 'つ'},
427 {text: '橋', reading: 'はし'},
428 ],
429 ],
430 [
431 ['八ツ橋', 'やつはし'],
432 [
433 {text: '八', reading: 'や'},
434 {text: 'ツ', reading: 'つ'},
435 {text: '橋', reading: 'はし'},
436 ],
437 ],
438 [
439 ['一カ月', 'いっかげつ'],
440 [
441 {text: '一', reading: 'いっ'},
442 {text: 'カ', reading: 'か'},
443 {text: '月', reading: 'げつ'},
444 ],
445 ],
446 [
447 ['一カ所', 'いっかしょ'],
448 [
449 {text: '一', reading: 'いっ'},
450 {text: 'カ', reading: 'か'},
451 {text: '所', reading: 'しょ'},
452 ],
453 ],
454 [
455 ['カ所', 'かしょ'],
456 [
457 {text: 'カ', reading: 'か'},
458 {text: '所', reading: 'しょ'},
459 ],
460 ],
461 [
462 ['数カ月', 'すうかげつ'],
463 [
464 {text: '数', reading: 'すう'},
465 {text: 'カ', reading: 'か'},
466 {text: '月', reading: 'げつ'},
467 ],
468 ],
469 [
470 ['くノ一', 'くのいち'],
471 [
472 {text: 'く', reading: ''},
473 {text: 'ノ', reading: 'の'},
474 {text: '一', reading: 'いち'},
475 ],
476 ],
477 [
478 ['くノ一', 'くのいち'],
479 [
480 {text: 'く', reading: ''},
481 {text: 'ノ', reading: 'の'},
482 {text: '一', reading: 'いち'},
483 ],
484 ],
485 [
486 ['数カ国', 'すうかこく'],
487 [
488 {text: '数', reading: 'すう'},
489 {text: 'カ', reading: 'か'},
490 {text: '国', reading: 'こく'},
491 ],
492 ],
493 [
494 ['数カ所', 'すうかしょ'],
495 [
496 {text: '数', reading: 'すう'},
497 {text: 'カ', reading: 'か'},
498 {text: '所', reading: 'しょ'},
499 ],
500 ],
501 [
502 ['壇ノ浦の戦い', 'だんのうらのたたかい'],
503 [
504 {text: '壇', reading: 'だん'},
505 {text: 'ノ', reading: 'の'},
506 {text: '浦', reading: 'うら'},
507 {text: 'の', reading: ''},
508 {text: '戦', reading: 'たたか'},
509 {text: 'い', reading: ''},
510 ],
511 ],
512 [
513 ['壇ノ浦の戦', 'だんのうらのたたかい'],
514 [
515 {text: '壇', reading: 'だん'},
516 {text: 'ノ', reading: 'の'},
517 {text: '浦', reading: 'うら'},
518 {text: 'の', reading: ''},
519 {text: '戦', reading: 'たたかい'},
520 ],
521 ],
522 [
523 ['序ノ口格', 'じょのくちかく'],
524 [
525 {text: '序', reading: 'じょ'},
526 {text: 'ノ', reading: 'の'},
527 {text: '口格', reading: 'くちかく'},
528 ],
529 ],
530 [
531 ['二カ国語', 'にかこくご'],
532 [
533 {text: '二', reading: 'に'},
534 {text: 'カ', reading: 'か'},
535 {text: '国語', reading: 'こくご'},
536 ],
537 ],
538 [
539 ['カ国', 'かこく'],
540 [
541 {text: 'カ', reading: 'か'},
542 {text: '国', reading: 'こく'},
543 ],
544 ],
545 [
546 ['カ国語', 'かこくご'],
547 [
548 {text: 'カ', reading: 'か'},
549 {text: '国語', reading: 'こくご'},
550 ],
551 ],
552 [
553 ['壇ノ浦の合戦', 'だんのうらのかっせん'],
554 [
555 {text: '壇', reading: 'だん'},
556 {text: 'ノ', reading: 'の'},
557 {text: '浦', reading: 'うら'},
558 {text: 'の', reading: ''},
559 {text: '合戦', reading: 'かっせん'},
560 ],
561 ],
562 [
563 ['一タ偏', 'いちたへん'],
564 [
565 {text: '一', reading: 'いち'},
566 {text: 'タ', reading: 'た'},
567 {text: '偏', reading: 'へん'},
568 ],
569 ],
570 [
571 ['ル又', 'るまた'],
572 [
573 {text: 'ル', reading: 'る'},
574 {text: '又', reading: 'また'},
575 ],
576 ],
577 [
578 ['ノ木偏', 'のぎへん'],
579 [
580 {text: 'ノ', reading: 'の'},
581 {text: '木偏', reading: 'ぎへん'},
582 ],
583 ],
584 [
585 ['一ノ貝', 'いちのかい'],
586 [
587 {text: '一', reading: 'いち'},
588 {text: 'ノ', reading: 'の'},
589 {text: '貝', reading: 'かい'},
590 ],
591 ],
592 [
593 ['虎ノ門事件', 'とらのもんじけん'],
594 [
595 {text: '虎', reading: 'とら'},
596 {text: 'ノ', reading: 'の'},
597 {text: '門事件', reading: 'もんじけん'},
598 ],
599 ],
600 [
601 ['教育ニ関スル勅語', 'きょういくにかんするちょくご'],
602 [
603 {text: '教育', reading: 'きょういく'},
604 {text: 'ニ', reading: 'に'},
605 {text: '関', reading: 'かん'},
606 {text: 'スル', reading: 'する'},
607 {text: '勅語', reading: 'ちょくご'},
608 ],
609 ],
610 [
611 ['二カ年', 'にかねん'],
612 [
613 {text: '二', reading: 'に'},
614 {text: 'カ', reading: 'か'},
615 {text: '年', reading: 'ねん'},
616 ],
617 ],
618 [
619 ['三カ年', 'さんかねん'],
620 [
621 {text: '三', reading: 'さん'},
622 {text: 'カ', reading: 'か'},
623 {text: '年', reading: 'ねん'},
624 ],
625 ],
626 [
627 ['四カ年', 'よんかねん'],
628 [
629 {text: '四', reading: 'よん'},
630 {text: 'カ', reading: 'か'},
631 {text: '年', reading: 'ねん'},
632 ],
633 ],
634 [
635 ['五カ年', 'ごかねん'],
636 [
637 {text: '五', reading: 'ご'},
638 {text: 'カ', reading: 'か'},
639 {text: '年', reading: 'ねん'},
640 ],
641 ],
642 [
643 ['六カ年', 'ろっかねん'],
644 [
645 {text: '六', reading: 'ろっ'},
646 {text: 'カ', reading: 'か'},
647 {text: '年', reading: 'ねん'},
648 ],
649 ],
650 [
651 ['七カ年', 'ななかねん'],
652 [
653 {text: '七', reading: 'なな'},
654 {text: 'カ', reading: 'か'},
655 {text: '年', reading: 'ねん'},
656 ],
657 ],
658 [
659 ['八カ年', 'はちかねん'],
660 [
661 {text: '八', reading: 'はち'},
662 {text: 'カ', reading: 'か'},
663 {text: '年', reading: 'ねん'},
664 ],
665 ],
666 [
667 ['九カ年', 'きゅうかねん'],
668 [
669 {text: '九', reading: 'きゅう'},
670 {text: 'カ', reading: 'か'},
671 {text: '年', reading: 'ねん'},
672 ],
673 ],
674 [
675 ['十カ年', 'じゅうかねん'],
676 [
677 {text: '十', reading: 'じゅう'},
678 {text: 'カ', reading: 'か'},
679 {text: '年', reading: 'ねん'},
680 ],
681 ],
682 [
683 ['鏡ノ間', 'かがみのま'],
684 [
685 {text: '鏡', reading: 'かがみ'},
686 {text: 'ノ', reading: 'の'},
687 {text: '間', reading: 'ま'},
688 ],
689 ],
690 [
691 ['鏡ノ間', 'かがみのま'],
692 [
693 {text: '鏡', reading: 'かがみ'},
694 {text: 'ノ', reading: 'の'},
695 {text: '間', reading: 'ま'},
696 ],
697 ],
698 [
699 ['ページ違反', 'ぺーじいはん'],
700 [
701 {text: 'ペ', reading: 'ぺ'},
702 {text: 'ー', reading: ''},
703 {text: 'ジ', reading: 'じ'},
704 {text: '違反', reading: 'いはん'},
705 ],
706 ],
707 // Mismatched kana
708 [
709 ['サボる', 'サボル'],
710 [
711 {text: 'サボ', reading: ''},
712 {text: 'る', reading: 'ル'},
713 ],
714 ],
715 // Reading starts with term, but has remainder characters
716 [
717 ['シック', 'シック・ビルしょうこうぐん'],
718 [
719 {text: 'シック', reading: 'シック・ビルしょうこうぐん'},
720 ],
721 ],
722 // Kanji distribution tests
723 [
724 ['逸らす', 'そらす'],
725 [
726 {text: '逸', reading: 'そ'},
727 {text: 'らす', reading: ''},
728 ],
729 ],
730 [
731 ['逸らす', 'そらす'],
732 [
733 {text: '逸', reading: 'そ'},
734 {text: 'らす', reading: ''},
735 ],
736 ],
737 ];
738
739 test.each(data)('%o -> %o', (input, expected) => {
740 const [term, reading] = input;
741 const actual = jp.distributeFurigana(term, reading);
742 expect(actual).toStrictEqual(expected);
743 });
744 });
745
746 describe('distributeFuriganaInflected', () => {
747 /** @type {[input: [term: string, reading: string, source: string], expected: {text: string, reading: string}[]][]} */
748 const data = [
749 [
750 ['美味しい', 'おいしい', '美味しかた'],
751 [
752 {text: '美味', reading: 'おい'},
753 {text: 'しかた', reading: ''},
754 ],
755 ],
756 [
757 ['食べる', 'たべる', '食べた'],
758 [
759 {text: '食', reading: 'た'},
760 {text: 'べた', reading: ''},
761 ],
762 ],
763 [
764 ['迄に', 'までに', 'までに'],
765 [
766 {text: 'までに', reading: ''},
767 ],
768 ],
769 [
770 ['行う', 'おこなう', 'おこなわなかった'],
771 [
772 {text: 'おこなわなかった', reading: ''},
773 ],
774 ],
775 [
776 ['いい', 'いい', 'イイ'],
777 [
778 {text: 'イイ', reading: ''},
779 ],
780 ],
781 [
782 ['否か', 'いなか', '否カ'],
783 [
784 {text: '否', reading: 'いな'},
785 {text: 'カ', reading: 'か'},
786 ],
787 ],
788 ];
789
790 test.each(data)('%o -> %o', (input, expected) => {
791 const [term, reading, source] = input;
792 const actual = jp.distributeFuriganaInflected(term, reading, source);
793 expect(actual).toStrictEqual(expected);
794 });
795 });
796
797 describe('collapseEmphaticSequences', () => {
798 /** @type {[input: [text: string, fullCollapse: boolean], output: string][]} */
799 const data = [
800 [['かこい', false], 'かこい'],
801 [['かこい', true], 'かこい'],
802 [['かっこい', false], 'かっこい'],
803 [['かっこい', true], 'かこい'],
804 [['かっっこい', false], 'かっこい'],
805 [['かっっこい', true], 'かこい'],
806 [['かっっっこい', false], 'かっこい'],
807 [['かっっっこい', true], 'かこい'],
808
809 [['すごい', false], 'すごい'],
810 [['すごい', true], 'すごい'],
811 [['すごーい', false], 'すごーい'],
812 [['すごーい', true], 'すごい'],
813 [['すごーーい', false], 'すごーい'],
814 [['すごーーい', true], 'すごい'],
815 [['すっごーい', false], 'すっごーい'],
816 [['すっごーい', true], 'すごい'],
817 [['すっっごーーい', false], 'すっごーい'],
818 [['すっっごーーい', true], 'すごい'],
819
820 [['こい', false], 'こい'],
821 [['こい', true], 'こい'],
822 [['っこい', false], 'っこい'],
823 [['っこい', true], 'っこい'],
824 [['っっこい', false], 'っっこい'],
825 [['っっこい', true], 'っっこい'],
826 [['っっっこい', false], 'っっっこい'],
827 [['っっっこい', true], 'っっっこい'],
828 [['こいっ', false], 'こいっ'],
829 [['こいっ', true], 'こいっ'],
830 [['こいっっ', false], 'こいっっ'],
831 [['こいっっ', true], 'こいっっ'],
832 [['こいっっっ', false], 'こいっっっ'],
833 [['こいっっっ', true], 'こいっっっ'],
834 [['っこいっ', false], 'っこいっ'],
835 [['っこいっ', true], 'っこいっ'],
836 [['っっこいっっ', false], 'っっこいっっ'],
837 [['っっこいっっ', true], 'っっこいっっ'],
838 [['っっっこいっっっ', false], 'っっっこいっっっ'],
839 [['っっっこいっっっ', true], 'っっっこいっっっ'],
840
841 [['', false], ''],
842 [['', true], ''],
843 [['っ', false], 'っ'],
844 [['っ', true], 'っ'],
845 [['っっ', false], 'っっ'],
846 [['っっ', true], 'っっ'],
847 [['っっっ', false], 'っっっ'],
848 [['っっっ', true], 'っっっ'],
849
850 [['っーッかっこいいっーッ', false], 'っーッかっこいいっーッ'],
851 [['っーッかっこいいっーッ', true], 'っーッかこいいっーッ'],
852 [['っっーーッッかっこいいっっーーッッ', false], 'っっーーッッかっこいいっっーーッッ'],
853 [['っっーーッッかっこいいっっーーッッ', true], 'っっーーッッかこいいっっーーッッ'],
854
855 [['っーッ', false], 'っーッ'],
856 [['っーッ', true], 'っーッ'],
857 [['っっーーッッ', false], 'っっーーッッ'],
858 [['っっーーッッ', true], 'っっーーッッ'],
859 ];
860
861 test.each(data)('%o -> %o', (input, output) => {
862 const [text, fullCollapse] = input;
863
864 const actual1 = jp.collapseEmphaticSequences(text, fullCollapse);
865 const actual2 = jp.collapseEmphaticSequences(text, fullCollapse);
866 expect(actual1).toStrictEqual(output);
867 expect(actual2).toStrictEqual(output);
868 });
869 });
870
871 describe('isMoraPitchHigh', () => {
872 /** @type {[input: [moraIndex: number, pitchAccentDownstepPosition: number], expected: boolean][]} */
873 const data = [
874 [[0, 0], false],
875 [[1, 0], true],
876 [[2, 0], true],
877 [[3, 0], true],
878
879 [[0, 1], true],
880 [[1, 1], false],
881 [[2, 1], false],
882 [[3, 1], false],
883
884 [[0, 2], false],
885 [[1, 2], true],
886 [[2, 2], false],
887 [[3, 2], false],
888
889 [[0, 3], false],
890 [[1, 3], true],
891 [[2, 3], true],
892 [[3, 3], false],
893
894 [[0, 4], false],
895 [[1, 4], true],
896 [[2, 4], true],
897 [[3, 4], true],
898 ];
899
900 test.each(data)('%o -> %o', (input, expected) => {
901 const [moraIndex, pitchAccentDownstepPosition] = input;
902 const actual = jp.isMoraPitchHigh(moraIndex, pitchAccentDownstepPosition);
903 expect(actual).toStrictEqual(expected);
904 });
905 });
906
907 describe('getKanaMorae', () => {
908 /** @type {[text: string, expected: string[]][]} */
909 const data = [
910 ['かこ', ['か', 'こ']],
911 ['かっこ', ['か', 'っ', 'こ']],
912 ['カコ', ['カ', 'コ']],
913 ['カッコ', ['カ', 'ッ', 'コ']],
914 ['コート', ['コ', 'ー', 'ト']],
915 ['ちゃんと', ['ちゃ', 'ん', 'と']],
916 ['とうきょう', ['と', 'う', 'きょ', 'う']],
917 ['ぎゅう', ['ぎゅ', 'う']],
918 ['ディスコ', ['ディ', 'ス', 'コ']],
919 ];
920
921 test.each(data)('%s -> %o', (text, expected) => {
922 const actual = jp.getKanaMorae(text);
923 expect(actual).toStrictEqual(expected);
924 });
925 });
926});
927
describe('combining dakuten/handakuten normalization', () => {
    // Combining voiced (U+3099) and semi-voiced (U+309A) sound marks.
    const DAKUTEN = '\u3099';
    const HANDAKUTEN = '\u309A';

    /**
     * Pairs the i-th character of `bases` followed by `mark` with the i-th
     * character of `composed`.
     * @param {string} bases Base kana, one character per case.
     * @param {string} mark Combining mark appended to every base character.
     * @param {string} composed Expected single-character results, aligned with `bases`.
     * @returns {string[][]}
     */
    const pairWithComposed = (bases, mark, composed) => {
        const targets = [...composed];
        return [...bases].map((base, index) => [`${base}${mark}`, targets[index]]);
    };

    // Kana for which base + dakuten must collapse into one voiced character.
    const dakutenCases = [
        ...pairWithComposed('かきくけこさしすせそたちつてとはひふへほ', DAKUTEN, 'がぎぐげござじずぜぞだぢづでどばびぶべぼ'),
        ...pairWithComposed('カキクケコサシスセソタチツテトハヒフヘホ', DAKUTEN, 'ガギグゲゴザジズゼゾダヂヅデドバビブベボ'),
    ];

    // Kana for which base + handakuten must collapse into one semi-voiced character.
    const handakutenCases = pairWithComposed('はひふへほハヒフヘホ', HANDAKUTEN, 'ぱぴぷぺぽパピプペポ');

    // Kana that must come back unchanged with either mark attached; every base
    // contributes its dakuten case first, then its handakuten case.
    const untouchedBases = [
        ...'なにぬねのまみむめもゃやゅゆょよらりるれろゎわゐゑをん',
        ...'ナニヌネノマミムメモャヤュユョヨラリルレロヮワヰヱヲン',
    ];
    const ignoredCases = untouchedBases.flatMap((base) => (
        [DAKUTEN, HANDAKUTEN].map((mark) => [`${base}${mark}`, `${base}${mark}`])
    ));

    // Empty input, marks with no preceding base, and marks embedded in longer words.
    const miscCases = [
        ['', ''],
        ['\u3099ハ', '\u3099ハ'],
        ['\u309Aハ', '\u309Aハ'],
        ['さくらし\u3099また\u3099いこん', 'さくらじまだいこん'],
        ['いっほ\u309Aん', 'いっぽん'],
    ];

    const allCases = [...dakutenCases, ...handakutenCases, ...ignoredCases, ...miscCases];
    test.each(allCases)('%s normalizes to %s', (input, expected) => {
        const normalized = jp.normalizeCombiningCharacters(input);
        expect(normalized).toStrictEqual(expected);
    });
});
1109
describe('cjk compatibility characters normalization', () => {
    // Squared katakana words, each paired with its spelled-out form.
    const squaredWordCases = [
        ['㌀', 'アパート'],
        ['㌁', 'アルファ'],
        ['㌂', 'アンペア'],
        ['㌃', 'アール'],
        ['㌄', 'イニング'],
        ['㌅', 'インチ'],
        ['㌆', 'ウォン'],
        ['㌇', 'エスクード'],
        ['㌈', 'エーカー'],
        ['㌉', 'オンス'],
        ['㌊', 'オーム'],
        ['㌋', 'カイリ'],
        ['㌌', 'カラット'],
        ['㌍', 'カロリー'],
        ['㌎', 'ガロン'],
        ['㌏', 'ガンマ'],
        ['㌐', 'ギガ'],
        ['㌑', 'ギニー'],
        ['㌒', 'キュリー'],
        ['㌓', 'ギルダー'],
        ['㌔', 'キロ'],
        ['㌕', 'キログラム'],
        ['㌖', 'キロメートル'],
        ['㌗', 'キロワット'],
        ['㌘', 'グラム'],
        ['㌙', 'グラムトン'],
        ['㌚', 'クルゼイロ'],
        ['㌛', 'クローネ'],
        ['㌜', 'ケース'],
        ['㌝', 'コルナ'],
        ['㌞', 'コーポ'],
        ['㌟', 'サイクル'],
        ['㌠', 'サンチーム'],
        ['㌡', 'シリング'],
        ['㌢', 'センチ'],
        ['㌣', 'セント'],
        ['㌤', 'ダース'],
        ['㌥', 'デシ'],
        ['㌦', 'ドル'],
        ['㌧', 'トン'],
        ['㌨', 'ナノ'],
        ['㌩', 'ノット'],
        ['㌪', 'ハイツ'],
        ['㌫', 'パーセント'],
        ['㌬', 'パーツ'],
        ['㌭', 'バーレル'],
        ['㌮', 'ピアストル'],
        ['㌯', 'ピクル'],
        ['㌰', 'ピコ'],
        ['㌱', 'ビル'],
        ['㌲', 'ファラッド'],
        ['㌳', 'フィート'],
        ['㌴', 'ブッシェル'],
        ['㌵', 'フラン'],
        ['㌶', 'ヘクタール'],
        ['㌷', 'ペソ'],
        ['㌸', 'ペニヒ'],
        ['㌹', 'ヘルツ'],
        ['㌺', 'ペンス'],
        ['㌻', 'ページ'],
        ['㌼', 'ベータ'],
        ['㌽', 'ポイント'],
        ['㌾', 'ボルト'],
        ['㌿', 'ホン'],
        ['㍀', 'ポンド'],
        ['㍁', 'ホール'],
        ['㍂', 'ホーン'],
        ['㍃', 'マイクロ'],
        ['㍄', 'マイル'],
        ['㍅', 'マッハ'],
        ['㍆', 'マルク'],
        ['㍇', 'マンション'],
        ['㍈', 'ミクロン'],
        ['㍉', 'ミリ'],
        ['㍊', 'ミリバール'],
        ['㍋', 'メガ'],
        ['㍌', 'メガトン'],
        ['㍍', 'メートル'],
        ['㍎', 'ヤード'],
        ['㍏', 'ヤール'],
        ['㍐', 'ユアン'],
        ['㍑', 'リットル'],
        ['㍒', 'リラ'],
        ['㍓', 'ルピー'],
        ['㍔', 'ルーブル'],
        ['㍕', 'レム'],
        ['㍖', 'レントゲン'],
        ['㍗', 'ワット'],
    ];

    // Hour symbols occupy the consecutive code points U+3358 (0点) through U+3370 (24点).
    const hourCases = Array.from({length: 25}, (_, hour) => [String.fromCodePoint(0x3358 + hour), `${hour}点`]);

    // Squared era names and the squared "corporation" symbol.
    const eraCases = [
        ['㍻', '平成'],
        ['㍼', '昭和'],
        ['㍽', '大正'],
        ['㍾', '明治'],
        ['㍿', '株式会社'],
    ];

    // Day symbols occupy the consecutive code points U+33E0 (1日) through U+33FE (31日).
    const dayCases = Array.from({length: 31}, (_, index) => [String.fromCodePoint(0x33e0 + index), `${index + 1}日`]);

    const allCases = [...squaredWordCases, ...hourCases, ...eraCases, ...dayCases];

    test.each(allCases)('%s normalizes to %s', (input, expected) => {
        const normalized = jp.normalizeCJKCompatibilityCharacters(input);
        expect(normalized).toStrictEqual(expected);
    });
});