Pop-up dictionary browser extension for language learning. Successor to Yomichan. (PERSONAL FORK)
at lambda-fork/main 534 lines 14 kB view raw
/*
 * Copyright (C) 2023-2025  Yomitan Authors
 * Copyright (C) 2021-2022  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

import type * as DictionaryData from './dictionary-data';

// Common

/**
 * A dictionary entry: either a kanji entry or a term entry.
 * The two members are told apart by their `type` field.
 */
export type DictionaryEntry = KanjiDictionaryEntry | TermDictionaryEntry;

/**
 * The set of `type` discriminator values that a dictionary entry can have.
 */
export type DictionaryEntryType = DictionaryEntry['type'];

/**
 * A tag represents some brief information about part of a dictionary entry.
 */
export type Tag = {
    /**
     * The name of the tag.
     */
    name: string;
    /**
     * The category of the tag.
     */
    category: string;
    /**
     * A number indicating the sorting order of the tag.
     */
    order: number;
    /**
     * A score value for the tag.
     */
    score: number;
    /**
     * An array of descriptions for the tag.
     * If there are multiple entries, the values will typically have originated
     * from different dictionaries. However, there is no correlation between the
     * length of this array and the length of the `dictionaries` field,
     * as duplicates are removed.
     */
    content: string[];
    /**
     * An array of dictionary names that contained a tag with this name and category.
     */
    dictionaries: string[];
    /**
     * Whether or not this tag is redundant with previous tags.
     */
    redundant: boolean;
};

// Kanji

/**
 * A dictionary entry for a kanji character.
 */
export type KanjiDictionaryEntry = {
    /**
     * The type of the entry.
     */
    type: 'kanji';
    /**
     * The kanji character that was looked up.
     */
    character: string;
    /**
     * The name of the dictionary that the information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * Onyomi readings for the kanji character.
     */
    onyomi: string[];
    /**
     * Kunyomi readings for the kanji character.
     */
    kunyomi: string[];
    /**
     * Tags for the kanji character.
     */
    tags: Tag[];
    /**
     * An object containing stats about the kanji character.
     */
    stats: KanjiStatGroups;
    /**
     * Definitions for the kanji character.
     */
    definitions: string[];
    /**
     * Frequency information for the kanji character.
     */
    frequencies: KanjiFrequency[];
};

/**
 * An object with groups of stats about a kanji character.
 */
export type KanjiStatGroups = {
    /**
     * A group of stats.
     * @param propName The name of the group.
     */
    [propName: string]: KanjiStat[];
};

/**
 * A stat represents a generic piece of information about a kanji character.
 */
export type KanjiStat = {
    /**
     * The name of the stat.
     */
    name: string;
    /**
     * The category of the stat.
     */
    category: string;
    /**
     * A description of the stat.
     */
    content: string;
    /**
     * A number indicating the sorting order of the stat.
     */
    order: number;
    /**
     * A score value for the stat.
     */
    score: number;
    /**
     * The name of the dictionary that the stat originated from.
     */
    dictionary: string;
    /**
     * A value for the stat.
     */
    value: number | string;
};

/**
 * Frequency information corresponds to how frequently a character appears in a corpus,
 * which can be a number of occurrences or an overall rank.
 */
export type KanjiFrequency = {
    /**
     * The original order of the frequency, which is usually used for sorting.
     */
    index: number;
    /**
     * The name of the dictionary that the frequency information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The kanji character for the frequency.
     */
    character: string;
    /**
     * The frequency for the character, as a number of occurrences or an overall rank.
     */
    frequency: number;
    /**
     * A display value to show to the user.
     */
    displayValue: string | null;
    /**
     * Whether or not the displayValue string was parsed to determine the frequency value.
     */
    displayValueParsed: boolean;
};

// Terms

/**
 * A dictionary entry for a term or group of terms.
 */
export type TermDictionaryEntry = {
    /**
     * The type of the entry.
     */
    type: 'term';
    /**
     * Whether or not any of the sources is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
    /**
     * Ways that a looked-up word might be transformed into this term.
     */
    textProcessorRuleChainCandidates: textProcessorRuleChainCandidate[];
    /**
     * Ways that a looked-up word might be an inflected form of this term.
     */
    inflectionRuleChainCandidates: InflectionRuleChainCandidate[];
    /**
     * A score for the dictionary entry.
     */
    score: number;
    /**
     * The sorting value based on the determined term frequency.
     */
    frequencyOrder: number;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The number of primary sources that had an exact text match for the term.
     */
    sourceTermExactMatchCount: number;
    /**
     * Whether the term reading matched the primary reading.
     */
    matchPrimaryReading: boolean;
    /**
     * The maximum length of the original text for all primary sources.
     */
    maxOriginalTextLength: number;
    /**
     * Headwords for the entry.
     */
    headwords: TermHeadword[];
    /**
     * Definitions for the entry.
     */
    definitions: TermDefinition[];
    /**
     * Pronunciations for the entry.
     */
    pronunciations: TermPronunciation[];
    /**
     * Frequencies for the entry.
     */
    frequencies: TermFrequency[];
};

/**
 * One candidate chain of inflection rules, paired with the source it is attributed to.
 */
export type InflectionRuleChainCandidate = {
    /**
     * The source this candidate is attributed to.
     */
    source: InflectionSource;
    /**
     * The inflection rules that make up the chain.
     */
    inflectionRules: InflectionRuleChain;
};

/**
 * One candidate chain of text processor rules, expressed as a list of strings.
 * NOTE(review): the strings are presumably rule identifiers; confirm against the code
 * that populates `TermDictionaryEntry.textProcessorRuleChainCandidates`.
 */
export type TextProcessorRuleChainCandidate = string[];

/**
 * Legacy lowercase spelling of {@link TextProcessorRuleChainCandidate}.
 * It is kept so that existing references to this name continue to resolve;
 * new code should prefer the exported PascalCase name.
 */
type textProcessorRuleChainCandidate = TextProcessorRuleChainCandidate;

/**
 * An ordered list of inflection rules.
 */
export type InflectionRuleChain = InflectionRule[];

/**
 * A single inflection rule.
 */
export type InflectionRule = {
    /**
     * The name of the rule.
     */
    name: string;
    /**
     * An optional description of the rule.
     */
    description?: string;
};

/**
 * The possible values for the `source` of an inflection rule chain candidate.
 */
export type InflectionSource = 'algorithm' | 'dictionary' | 'both';

/**
 * A term headword is a combination of a term, reading, and auxiliary information.
 */
export type TermHeadword = {
    /**
     * The original order of the headword, which is usually used for sorting.
     */
    index: number;
    /**
     * The text for the term.
     */
    term: string;
    /**
     * The reading of the term.
     */
    reading: string;
    /**
     * The sources of the term.
     */
    sources: TermSource[];
    /**
     * Tags for the headword.
     */
    tags: Tag[];
    /**
     * List of word classes (part of speech) for the headword.
     */
    wordClasses: string[];
};

/**
 * A definition contains a list of entries and information about what terms it corresponds to.
 */
export type TermDefinition = {
    /**
     * The original order of the definition, which is usually used for sorting.
     */
    index: number;
    /**
     * A list of headwords that this definition corresponds to.
     */
    headwordIndices: number[];
    /**
     * The name of the dictionary that the definition information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * Database ID for the definition.
     */
    id: number;
    /**
     * A score for the definition.
     */
    score: number;
    /**
     * The sorting value based on the determined term frequency.
     */
    frequencyOrder: number;
    /**
     * A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
     * The list can have multiple values if multiple definitions with different sequences have been merged.
     * The list should always have at least one item.
     */
    sequences: number[];
    /**
     * Whether or not any of the sources is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
    /**
     * Tags for the definition.
     */
    tags: Tag[];
    /**
     * The definition entries.
     */
    entries: DictionaryData.TermGlossaryContent[];
};

/**
 * A term pronunciation represents different ways to pronounce one of the headwords.
 */
export type TermPronunciation = {
    /**
     * The original order of the pronunciation, which is usually used for sorting.
     */
    index: number;
    /**
     * Which headword this pronunciation corresponds to.
     */
    headwordIndex: number;
    /**
     * The name of the dictionary that the pronunciation information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The pronunciations for the term.
     */
    pronunciations: Pronunciation[];
};

/**
 * A single pronunciation: either pitch accent information or a phonetic transcription.
 * The two members are told apart by their `type` field.
 */
export type Pronunciation = PitchAccent | PhoneticTranscription;

/**
 * Pitch accent information for a term, represented as the position of the downstep.
 */
export type PitchAccent = {
    /**
     * Type of the pronunciation, for disambiguation between union type members.
     */
    type: 'pitch-accent';
    /**
     * Position of the downstep, as a number of mora.
     * NOTE(review): the type also admits a string, whose format is not defined
     * in this file; confirm the expected string form with the producer of this value.
     */
    positions: number | string;
    /**
     * Positions of morae with a nasal sound.
     */
    nasalPositions: number[];
    /**
     * Positions of morae with a devoiced sound.
     */
    devoicePositions: number[];
    /**
     * Tags for the pitch accent.
     */
    tags: Tag[];
};

/**
 * A phonetic transcription of a term, given as an IPA string.
 */
export type PhoneticTranscription = {
    /**
     * Type of the pronunciation, for disambiguation between union type members.
     */
    type: 'phonetic-transcription';
    /**
     * An IPA transcription.
     */
    ipa: string;
    /**
     * Tags for the IPA transcription.
     */
    tags: Tag[];
};

/**
 * The set of `type` discriminator values that a pronunciation can have.
 */
export type PronunciationType = Pronunciation['type'];

/**
 * Selects the member of the `Pronunciation` union whose `type` is `T`.
 */
export type PronunciationGeneric<T extends PronunciationType> = Extract<Pronunciation, {type: T}>;

/**
 * Frequency information corresponds to how frequently a term appears in a corpus,
 * which can be a number of occurrences or an overall rank.
 */
export type TermFrequency = {
    /**
     * The original order of the frequency, which is usually used for sorting.
     */
    index: number;
    /**
     * Which headword this frequency corresponds to.
     */
    headwordIndex: number;
    /**
     * The name of the dictionary that the frequency information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * Whether or not the frequency had an explicit reading specified.
     */
    hasReading: boolean;
    /**
     * The frequency for the term, as a number of occurrences or an overall rank.
     */
    frequency: number;
    /**
     * A display value to show to the user.
     */
    displayValue: string | null;
    /**
     * Whether or not the displayValue string was parsed to determine the frequency value.
     */
    displayValueParsed: boolean;
};

/**
 * Enum representing how the search term relates to the final term.
 */
export type TermSourceMatchType = 'exact' | 'prefix' | 'suffix';

/**
 * Enum representing what database field was used to match the source term.
 */
export type TermSourceMatchSource = 'term' | 'reading' | 'sequence';

/**
 * Source information represents how the original text was transformed to get to the final term.
 */
export type TermSource = {
    /**
     * The original text that was searched.
     */
    originalText: string;
    /**
     * The original text after being transformed, but before applying deinflections.
     */
    transformedText: string;
    /**
     * The final text after applying deinflections.
     */
    deinflectedText: string;
    /**
     * How the deinflected text matches the value from the database.
     */
    matchType: TermSourceMatchType;
    /**
     * Which field was used to match the database entry.
     */
    matchSource: TermSourceMatchSource;
    /**
     * Whether or not this source is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
};