types/ext/dictionary.d.ts at lambda-fork/main · onedeuxtriseigo.nullpo.dev/yomitan

onedeuxtriseigo.nullpo.dev / yomitan
fork atom
Pop-up dictionary browser extension for language learning. Successor to Yomichan. (PERSONAL FORK)
fork atom
yomitan / types / ext / dictionary.d.ts
at lambda-fork/main 534 lines 14 kB view raw
wrap content
Kuuuube Remove separate handling for avg freq in popup and combine with Anki handling (#2129) 6mo ago
68c20e86
/*
 * Copyright (C) 2023-2025  Yomitan Authors
 * Copyright (C) 2021-2022  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
 18
import type * as DictionaryData from './dictionary-data';
 20
// Common
 22
 23/**
 24 * A generic dictionary entry which is used as the base interface.
 25 */
 26export type DictionaryEntry = KanjiDictionaryEntry | TermDictionaryEntry;
 27
 28export type DictionaryEntryType = DictionaryEntry['type'];
 29
 30/**
 31 * A tag represents some brief information about part of a dictionary entry.
 32 */
 33export type Tag = {
 34    /**
 35     * The name of the tag.
 36     */
 37    name: string;
 38    /**
 39     * The category of the tag.
 40     */
 41    category: string;
 42    /**
 43     * A number indicating the sorting order of the tag.
 44     */
 45    order: number;
 46    /**
 47     * A score value for the tag.
 48     */
 49    score: number;
 50    /**
 51     * An array of descriptions for the tag. * If there are multiple entries,
 52     * the values will typically have originated from different dictionaries.
 53     * However, there is no correlation between the length of this array and
 54     * the length of the `dictionaries` field, as duplicates are removed.
 55     */
 56    content: string[];
 57    /**
 58     * An array of dictionary names that contained a tag with this name and category.
 59     */
 60    dictionaries: string[];
 61    /**
 62     * Whether or not this tag is redundant with previous tags.
 63     */
 64    redundant: boolean;
 65};
 66
// Kanji
 68
 69/**
 70 * A dictionary entry for a kanji character.
 71 */
 72export type KanjiDictionaryEntry = {
 73    /**
 74     * The type of the entry.
 75     */
 76    type: 'kanji';
 77    /**
 78     * The kanji character that was looked up.
 79     */
 80    character: string;
 81    /**
 82     * The name of the dictionary that the information originated from.
 83     */
 84    dictionary: string;
 85    /**
 86     * The index of the dictionary in the original list of dictionaries used for the lookup.
 87     */
 88    dictionaryIndex: number;
 89    /**
 90     * The alias of the dictionary
 91     */
 92    dictionaryAlias: string;
 93    /**
 94     * Onyomi readings for the kanji character.
 95     */
 96    onyomi: string[];
 97    /**
 98     * Kunyomi readings for the kanji character.
 99     */
100    kunyomi: string[];
101    /**
102     * Tags for the kanji character.
103     */
104    tags: Tag[];
105    /**
106     * An object containing stats about the kanji character.
107     */
108    stats: KanjiStatGroups;
109    /**
110     * Definitions for the kanji character.
111     */
112    definitions: string[];
113    /**
114     * Frequency information for the kanji character.
115     */
116    frequencies: KanjiFrequency[];
117};
118
/**
 * An object with groups of stats about a kanji character.
 */
export type KanjiStatGroups = {
    /**
     * A group of stats. The property name is the name of the group.
     */
    [propName: string]: KanjiStat[];
};
129
/**
 * A stat represents a generic piece of information about a kanji character.
 */
export type KanjiStat = {
    /**
     * The name of the stat.
     */
    name: string;
    /**
     * The category of the stat.
     */
    category: string;
    /**
     * A description of the stat.
     */
    content: string;
    /**
     * A number indicating the sorting order of the stat.
     */
    order: number;
    /**
     * A score value for the stat.
     */
    score: number;
    /**
     * The name of the dictionary that the stat originated from.
     */
    dictionary: string;
    /**
     * A value for the stat. May be numeric or textual.
     */
    value: number | string;
};
163
/**
 * Frequency information corresponds to how frequently a character appears in a corpus,
 * which can be a number of occurrences or an overall rank.
 */
export type KanjiFrequency = {
    /**
     * The original order of the frequency, which is usually used for sorting.
     */
    index: number;
    /**
     * The name of the dictionary that the frequency information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The kanji character for the frequency.
     */
    character: string;
    /**
     * The frequency for the character, as a number of occurrences or an overall rank.
     */
    frequency: number;
    /**
     * A display value to show to the user, or `null` when there is none.
     */
    displayValue: string | null;
    /**
     * Whether or not the displayValue string was parsed to determine the frequency value.
     */
    displayValueParsed: boolean;
};
202
// Terms
204
/**
 * A dictionary entry for a term or group of terms.
 */
export type TermDictionaryEntry = {
    /**
     * The type of the entry.
     */
    type: 'term';
    /**
     * Whether or not any of the sources is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
    /**
     * Ways that a looked-up word might be transformed into this term.
     */
    textProcessorRuleChainCandidates: textProcessorRuleChainCandidate[];
    /**
     * Ways that a looked-up word might be an inflected form of this term.
     */
    inflectionRuleChainCandidates: InflectionRuleChainCandidate[];
    /**
     * A score for the dictionary entry.
     */
    score: number;
    /**
     * The sorting value based on the determined term frequency.
     */
    frequencyOrder: number;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The number of primary sources that had an exact text match for the term.
     */
    sourceTermExactMatchCount: number;
    /**
     * Whether the term reading matched the primary reading.
     */
    matchPrimaryReading: boolean;
    /**
     * The maximum length of the original text for all primary sources.
     */
    maxOriginalTextLength: number;
    /**
     * Headwords for the entry.
     */
    headwords: TermHeadword[];
    /**
     * Definitions for the entry.
     */
    definitions: TermDefinition[];
    /**
     * Pronunciations for the entry.
     */
    pronunciations: TermPronunciation[];
    /**
     * Frequencies for the entry.
     */
    frequencies: TermFrequency[];
};
271
/**
 * One candidate chain of inflection rules, paired with the source it is attributed to.
 */
export type InflectionRuleChainCandidate = {
    /**
     * The source of this candidate.
     */
    source: InflectionSource;
    /**
     * The inflection rules making up the chain.
     */
    inflectionRules: InflectionRuleChain;
};
276
/**
 * One candidate chain of text processor rules, stored as an array of strings.
 *
 * NOTE(review): unlike the neighbouring aliases this name is not PascalCase and
 * carries no `export` modifier. Both are left as-is so existing references stay valid.
 */
type textProcessorRuleChainCandidate = string[];
278
/**
 * An ordered list of inflection rules.
 */
export type InflectionRuleChain = InflectionRule[];
280
/**
 * A single inflection rule.
 */
export type InflectionRule = {
    /**
     * The name of the rule.
     */
    name: string;
    /**
     * An optional description of the rule.
     */
    description?: string;
};
285
/**
 * Identifies the source attributed to an inflection rule chain candidate.
 * NOTE(review): `'both'` presumably means algorithm and dictionary together; confirm against the producer.
 */
export type InflectionSource = 'algorithm' | 'dictionary' | 'both';
287
/**
 * A term headword is a combination of a term, reading, and auxiliary information.
 */
export type TermHeadword = {
    /**
     * The original order of the headword, which is usually used for sorting.
     */
    index: number;
    /**
     * The text for the term.
     */
    term: string;
    /**
     * The reading of the term.
     */
    reading: string;
    /**
     * The sources of the term.
     */
    sources: TermSource[];
    /**
     * Tags for the headword.
     */
    tags: Tag[];
    /**
     * List of word classes (part of speech) for the headword.
     */
    wordClasses: string[];
};
317
/**
 * A definition contains a list of entries and information about what terms it corresponds to.
 */
export type TermDefinition = {
    /**
     * The original order of the definition, which is usually used for sorting.
     */
    index: number;
    /**
     * A list of headwords that this definition corresponds to.
     */
    headwordIndices: number[];
    /**
     * The name of the dictionary that the definition information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * Database ID for the definition.
     */
    id: number;
    /**
     * A score for the definition.
     */
    score: number;
    /**
     * The sorting value based on the determined term frequency.
     */
    frequencyOrder: number;
    /**
     * A list of database sequence numbers for the term. A value of `-1` corresponds to no sequence.
     * The list can have multiple values if multiple definitions with different sequences have been merged.
     * The list should always have at least one item.
     */
    sequences: number[];
    /**
     * Whether or not any of the sources is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
    /**
     * Tags for the definition.
     */
    tags: Tag[];
    /**
     * The definition entries.
     */
    entries: DictionaryData.TermGlossaryContent[];
};
374
/**
 * A term pronunciation represents different ways to pronounce one of the headwords.
 */
export type TermPronunciation = {
    /**
     * The original order of the pronunciation, which is usually used for sorting.
     */
    index: number;
    /**
     * Which headword this pronunciation corresponds to.
     */
    headwordIndex: number;
    /**
     * The name of the dictionary that the pronunciation information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * The pronunciations for the term.
     */
    pronunciations: Pronunciation[];
};
404
/**
 * Any pronunciation record: either pitch accent information or a phonetic transcription.
 * The two members are told apart by their `type` field.
 */
export type Pronunciation = PitchAccent | PhoneticTranscription;
406
/**
 * Pitch accent information for a term, represented as the position of the downstep.
 */
export type PitchAccent = {
    /**
     * Type of the pronunciation, for disambiguation between union type members.
     */
    type: 'pitch-accent';
    /**
     * Position of the downstep, as a number of mora.
     * NOTE(review): the type also admits a string; its expected format is not
     * visible in this file and should be confirmed against the producer.
     */
    positions: number | string;
    /**
     * Positions of morae with a nasal sound.
     */
    nasalPositions: number[];
    /**
     * Positions of morae with a devoiced sound.
     */
    devoicePositions: number[];
    /**
     * Tags for the pitch accent.
     */
    tags: Tag[];
};
432
/**
 * A phonetic transcription of a term, given as an IPA string.
 */
export type PhoneticTranscription = {
    /**
     * Type of the pronunciation, for disambiguation between union type members.
     */
    type: 'phonetic-transcription';
    /**
     * An IPA transcription.
     */
    ipa: string;
    /**
     * Tags for the IPA transcription.
     */
    tags: Tag[];
};
447
/**
 * The set of values the `type` field of a `Pronunciation` can take.
 */
export type PronunciationType = Pronunciation['type'];
449
/**
 * Selects the member of the `Pronunciation` union whose `type` field is `T`.
 */
export type PronunciationGeneric<T extends PronunciationType> = Extract<Pronunciation, {type: T}>;
451
/**
 * Frequency information corresponds to how frequently a term appears in a corpus,
 * which can be a number of occurrences or an overall rank.
 */
export type TermFrequency = {
    /**
     * The original order of the frequency, which is usually used for sorting.
     */
    index: number;
    /**
     * Which headword this frequency corresponds to.
     */
    headwordIndex: number;
    /**
     * The name of the dictionary that the frequency information originated from.
     */
    dictionary: string;
    /**
     * The index of the dictionary in the original list of dictionaries used for the lookup.
     */
    dictionaryIndex: number;
    /**
     * The alias of the dictionary.
     */
    dictionaryAlias: string;
    /**
     * Whether or not the frequency had an explicit reading specified.
     */
    hasReading: boolean;
    /**
     * The frequency for the term, as a number of occurrences or an overall rank.
     */
    frequency: number;
    /**
     * A display value to show to the user, or `null` when there is none.
     */
    displayValue: string | null;
    /**
     * Whether or not the displayValue string was parsed to determine the frequency value.
     */
    displayValueParsed: boolean;
};
494
/**
 * String-literal union representing how the search term relates to the final term.
 */
export type TermSourceMatchType = 'exact' | 'prefix' | 'suffix';
499
/**
 * String-literal union representing what database field was used to match the source term.
 */
export type TermSourceMatchSource = 'term' | 'reading' | 'sequence';
504
/**
 * Source information represents how the original text was transformed to get to the final term.
 */
export type TermSource = {
    /**
     * The original text that was searched.
     */
    originalText: string;
    /**
     * The original text after being transformed, but before applying deinflections.
     */
    transformedText: string;
    /**
     * The final text after applying deinflections.
     */
    deinflectedText: string;
    /**
     * How the deinflected text matches the value from the database.
     */
    matchType: TermSourceMatchType;
    /**
     * Which field was used to match the database entry.
     */
    matchSource: TermSourceMatchSource;
    /**
     * Whether or not this source is a primary source. Primary sources are derived from the
     * original search text, while non-primary sources originate from related terms.
     */
    isPrimary: boolean;
};