Pop-up dictionary browser extension for language learning. Successor to Yomichan. (PERSONAL FORK)
1/*
2 * Copyright (C) 2024-2025 Yomitan Authors
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18import {log} from '../core/log.js';
19
export class LanguageTransformer {
    /**
     * Creates an empty transformer. Conditions and transforms are registered
     * afterwards through `addDescriptor`.
     */
    constructor() {
        /**
         * Index of the next unused bit available for a leaf condition flag.
         * @type {number}
         */
        this._nextFlagIndex = 0;
        /**
         * All registered transforms, in registration order.
         * @type {import('language-transformer-internal').Transform[]}
         */
        this._transforms = [];
        /**
         * Flags for every registered condition type.
         * @type {Map<string, number>}
         */
        this._conditionTypeToConditionFlagsMap = new Map();
        /**
         * Flags for the subset of condition types declared with `isDictionaryForm`.
         * @type {Map<string, number>}
         */
        this._partOfSpeechToConditionFlagsMap = new Map();
    }

    /**
     * Removes every registered transform and condition, and resets flag allocation.
     */
    clear() {
        this._nextFlagIndex = 0;
        this._transforms = [];
        this._conditionTypeToConditionFlagsMap.clear();
        this._partOfSpeechToConditionFlagsMap.clear();
    }

    /**
     * Registers the conditions and transforms of a descriptor.
     * The descriptor is fully validated before any state of this instance is modified,
     * so a descriptor which throws leaves the transformer unchanged.
     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
     * @throws {Error} If the conditions cannot be mapped to flags, or if a rule references
     *   a condition type which is not declared in `descriptor.conditions`.
     */
    addDescriptor(descriptor) {
        const {conditions, transforms} = descriptor;
        const conditionEntries = Object.entries(conditions);
        const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);

        /** @type {import('language-transformer-internal').Transform[]} */
        const transforms2 = [];

        for (const [transformId, transform] of Object.entries(transforms)) {
            const {name, description, rules} = transform;
            /** @type {import('language-transformer-internal').Rule[]} */
            const rules2 = [];
            for (let j = 0, jj = rules.length; j < jj; ++j) {
                const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
                const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
                if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform ${transformId}.rules[${j}]`); }
                const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
                if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform ${transformId}.rules[${j}]`); }
                rules2.push({
                    type,
                    isInflected,
                    deinflect,
                    conditionsIn: conditionFlagsIn,
                    conditionsOut: conditionFlagsOut,
                });
            }
            const heuristic = LanguageTransformer._createHeuristic(rules.map((rule) => rule.isInflected));
            transforms2.push({id: transformId, name, description, rules: rules2, heuristic});
        }

        // Everything validated; commit the new state.
        this._nextFlagIndex = nextFlagIndex;
        for (const transform of transforms2) {
            this._transforms.push(transform);
        }

        for (const [type, {isDictionaryForm}] of conditionEntries) {
            const flags = conditionFlagsMap.get(type);
            if (typeof flags === 'undefined') { continue; } // This case should never happen
            this._conditionTypeToConditionFlagsMap.set(type, flags);
            if (isDictionaryForm) {
                this._partOfSpeechToConditionFlagsMap.set(type, flags);
            }
        }
    }

    /**
     * Combines the flags of the given parts of speech.
     * Only condition types registered with `isDictionaryForm` contribute; anything else counts as `0`.
     * @param {string[]} partsOfSpeech
     * @returns {number}
     */
    getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
        return this._getConditionFlags(this._partOfSpeechToConditionFlagsMap, partsOfSpeech);
    }

    /**
     * Combines the flags of the given condition types. Unknown types count as `0`.
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    getConditionFlagsFromConditionTypes(conditionTypes) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, conditionTypes);
    }

    /**
     * Gets the flags of a single condition type, or `0` if the type is unknown.
     * @param {string} conditionType
     * @returns {number}
     */
    getConditionFlagsFromConditionType(conditionType) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, [conditionType]);
    }

    /**
     * Applies the registered transforms to `sourceText`, repeatedly.
     * The first entry of the returned array is the untransformed text with no conditions and an
     * empty trace. Every entry (including ones appended during the call) is checked against
     * every rule, and each applicable rule appends a new entry.
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]}
     */
    transform(sourceText) {
        const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
        // `results` grows while it is iterated, so it doubles as the work queue.
        for (let i = 0; i < results.length; ++i) {
            const {text, conditions, trace} = results[i];
            for (const transform of this._transforms) {
                // Cheap pre-filter: skip the transform when none of its rules can match.
                if (!transform.heuristic.test(text)) { continue; }

                const {id, rules} = transform;
                for (let j = 0, jj = rules.length; j < jj; ++j) {
                    const rule = rules[j];
                    if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
                    const {isInflected, deinflect} = rule;
                    if (!isInflected.test(text)) { continue; }

                    // The same rule being applied to the same text again would never terminate.
                    const isCycle = trace.some((frame) => frame.transform === id && frame.ruleIndex === j && frame.text === text);
                    if (isCycle) {
                        log.warn(new Error(`Cycle detected in transform[${id}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
                        continue;
                    }

                    results.push(LanguageTransformer.createTransformedText(
                        deinflect(text),
                        rule.conditionsOut,
                        this._extendTrace(trace, {transform: id, ruleIndex: j, text}),
                    ));
                }
            }
        }
        return results;
    }

    /**
     * Maps transform ids to the name (and description, when one is set) of the registered transform.
     * Ids which do not correspond to a registered transform are returned as the name itself.
     * @param {string[]} inflectionRules
     * @returns {import('dictionary').InflectionRuleChain}
     */
    getUserFacingInflectionRules(inflectionRules) {
        return inflectionRules.map((rule) => {
            const fullRule = this._transforms.find((transform) => transform.id === rule);
            if (typeof fullRule === 'undefined') { return {name: rule}; }
            const {name, description} = fullRule;
            return description ? {name, description} : {name};
        });
    }

    /**
     * Bundles a text, its condition flags and its trace into a single object.
     * @param {string} text
     * @param {number} conditions
     * @param {import('language-transformer-internal').Trace} trace
     * @returns {import('language-transformer-internal').TransformedText}
     */
    static createTransformedText(text, conditions, trace) {
        return {text, conditions, trace};
    }

    /**
     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
     * @param {number} currentConditions
     * @param {number} nextConditions
     * @returns {boolean}
     */
    static conditionsMatch(currentConditions, nextConditions) {
        return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
    }

    /**
     * Builds the pre-filter expression of a transform from the `isInflected` tests of its rules.
     * The result must never reject a text which one of the individual expressions accepts,
     * since `transform` skips every rule of the transform when the heuristic fails.
     * @param {RegExp[]} regExps
     * @returns {RegExp}
     */
    static _createHeuristic(regExps) {
        // Only flags which change what a pattern matches are compared; `d`, `g` and `y` are ignored.
        /** @type {(regExp: RegExp) => string} */
        const getMatchingFlags = (regExp) => [...regExp.flags].filter((flag) => 'imsuv'.includes(flag)).join('');
        const flags = regExps.length > 0 ? getMatchingFlags(regExps[0]) : '';
        if (regExps.some((regExp) => getMatchingFlags(regExp) !== flags)) {
            // Sources written for different flags cannot be merged into one expression
            // without changing their meaning, so fall back to a heuristic which accepts everything.
            return new RegExp('');
        }
        return new RegExp(regExps.map((regExp) => regExp.source).join('|'), flags);
    }

    /**
     * Assigns flags to every condition of a descriptor.
     * A condition without `subConditions` receives its own bit, starting at `nextFlagIndex`.
     * A condition with `subConditions` receives the combined flags of those conditions, and is
     * deferred to a later pass until all of them have been assigned.
     * @param {import('language-transformer').ConditionMapEntries} conditions
     * @param {number} nextFlagIndex
     * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
     * @throws {Error} If more than 32 bits would be required, or if `subConditions` can never be resolved.
     */
    _getConditionFlagsMap(conditions, nextFlagIndex) {
        /** @type {Map<string, number>} */
        const conditionFlagsMap = new Map();
        /** @type {import('language-transformer').ConditionMapEntries} */
        let targets = conditions;
        while (targets.length > 0) {
            /** @type {import('language-transformer').ConditionMapEntries} */
            const nextTargets = [];
            for (const target of targets) {
                const [type, condition] = target;
                const {subConditions} = condition;
                let flags = 0;
                if (typeof subConditions === 'undefined') {
                    if (nextFlagIndex >= 32) {
                        // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
                        throw new Error('Maximum number of conditions was exceeded');
                    }
                    flags = 1 << nextFlagIndex;
                    ++nextFlagIndex;
                } else {
                    const multiFlags = this._getConditionFlagsStrict(conditionFlagsMap, subConditions);
                    if (multiFlags === null) {
                        // At least one sub-condition has no flags yet; retry in the next pass.
                        nextTargets.push(target);
                        continue;
                    } else {
                        flags = multiFlags;
                    }
                }
                conditionFlagsMap.set(type, flags);
            }
            if (nextTargets.length === targets.length) {
                // No progress was made in this pass: the remaining conditions reference each other
                // in a cycle, or reference a condition type which was never declared.
                const unresolved = nextTargets.map(([type]) => type).join(', ');
                throw new Error(`Unable to resolve subConditions (cycle or unknown condition type) for conditions: ${unresolved}`);
            }
            targets = nextTargets;
        }
        return {conditionFlagsMap, nextFlagIndex};
    }

    /**
     * Combines the flags of the given condition types.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {?number} The combined flags, or `null` if any condition type is missing from the map.
     */
    _getConditionFlagsStrict(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            const flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                return null;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Combines the flags of the given condition types; types missing from the map contribute `0`.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    _getConditionFlags(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            let flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                flags2 = 0;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Creates a new trace with `newFrame` first, followed by copies of the frames of `trace`.
     * The input trace is not modified.
     * @param {import('language-transformer-internal').Trace} trace
     * @param {import('language-transformer-internal').TraceFrame} newFrame
     * @returns {import('language-transformer-internal').Trace}
     */
    _extendTrace(trace, newFrame) {
        const newTrace = [newFrame];
        for (const {transform, ruleIndex, text} of trace) {
            newTrace.push({transform, ruleIndex, text});
        }
        return newTrace;
    }
}