Pop-up dictionary browser extension for language learning. Successor to Yomichan. (PERSONAL FORK)
at lambda-fork/main 273 lines 11 kB view raw
/*
 * Copyright (C) 2024-2025 Yomitan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */

import {log} from '../core/log.js';

/**
 * Holds a set of transforms (each a list of deinflection rules) together with
 * the bit flags assigned to their condition types, and applies those rules
 * repeatedly to a piece of text to enumerate every reachable transformed form.
 */
export class LanguageTransformer {
    constructor() {
        /**
         * Index of the next unused flag bit. Each condition without
         * `subConditions` consumes one bit.
         * @type {number}
         */
        this._nextFlagIndex = 0;
        /**
         * All transforms registered so far, in registration order.
         * @type {import('language-transformer-internal').Transform[]}
         */
        this._transforms = [];
        /**
         * Flags for every registered condition type.
         * @type {Map<string, number>}
         */
        this._conditionTypeToConditionFlagsMap = new Map();
        /**
         * Flags for the subset of condition types declared with `isDictionaryForm`.
         * @type {Map<string, number>}
         */
        this._partOfSpeechToConditionFlagsMap = new Map();
    }

    /**
     * Removes every registered transform and condition and resets flag allocation.
     */
    clear() {
        this._nextFlagIndex = 0;
        this._transforms = [];
        this._conditionTypeToConditionFlagsMap.clear();
        this._partOfSpeechToConditionFlagsMap.clear();
    }

    /**
     * Registers the conditions and transforms of a descriptor.
     *
     * Everything that can throw runs before any instance state is written, so a
     * descriptor that fails validation leaves this transformer unchanged.
     * @param {import('language-transformer').LanguageTransformDescriptor} descriptor
     * @throws {Error} If the conditions cannot be resolved to flags, or if a rule's
     *   `conditionsIn`/`conditionsOut` names a condition the descriptor does not declare.
     */
    addDescriptor(descriptor) {
        const {conditions, transforms} = descriptor;
        const conditionEntries = Object.entries(conditions);
        const {conditionFlagsMap, nextFlagIndex} = this._getConditionFlagsMap(conditionEntries, this._nextFlagIndex);

        /** @type {import('language-transformer-internal').Transform[]} */
        const transforms2 = [];

        for (const [transformId, transform] of Object.entries(transforms)) {
            const {name, description, rules} = transform;
            /** @type {import('language-transformer-internal').Rule[]} */
            const rules2 = [];
            for (let j = 0, jj = rules.length; j < jj; ++j) {
                const {type, isInflected, deinflect, conditionsIn, conditionsOut} = rules[j];
                const conditionFlagsIn = this._getConditionFlagsStrict(conditionFlagsMap, conditionsIn);
                if (conditionFlagsIn === null) { throw new Error(`Invalid conditionsIn for transform ${transformId}.rules[${j}]`); }
                const conditionFlagsOut = this._getConditionFlagsStrict(conditionFlagsMap, conditionsOut);
                if (conditionFlagsOut === null) { throw new Error(`Invalid conditionsOut for transform ${transformId}.rules[${j}]`); }
                rules2.push({
                    type,
                    isInflected,
                    deinflect,
                    conditionsIn: conditionFlagsIn,
                    conditionsOut: conditionFlagsOut,
                });
            }
            // A single alternation of every rule's `isInflected` pattern, used by
            // `transform` to skip a whole transform when none of its rules can match.
            // NOTE(review): only `.source` is used, so flags on the individual
            // patterns are not carried into the heuristic - confirm no rule relies on flags.
            const isInflectedTests = rules.map((rule) => rule.isInflected);
            const heuristic = new RegExp(isInflectedTests.map((regExp) => regExp.source).join('|'));
            transforms2.push({id: transformId, name, description, rules: rules2, heuristic});
        }

        // Validation succeeded; commit the new state.
        this._nextFlagIndex = nextFlagIndex;
        for (const transform of transforms2) {
            this._transforms.push(transform);
        }

        for (const [type, {isDictionaryForm}] of conditionEntries) {
            const flags = conditionFlagsMap.get(type);
            if (typeof flags === 'undefined') { continue; } // This case should never happen
            this._conditionTypeToConditionFlagsMap.set(type, flags);
            if (isDictionaryForm) {
                this._partOfSpeechToConditionFlagsMap.set(type, flags);
            }
        }
    }

    /**
     * Combines the flags of the given parts of speech. Names that were not
     * registered with `isDictionaryForm` contribute no flags.
     * @param {string[]} partsOfSpeech
     * @returns {number}
     */
    getConditionFlagsFromPartsOfSpeech(partsOfSpeech) {
        return this._getConditionFlags(this._partOfSpeechToConditionFlagsMap, partsOfSpeech);
    }

    /**
     * Combines the flags of the given condition types. Unregistered names
     * contribute no flags.
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    getConditionFlagsFromConditionTypes(conditionTypes) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, conditionTypes);
    }

    /**
     * Returns the flags of a single condition type, or `0` if it is not registered.
     * @param {string} conditionType
     * @returns {number}
     */
    getConditionFlagsFromConditionType(conditionType) {
        return this._getConditionFlags(this._conditionTypeToConditionFlagsMap, [conditionType]);
    }

    /**
     * Applies the registered rules to `sourceText` and to every text they produce.
     *
     * The first result is always the untransformed source text with conditions `0`
     * and an empty trace. The result list doubles as the work list: entries
     * appended during iteration are themselves processed later in the same loop.
     * @param {string} sourceText
     * @returns {import('language-transformer-internal').TransformedText[]}
     */
    transform(sourceText) {
        const results = [LanguageTransformer.createTransformedText(sourceText, 0, [])];
        for (let i = 0; i < results.length; ++i) {
            const {text, conditions, trace} = results[i];
            for (const transform of this._transforms) {
                // Skip the whole transform if none of its rule patterns can match.
                if (!transform.heuristic.test(text)) { continue; }

                const {id, rules} = transform;
                for (let j = 0, jj = rules.length; j < jj; ++j) {
                    const rule = rules[j];
                    if (!LanguageTransformer.conditionsMatch(conditions, rule.conditionsIn)) { continue; }
                    const {isInflected, deinflect} = rule;
                    if (!isInflected.test(text)) { continue; }

                    // The same rule applied to the same text earlier in this chain
                    // would repeat forever, so it is reported and skipped.
                    const isCycle = trace.some((frame) => frame.transform === id && frame.ruleIndex === j && frame.text === text);
                    if (isCycle) {
                        log.warn(new Error(`Cycle detected in transform[${id}] rule[${j}] for text: ${text}\nTrace: ${JSON.stringify(trace)}`));
                        continue;
                    }

                    results.push(LanguageTransformer.createTransformedText(
                        deinflect(text),
                        rule.conditionsOut,
                        this._extendTrace(trace, {transform: id, ruleIndex: j, text}),
                    ));
                }
            }
        }
        return results;
    }

    /**
     * Maps transform ids to their display name and, when non-empty, description.
     * An id with no registered transform is returned as `{name: id}`.
     * @param {string[]} inflectionRules
     * @returns {import('dictionary').InflectionRuleChain}
     */
    getUserFacingInflectionRules(inflectionRules) {
        return inflectionRules.map((rule) => {
            const fullRule = this._transforms.find((transform) => transform.id === rule);
            if (typeof fullRule === 'undefined') { return {name: rule}; }
            const {name, description} = fullRule;
            return description ? {name, description} : {name};
        });
    }

    /**
     * Bundles a text, its condition flags and its trace into a result object.
     * @param {string} text
     * @param {number} conditions
     * @param {import('language-transformer-internal').Trace} trace
     * @returns {import('language-transformer-internal').TransformedText}
     */
    static createTransformedText(text, conditions, trace) {
        return {text, conditions, trace};
    }

    /**
     * If `currentConditions` is `0`, then `nextConditions` is ignored and `true` is returned.
     * Otherwise, there must be at least one shared condition between `currentConditions` and `nextConditions`.
     * @param {number} currentConditions
     * @param {number} nextConditions
     * @returns {boolean}
     */
    static conditionsMatch(currentConditions, nextConditions) {
        return currentConditions === 0 || (currentConditions & nextConditions) !== 0;
    }

    /**
     * Assigns flags to every condition in `conditions`.
     *
     * A condition without `subConditions` receives a fresh single bit. A condition
     * with `subConditions` receives the union of its sub-conditions' flags; if one
     * of them has no flags yet, the condition is retried on the next pass.
     * @param {import('language-transformer').ConditionMapEntries} conditions
     * @param {number} nextFlagIndex Index of the first free flag bit.
     * @returns {{conditionFlagsMap: Map<string, number>, nextFlagIndex: number}}
     * @throws {Error} If more than 32 flag bits would be needed, or if some
     *   `subConditions` can never be resolved (cyclic or undeclared references).
     */
    _getConditionFlagsMap(conditions, nextFlagIndex) {
        /** @type {Map<string, number>} */
        const conditionFlagsMap = new Map();
        /** @type {import('language-transformer').ConditionMapEntries} */
        let targets = conditions;
        while (targets.length > 0) {
            /** @type {import('language-transformer').ConditionMapEntries} */
            const nextTargets = [];
            for (const target of targets) {
                const [type, condition] = target;
                const {subConditions} = condition;
                let flags = 0;
                if (typeof subConditions === 'undefined') {
                    if (nextFlagIndex >= 32) {
                        // Flags greater than or equal to 32 don't work because JavaScript only supports up to 32-bit integer operations
                        throw new Error('Maximum number of conditions was exceeded');
                    }
                    flags = 1 << nextFlagIndex;
                    ++nextFlagIndex;
                } else {
                    const multiFlags = this._getConditionFlagsStrict(conditionFlagsMap, subConditions);
                    if (multiFlags === null) {
                        // At least one sub-condition has no flags yet; retry on the next pass.
                        nextTargets.push(target);
                        continue;
                    } else {
                        flags = multiFlags;
                    }
                }
                conditionFlagsMap.set(type, flags);
            }
            if (nextTargets.length === targets.length) {
                // A full pass resolved nothing, so the remaining conditions depend on
                // each other cyclically or on a condition type that was never declared.
                const unresolved = nextTargets.map(([type]) => type).join(', ');
                throw new Error(`Unresolvable subConditions (cyclic or undeclared reference) for condition types: ${unresolved}`);
            }
            targets = nextTargets;
        }
        return {conditionFlagsMap, nextFlagIndex};
    }

    /**
     * Combines the flags of `conditionTypes`, failing if any of them is unknown.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {?number} The combined flags, or `null` if a type is missing from the map.
     */
    _getConditionFlagsStrict(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            const flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                return null;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Combines the flags of `conditionTypes`, treating unknown types as `0`.
     * @param {Map<string, number>} conditionFlagsMap
     * @param {string[]} conditionTypes
     * @returns {number}
     */
    _getConditionFlags(conditionFlagsMap, conditionTypes) {
        let flags = 0;
        for (const conditionType of conditionTypes) {
            let flags2 = conditionFlagsMap.get(conditionType);
            if (typeof flags2 === 'undefined') {
                flags2 = 0;
            }
            flags |= flags2;
        }
        return flags;
    }

    /**
     * Builds a new trace with `newFrame` first, followed by copies of the frames
     * of `trace`. The input trace and its frame objects are not modified.
     * @param {import('language-transformer-internal').Trace} trace
     * @param {import('language-transformer-internal').TraceFrame} newFrame
     * @returns {import('language-transformer-internal').Trace}
     */
    _extendTrace(trace, newFrame) {
        const newTrace = [newFrame];
        for (const {transform, ruleIndex, text} of trace) {
            newTrace.push({transform, ruleIndex, text});
        }
        return newTrace;
    }
}