OR-1 dataflow CPU sketch
at ba08ffded3d3b2badb2a7e22816feafaacea5ded 376 lines 13 kB view raw
#!/usr/bin/env python3
"""
Convert dfasm.sublime-syntax (Sublime Text) to dfasm.tmLanguage.json (TextMate).

This script reads the Sublime Text syntax grammar and produces a TextMate-compatible
JSON grammar. Key transformations:
- Inlines {{variable}} references
- Flattens qualified reference contexts into single patterns with captures
- Degrades port-specific scopes to generic numeric scopes (TextMate limitation)
- Converts context stacks to TextMate patterns with repositories

Usage:
    python editor/scripts/sublime_to_textmate.py
    python editor/scripts/sublime_to_textmate.py INPUT.sublime-syntax OUTPUT.tmLanguage.json
"""

import sys
import json
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

try:
    import yaml
except ImportError:
    # Only load_sublime_syntax() needs PyYAML; it reports the missing
    # dependency itself so the conversion functions stay importable.
    yaml = None


# Matches a Sublime ``{{name}}`` variable reference.
_VARIABLE_REF = re.compile(r'\{\{(\w+)\}\}')

# A bare context name as written in a sublime-syntax ``include:`` (no scope
# dots, no ``scope:`` prefix, no package path, no leading ``#``/``$``).
_LOCAL_CONTEXT_NAME = re.compile(r'[A-Za-z_][\w-]*\Z')

# (match, push) pairs that open a string/char context, mapped to the
# (quote_char, prefix, escaped) arguments of build_string_pattern().
_STRING_PUSHES = {
    ('r"', 'raw_string'): ('"', 'r', False),
    ('b"', 'byte_string'): ('"', 'b', True),
    ('"', 'string'): ('"', '', True),
    ("'", 'char_literal'): ("'", '', True),
}


def load_sublime_syntax(path: str) -> Dict[str, Any]:
    """Load a YAML sublime-syntax file and return its top-level mapping.

    Args:
        path: Location of the ``.sublime-syntax`` file.

    Raises:
        ImportError: PyYAML is not installed.
        ValueError: The document is empty or its top level is not a mapping.
    """
    if yaml is None:
        raise ImportError(
            "PyYAML is required to read .sublime-syntax files (pip install pyyaml)"
        )

    # Explicit encoding: the platform default is locale dependent.
    with open(path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)

    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data


def inline_variables(text: str, variables: Dict[str, str]) -> str:
    """Inline {{variable}} references with variable values.

    Uses non-capturing groups (?:...) to avoid creating extra capture groups
    that break capture numbering in TextMate patterns.

    Variables whose values reference other variables are expanded
    recursively. Unknown names, and names that would recurse into
    themselves, are left as the literal ``{{name}}`` text.
    """
    variables = variables or {}

    def expand(source: str, active: frozenset) -> str:
        def replace_var(match):
            var_name = match.group(1)
            if var_name not in variables or var_name in active:
                return match.group(0)
            body = expand(str(variables[var_name]), active | {var_name})
            return f"(?:{body})"

        return _VARIABLE_REF.sub(replace_var, source)

    return expand(text, frozenset())


def build_string_pattern(quote_char: str, prefix: str = '', escaped: bool = True) -> Dict[str, Any]:
    """
    Build a begin/end pattern for strings/char literals.

    Args:
        quote_char: '"' or "'"
        prefix: 'r' (raw), 'b' (byte), or '' (normal)
        escaped: Whether to include escape patterns

    Returns: Pattern dict with begin/end structure
    """
    if quote_char == '"':
        scope_base = {
            'r': 'string.quoted.double.raw.dfasm',
            'b': 'string.quoted.double.byte.dfasm',
        }.get(prefix, 'string.quoted.double.dfasm')
    else:
        # The prefix does not influence the scope of non-double-quoted literals.
        scope_base = 'string.quoted.single.dfasm'

    pattern = {
        'begin': re.escape(f"{prefix}{quote_char}"),
        'beginCaptures': {
            '0': {'name': scope_base}
        },
        'end': re.escape(quote_char),
        'endCaptures': {
            '0': {'name': scope_base}
        },
        'name': scope_base,
    }

    if escaped:
        pattern['patterns'] = [
            {'match': r'\\[ntr0\\\\\'\"]', 'name': 'constant.character.escape.dfasm'},
            {'match': r'\\x[0-9a-fA-F]{2}', 'name': 'constant.character.escape.dfasm'},
        ]

    return pattern


def build_func_body_pattern() -> Dict[str, Any]:
    """Build a begin/end pattern for function bodies.

    The body is delimited by braces and re-applies the whole grammar
    (``$self``) to its contents.
    """
    return {
        'begin': r'\{',
        'beginCaptures': {
            '0': {'name': 'punctuation.section.block.dfasm'}
        },
        'end': r'\}',
        'endCaptures': {
            '0': {'name': 'punctuation.section.block.dfasm'}
        },
        'name': 'meta.function.body.dfasm',
        'patterns': [
            {'include': '$self'}
        ]
    }


def build_qualified_ref_pattern(sigil: str, ref_type: str) -> Tuple[str, Dict[str, Any]]:
    """
    Build a single regex pattern with captures for qualified refs.

    Patterns:
    - @ for nodes: @name|placement:port
    - & for labels: &name|placement:port
    - $ for functions: $name|placement:port

    Args:
        sigil: One of '@', '&' or '$'.
        ref_type: Descriptive label for the reference kind. Kept for
            interface compatibility; the scopes are derived from ``sigil``.

    Returns: (pattern, captures_dict)

    Raises:
        KeyError: ``sigil`` is not one of the supported characters.
    """
    identifier = r'[a-zA-Z_][a-zA-Z0-9_]*'
    hex_lit = r'0x[0-9a-fA-F]+'
    dec_lit = r'[0-9]+'

    # sigil -> (scope fragment for the sigil itself, scope of the name)
    sigil_scopes = {
        '@': ('node', 'entity.name.tag.dfasm'),
        '&': ('label', 'entity.name.label.dfasm'),
        '$': ('function', 'entity.name.function.dfasm'),
    }
    if sigil not in sigil_scopes:
        raise KeyError(f"unsupported reference sigil: {sigil!r}")
    scope_type, name_scope = sigil_scopes[sigil]

    # Pattern captures:
    # 1: sigil
    # 2: name
    # 3: placement separator (|)  [optional]
    # 4: placement name  [optional]
    # 5: port separator (:)  [optional]
    # 6: port (identifier or number)  [optional]
    pattern = (
        f"({re.escape(sigil)})({identifier})"
        f"(?:(\\|)({identifier}))?"
        f"(?:(:)({identifier}|{hex_lit}|{dec_lit}))?"
    )

    captures = {
        '1': {'name': degrade_scope(f'punctuation.definition.reference.{scope_type}.dfasm')},
        '2': {'name': degrade_scope(name_scope)},
        '3': {'name': degrade_scope('punctuation.separator.placement.dfasm')},
        '4': {'name': degrade_scope('entity.other.attribute-name.placement.dfasm')},
        '5': {'name': degrade_scope('punctuation.separator.port.dfasm')},
        '6': {'name': degrade_scope('constant.numeric.dfasm')},
    }

    return pattern, captures


def _convert_include(target: Any) -> Any:
    """Translate a sublime-syntax include target to its TextMate form."""
    if not isinstance(target, str):
        return target
    if target in ('main', '#main'):
        # The main context is the grammar's own top-level pattern list.
        return '$self'
    if _LOCAL_CONTEXT_NAME.match(target):
        # TextMate addresses repository entries as '#name'; a bare name
        # would be read as the scope name of another grammar.
        return f'#{target}'
    # Already-prefixed ('#x', '$base') or external targets pass through.
    return target


def _convert_captures(captures: Dict[Any, Any]) -> Dict[str, Dict[str, str]]:
    """Remap capture scopes, dropping captures whose scope degrades to ''."""
    converted = {}
    for cap_idx, cap_value in captures.items():
        raw_scope = cap_value.get('name', '') if isinstance(cap_value, dict) else cap_value
        scope_name = degrade_scope(raw_scope)
        if scope_name:
            # YAML yields integer keys; TextMate capture keys are strings.
            converted[str(cap_idx)] = {'name': scope_name}
    return converted


def _convert_pattern(pattern_dict: Dict, variables: Dict[str, str]) -> Optional[Dict]:
    """Convert one sublime-syntax rule to a TextMate pattern (None to drop it)."""
    # Empty-match rules carry no text to match; nothing to emit for them.
    if pattern_dict.get('match') == '':
        return None

    match_val = pattern_dict.get('match', '')
    push_stack = pattern_dict.get('push')

    # Rules that push a known string/body context become begin/end patterns.
    # push may also be a list (unhashable), hence the isinstance guard.
    if isinstance(match_val, str) and isinstance(push_stack, str):
        string_args = _STRING_PUSHES.get((match_val, push_stack))
        if string_args is not None:
            quote_char, prefix, escaped = string_args
            return build_string_pattern(quote_char, prefix, escaped=escaped)
        if match_val == '\\{' and push_stack == 'func_body':
            return build_func_body_pattern()

    result = {}
    for key, value in pattern_dict.items():
        if key == 'match':
            result[key] = inline_variables(value, variables)
        elif key == 'scope':
            remapped = degrade_scope(value)
            if remapped:
                result['name'] = remapped
        elif key == 'captures':
            converted = _convert_captures(value)
            if converted:
                result['captures'] = converted
        elif key == 'meta_scope':
            remapped = degrade_scope(value)
            if remapped:
                result['contentName'] = remapped
        elif key == 'include':
            result[key] = _convert_include(value)
        elif key in ('push', 'set', 'pop'):
            # Context stack directives have no TextMate counterpart.
            continue
        else:
            result[key] = value

    return result or None


def _convert_context(items: Optional[List[Any]], variables: Dict[str, str]) -> List[Dict]:
    """Convert every mapping rule of a context, skipping dropped rules."""
    converted = []
    for item in items or []:
        if isinstance(item, dict):
            processed = _convert_pattern(item, variables)
            if processed:
                converted.append(processed)
    return converted


def _local_include_targets(patterns: List[Dict]) -> List[str]:
    """Return the repository names referenced by '#name' includes."""
    targets = []
    for pattern in patterns:
        include_ref = pattern.get('include')
        if isinstance(include_ref, str) and include_ref.startswith('#'):
            targets.append(include_ref[1:])
    return targets


def build_textmate_patterns(contexts: Dict[str, List[Dict]], variables: Dict[str, str]) -> Tuple[List[Dict], Dict[str, Dict]]:
    """
    Convert sublime-syntax contexts to TextMate patterns and repository.

    The ``main`` context becomes the top-level pattern list. The repository
    holds every other context reachable through includes, starting from
    ``main`` and following includes found inside repository contexts too.

    Returns: (top_level_patterns, repository)
    """
    contexts = contexts or {}
    variables = variables or {}

    patterns = _convert_context(contexts.get('main'), variables)

    # Walk the include graph so no emitted '#name' include is left dangling.
    converted: Dict[str, List[Dict]] = {}
    pending = _local_include_targets(patterns)
    while pending:
        ctx_name = pending.pop()
        if ctx_name == 'main' or ctx_name in converted or ctx_name not in contexts:
            continue
        converted[ctx_name] = _convert_context(contexts[ctx_name], variables)
        pending.extend(_local_include_targets(converted[ctx_name]))

    # Emit in source order; contexts that converted to nothing are omitted.
    repository = {
        ctx_name: {'patterns': converted[ctx_name]}
        for ctx_name in contexts
        if converted.get(ctx_name)
    }

    return patterns, repository


SCOPE_REMAP = {
    # Port numbers degrade to generic numeric (TextMate has no context stacks)
    'constant.numeric.port.dfasm': 'constant.numeric.dfasm',
    # Opcodes → support.function for cyan colour in most themes (distinct from keyword)
    'keyword.other.opcode.dfasm': 'support.function.opcode.dfasm',
    # Pragma → variable scope for distinct colour from both keywords and opcodes
    'keyword.control.pragma.dfasm': 'variable.language.pragma.dfasm',
    # Labels → entity.name.function for strong blue/green colour
    'entity.name.label.dfasm': 'entity.name.function.label.dfasm',
    # Nodes and functions → entity.name.type for visible colour (cyan/green in most themes)
    'entity.name.tag.dfasm': 'entity.name.type.node.dfasm',
    'entity.name.function.dfasm': 'entity.name.type.function.dfasm',
    # Named params → entity.other.attribute-name for distinct attribute colour
    'variable.parameter.dfasm': 'entity.other.attribute-name.dfasm',
    # Assignment operator → no special colour (default punctuation)
    'keyword.operator.assignment.dfasm': '',
}


def degrade_scope(scope: str) -> str:
    """Remap scopes for better colour differentiation in TextMate themes.

    Scopes without an entry in SCOPE_REMAP are returned unchanged; an empty
    result means the scope should not be emitted at all.
    """
    return SCOPE_REMAP.get(scope, scope)


def build_textmate_grammar(sublime_syntax: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert sublime-syntax to TextMate grammar structure.

    Args:
        sublime_syntax: Parsed sublime-syntax mapping (name, file_extensions,
            scope, variables, contexts).

    Returns: Dict with scopeName, name, fileTypes, patterns and repository.
    """
    name = sublime_syntax.get('name', 'dfasm')
    file_extensions = sublime_syntax.get('file_extensions', [])
    scope = sublime_syntax.get('scope', 'source.dfasm')

    # A key that is present but empty in the YAML parses to None.
    variables = sublime_syntax.get('variables') or {}
    contexts = sublime_syntax.get('contexts') or {}

    top_patterns, repository = build_textmate_patterns(contexts, variables)

    # Bare sigil rules are superseded by the qualified reference patterns.
    bare_sigils = ('@', '&', r'\$')
    filtered_patterns = [
        pattern for pattern in top_patterns
        if pattern.get('match') not in bare_sigils
    ]

    qualified_ref_patterns = []
    for sigil, ref_type in [('@', 'node'), ('&', 'label'), ('$', 'function')]:
        pattern_str, captures_dict = build_qualified_ref_pattern(sigil, ref_type)
        qualified_ref_patterns.append({
            'match': pattern_str,
            'captures': captures_dict
        })

    # Qualified refs go first so they win over the generic rules.
    return {
        'scopeName': scope,
        'name': name,
        'fileTypes': file_extensions,
        'patterns': qualified_ref_patterns + filtered_patterns,
        'repository': repository
    }


def main():
    """Main entry point.

    Reads the input and output paths from the command line, falling back to
    the repository-relative defaults, then converts and writes the grammar.
    """
    input_file = sys.argv[1] if len(sys.argv) > 1 else 'editor/sublime/dfasm.sublime-syntax'
    output_file = sys.argv[2] if len(sys.argv) > 2 else 'editor/textmate/dfasm.tmLanguage.json'

    # Make paths absolute if not already
    input_path = Path(input_file).resolve()
    output_path = Path(output_file).resolve()

    print(f"Loading {input_path}...")
    sublime_syntax = load_sublime_syntax(str(input_path))

    print("Converting to TextMate format...")
    grammar = build_textmate_grammar(sublime_syntax)

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"Writing {output_path}...")
    # Explicit encoding: the platform default is locale dependent.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(grammar, f, indent=2)

    print(f"✓ Conversion complete: {output_path}")


if __name__ == '__main__':
    main()