OR-1 dataflow CPU — editor tooling: Sublime Text → TextMate grammar converter
1#!/usr/bin/env python3
2"""
3Convert dfasm.sublime-syntax (Sublime Text) to dfasm.tmLanguage.json (TextMate).
4
5This script reads the Sublime Text syntax grammar and produces a TextMate-compatible
6JSON grammar. Key transformations:
7- Inlines {{variable}} references
8- Flattens qualified reference contexts into single patterns with captures
9- Degrades port-specific scopes to generic numeric scopes (TextMate limitation)
10- Converts context stacks to TextMate patterns with repositories
11
12Usage:
13 python editor/scripts/sublime_to_textmate.py
14 python editor/scripts/sublime_to_textmate.py INPUT.sublime-syntax OUTPUT.tmLanguage.json
15"""
16
17import sys
18import json
19import re
20import yaml
21from pathlib import Path
22from typing import Any, Dict, List, Optional, Tuple
23
24
def load_sublime_syntax(path: str) -> Dict[str, Any]:
    """Load and parse a YAML sublime-syntax file.

    Args:
        path: Filesystem path to the .sublime-syntax file.

    Returns:
        The parsed YAML document as a dict.
    """
    # Explicit UTF-8: sublime-syntax files are UTF-8, and the platform
    # default encoding (e.g. cp1252 on Windows) would mis-decode them.
    with open(path, 'r', encoding='utf-8') as f:
        # safe_load: never execute arbitrary YAML tags from the input file.
        return yaml.safe_load(f)
29
30
def inline_variables(text: str, variables: Dict[str, str]) -> str:
    """Expand every {{name}} placeholder in *text* using *variables*.

    Each known variable's value is wrapped in a non-capturing group
    (?:...) so the substitution never shifts capture-group numbering in
    the surrounding TextMate pattern. Unknown placeholders are left
    untouched.
    """
    def expand(m):
        name = m.group(1)
        if name not in variables:
            return m.group(0)
        return "(?:" + variables[name] + ")"

    return re.sub(r'\{\{(\w+)\}\}', expand, text)
44
45
def build_string_pattern(quote_char: str, prefix: str = '', escaped: bool = True) -> Dict[str, Any]:
    """Construct a TextMate begin/end pattern for a string or char literal.

    Args:
        quote_char: The delimiter, '"' or "'".
        prefix: Literal prefix: 'r' (raw), 'b' (byte), or '' (plain).
        escaped: When True, attach escape-sequence sub-patterns.

    Returns:
        A pattern dict with begin/end delimiters, matching delimiter
        captures, and (when requested) escape-sequence patterns.
    """
    # Scope name is keyed on delimiter + prefix; single-quoted literals
    # have no prefixed variants.
    if quote_char != '"':
        scope_base = 'string.quoted.single.dfasm'
    elif prefix == 'r':
        scope_base = 'string.quoted.double.raw.dfasm'
    elif prefix == 'b':
        scope_base = 'string.quoted.double.byte.dfasm'
    else:
        scope_base = 'string.quoted.double.dfasm'

    pattern = {
        'begin': re.escape(prefix + quote_char),
        'beginCaptures': {'0': {'name': scope_base}},
        'end': re.escape(quote_char),
        'endCaptures': {'0': {'name': scope_base}},
        'name': scope_base,
    }

    if escaped:
        # Simple escapes (\n, \t, \r, \0, \\, \', \") plus \xNN hex escapes.
        pattern['patterns'] = [
            {'match': r'\\[ntr0\\\\\'\"]', 'name': 'constant.character.escape.dfasm'},
            {'match': r'\\x[0-9a-fA-F]{2}', 'name': 'constant.character.escape.dfasm'},
        ]

    return pattern
92
93
def build_func_body_pattern() -> Dict[str, Any]:
    """Return the begin/end pattern that scopes a `{ ... }` function body."""
    brace_scope = 'punctuation.section.block.dfasm'
    return {
        'begin': r'\{',
        'beginCaptures': {'0': {'name': brace_scope}},
        'end': r'\}',
        'endCaptures': {'0': {'name': brace_scope}},
        'name': 'meta.function.body.dfasm',
        # Recurse into the full grammar inside the body.
        'patterns': [{'include': '$self'}],
    }
110
111
def build_qualified_ref_pattern(sigil: str, ref_type: str) -> Tuple[str, Dict[str, Any]]:
    """Build one regex + captures dict for a qualified reference.

    Qualified refs all share the shape SIGIL name [|placement] [:port]:
      @name|placement:port  (nodes)
      &name|placement:port  (labels)
      $name|placement:port  (functions)

    Capture groups: 1=sigil, 2=name, 3='|', 4=placement, 5=':', 6=port;
    groups 3-6 are optional. *ref_type* is accepted for interface
    compatibility, but the scope type is derived from the sigil itself.

    Returns: (regex_pattern, captures_dict)
    """
    ident = r'[a-zA-Z_][a-zA-Z0-9_]*'
    hex_num = r'0x[0-9a-fA-F]+'
    dec_num = r'[0-9]+'

    pattern = (
        f"({re.escape(sigil)})({ident})"
        f"(?:(\\|)({ident}))?"
        f"(?:(:)({ident}|{hex_num}|{dec_num}))?"
    )

    kind = {'@': 'node', '&': 'label', '$': 'function'}.get(sigil, 'ref')
    name_scope = {
        '@': 'entity.name.tag.dfasm',
        '&': 'entity.name.label.dfasm',
        '$': 'entity.name.function.dfasm',
    }[sigil]

    raw_capture_scopes = [
        f'punctuation.definition.reference.{kind}.dfasm',  # 1: sigil
        name_scope,                                        # 2: name
        'punctuation.separator.placement.dfasm',           # 3: '|'
        'entity.other.attribute-name.placement.dfasm',     # 4: placement
        'punctuation.separator.port.dfasm',                # 5: ':'
        'constant.numeric.dfasm',                          # 6: port
    ]
    captures = {
        str(i): {'name': degrade_scope(scope)}
        for i, scope in enumerate(raw_capture_scopes, start=1)
    }

    return pattern, captures
165
166
def build_textmate_patterns(contexts: Dict[str, List[Dict]], variables: Dict[str, str]) -> Tuple[List[Dict], Dict[str, Dict]]:
    """
    Convert sublime-syntax contexts to TextMate patterns and repository.

    The 'main' context becomes the top-level pattern list; every other
    context that 'main' references via a '#name' include becomes a
    repository entry. Context-stack directives (push/set/pop) have no
    TextMate equivalent: the known string/body contexts are rewritten as
    self-contained begin/end patterns, the rest are dropped.

    Returns: (top_level_patterns, repository)
    """
    patterns = []
    repository = {}

    def process_pattern(pattern_dict: Dict) -> Optional[Dict]:
        """Translate one sublime-syntax rule into a TextMate pattern (or None)."""
        result = {}

        # Skip empty-match patterns (artifacts of `pop: true` rules).
        if pattern_dict.get('match') == '':
            return None

        # Known push-context rules become self-contained begin/end patterns,
        # since TextMate has no context stack.
        match_val = pattern_dict.get('match', '')
        push_stack = pattern_dict.get('push')

        if match_val == 'r"' and push_stack == 'raw_string':
            return build_string_pattern('"', 'r', escaped=False)
        elif match_val == 'b"' and push_stack == 'byte_string':
            return build_string_pattern('"', 'b', escaped=True)
        elif match_val == '"' and push_stack == 'string':
            return build_string_pattern('"', '', escaped=True)
        elif match_val == "'" and push_stack == 'char_literal':
            return build_string_pattern("'", '', escaped=True)
        elif match_val == '\\{' and push_stack == 'func_body':
            return build_func_body_pattern()

        for key, value in pattern_dict.items():
            if key == 'match':
                result[key] = inline_variables(value, variables)
            elif key == 'scope':
                remapped = degrade_scope(value)
                if remapped:
                    result['name'] = remapped
            elif key == 'captures':
                processed_captures = {}
                for cap_idx, cap_value in value.items():
                    # Captures may be plain scope strings or {'name': ...} dicts.
                    if isinstance(cap_value, dict):
                        scope_name = degrade_scope(cap_value.get('name', ''))
                    else:
                        scope_name = degrade_scope(cap_value)
                    if scope_name:
                        processed_captures[cap_idx] = {'name': scope_name}

                if processed_captures:
                    result['captures'] = processed_captures
            elif key == 'meta_scope':
                # Guard: degrade_scope may map a scope to '' (deliberately
                # uncoloured); don't emit an empty contentName.
                remapped = degrade_scope(value)
                if remapped:
                    result['contentName'] = remapped
            elif key == 'include':
                # Map the top-level context to $self. FIX: the previous
                # substring replace('main', '$self') corrupted any include
                # whose name merely *contained* "main" (e.g. 'domain' ->
                # 'do$self'); only an exact reference to main is rewritten.
                result[key] = '$self' if value in ('main', '#main') else value
            elif key in ('push', 'set', 'pop'):
                # Context-stack directives have no TextMate counterpart.
                pass
            else:
                result[key] = value

        return result if result else None

    # Top-level patterns come from the 'main' context.
    if 'main' in contexts:
        for item in contexts['main']:
            if isinstance(item, dict):
                processed = process_pattern(item)
                if processed:
                    patterns.append(processed)

    # Only contexts actually referenced (as '#name') from main patterns go
    # into the repository; everything else is dead weight in TextMate.
    referenced_contexts = set()
    for pattern in patterns:
        if 'include' in pattern:
            include_ref = pattern['include']
            if include_ref.startswith('#'):
                referenced_contexts.add(include_ref[1:])

    for ctx_name, ctx_items in contexts.items():
        if ctx_name == 'main':
            continue
        if ctx_name not in referenced_contexts:
            continue

        repo_patterns = []
        for item in ctx_items:
            if isinstance(item, dict):
                processed = process_pattern(item)
                if processed:
                    repo_patterns.append(processed)

        if repo_patterns:
            repository[ctx_name] = {'patterns': repo_patterns}

    return patterns, repository
274
275
# Scope remapping table consumed by degrade_scope(). Maps Sublime-specific
# scopes to scopes that mainstream TextMate themes actually colour; an
# empty-string value means "drop the scope entirely".
SCOPE_REMAP: Dict[str, str] = {
    # Port numbers degrade to generic numeric (TextMate has no context stacks)
    'constant.numeric.port.dfasm': 'constant.numeric.dfasm',
    # Opcodes → support.function for cyan colour in most themes (distinct from keyword)
    'keyword.other.opcode.dfasm': 'support.function.opcode.dfasm',
    # Pragma → variable scope for distinct colour from both keywords and opcodes
    'keyword.control.pragma.dfasm': 'variable.language.pragma.dfasm',
    # Labels → entity.name.function for strong blue/green colour
    'entity.name.label.dfasm': 'entity.name.function.label.dfasm',
    # Nodes and functions → entity.name.type for visible colour (cyan/green in most themes)
    'entity.name.tag.dfasm': 'entity.name.type.node.dfasm',
    'entity.name.function.dfasm': 'entity.name.type.function.dfasm',
    # Named params → entity.other.attribute-name for distinct attribute colour
    'variable.parameter.dfasm': 'entity.other.attribute-name.dfasm',
    # Assignment operator → no special colour (default punctuation)
    'keyword.operator.assignment.dfasm': '',
}
293
294
def degrade_scope(scope: str) -> str:
    """Remap scopes for better colour differentiation in TextMate themes.

    Unmapped scopes pass through unchanged; a mapping to '' drops the scope.
    """
    if scope in SCOPE_REMAP:
        return SCOPE_REMAP[scope]
    return scope
298
299
def build_textmate_grammar(sublime_syntax: Dict[str, Any]) -> Dict[str, Any]:
    """Convert a parsed sublime-syntax dict into a TextMate grammar dict."""
    # Grammar metadata, with sensible fallbacks.
    name = sublime_syntax.get('name', 'dfasm')
    file_extensions = sublime_syntax.get('file_extensions', [])
    scope = sublime_syntax.get('scope', 'source.dfasm')

    top_patterns, repository = build_textmate_patterns(
        sublime_syntax.get('contexts', {}),
        sublime_syntax.get('variables', {}),
    )

    # Drop bare-sigil patterns: the qualified-reference patterns built below
    # match the sigil together with its name/placement/port in one regex, so
    # standalone '@' / '&' / '\$' rules would shadow them.
    bare_sigils = ('@', '&', r'\$')
    filtered_patterns = [
        p for p in top_patterns if p.get('match') not in bare_sigils
    ]

    # One combined match-with-captures pattern per sigil.
    qualified_ref_patterns = []
    for sigil, ref_type in (('@', 'node'), ('&', 'label'), ('$', 'function')):
        regex, caps = build_qualified_ref_pattern(sigil, ref_type)
        qualified_ref_patterns.append({'match': regex, 'captures': caps})

    # Qualified refs go first so they win over the generic patterns.
    return {
        'scopeName': scope,
        'name': name,
        'fileTypes': file_extensions,
        'patterns': qualified_ref_patterns + filtered_patterns,
        'repository': repository,
    }
344
345
def main():
    """CLI entry point: convert a sublime-syntax file to a TextMate grammar.

    Usage:
        sublime_to_textmate.py [INPUT.sublime-syntax] [OUTPUT.tmLanguage.json]

    Falls back to the repository's default editor/ paths when arguments
    are omitted.
    """
    # Parse arguments (positional, both optional)
    input_file = sys.argv[1] if len(sys.argv) > 1 else 'editor/sublime/dfasm.sublime-syntax'
    output_file = sys.argv[2] if len(sys.argv) > 2 else 'editor/textmate/dfasm.tmLanguage.json'

    # Make paths absolute if not already
    input_path = Path(input_file).resolve()
    output_path = Path(output_file).resolve()

    # Load sublime-syntax
    print(f"Loading {input_path}...")
    sublime_syntax = load_sublime_syntax(str(input_path))

    # Convert to TextMate
    print("Converting to TextMate format...")
    grammar = build_textmate_grammar(sublime_syntax)

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Write TextMate grammar. Explicit UTF-8: don't depend on the platform
    # default encoding (e.g. cp1252 on Windows) for the generated file.
    print(f"Writing {output_path}...")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(grammar, f, indent=2)
        f.write('\n')  # trailing newline keeps the file POSIX-friendly

    print(f"✓ Conversion complete: {output_path}")


if __name__ == '__main__':
    main()