OR-1 dataflow CPU sketch
at main 540 lines 20 kB view raw
"""Code generation for OR1 assembly.

Converts fully allocated IRGraphs to emulator-ready configuration objects and
token streams. Two output modes:
1. Direct mode: Produces PEConfig/SMConfig lists + seed tokens (for direct setup)
2. Token stream mode: Produces bootstrap sequence (SM init → IRAM writes → ALLOC → frame slot writes → seeds)

Reference: Phase 6 design doc, Tasks 1-3.
"""

from dataclasses import dataclass
from collections import defaultdict

from asm.ir import (
    IRGraph, IRNode, IREdge, ResolvedDest, collect_all_nodes_and_edges, collect_all_data_defs,
    DEFAULT_IRAM_CAPACITY, DEFAULT_FRAME_COUNT
)
from asm.opcodes import is_dyadic
from cm_inst import (
    Instruction, OutputStyle, TokenKind, FrameOp, FrameDest,
    MemOp, Port, RoutingOp
)
from emu.types import PEConfig, SMConfig
from tokens import (
    DyadToken, MonadToken, SMToken, Token,
    PELocalWriteToken, FrameControlToken
)
from encoding import pack_instruction, pack_flit1, pack_token
from sm_mod import Presence


# Fallbacks used when the graph's system config is missing or lacks the field.
_DEFAULT_FRAME_SLOTS = 64
_DEFAULT_MATCHABLE_OFFSETS = 8


@dataclass(frozen=True)
class AssemblyResult:
    """Result of code generation in direct mode.

    Attributes:
        pe_configs: List of PEConfig objects, one per PE
        sm_configs: List of SMConfig objects, one per SM
        seed_tokens: List of DyadToken/MonadToken objects for seed injections
        setup_tokens: List of tokens for the frame setup sequence
            (SM init → IRAM writes → ALLOC → frame slot writes)
    """
    pe_configs: list[PEConfig]
    sm_configs: list[SMConfig]
    seed_tokens: list[Token]
    setup_tokens: list[Token]


def _or_zero(value):
    """Return ``value``, or 0 when it is None."""
    return value if value is not None else 0


def _activation_ids(nodes: list[IRNode]) -> list[int]:
    """Return the sorted unique act_ids of the non-seed nodes in ``nodes``."""
    return sorted({n.act_id for n in nodes if n.act_id is not None and not n.seed})


def _activation_nodes(nodes: list[IRNode], act_id: int) -> list[IRNode]:
    """Return the non-seed nodes of ``nodes`` that belong to ``act_id``."""
    return [n for n in nodes if n.act_id == act_id and not n.seed]


def _resolved_frame_dests(node: IRNode) -> list:
    """Collect the frame destinations of a node, left before right.

    A destination is included only when the dest object is truthy, exposes a
    ``frame_dest`` attribute, and that attribute is truthy. Note that a node
    with only a right destination therefore yields a one-element list, so the
    right destination lands in the first destination slot.
    """
    dests = []
    for dest in (node.dest_l, node.dest_r):
        frame_dest = getattr(dest, "frame_dest", None) if dest else None
        if frame_dest:
            dests.append(frame_dest)
    return dests


def _frame_slot_writes(node: IRNode) -> list[tuple[int, int, bool]]:
    """Compute the frame slot writes required by a single node.

    The node's ``fref`` is the first slot it owns. When the node's mode says
    it has a constant, that slot is reserved for the constant and the
    destinations follow it; otherwise destinations start at ``fref``.

    Args:
        node: An allocated node.

    Returns:
        List of ``(slot, value, is_dest)`` tuples in write order (constant
        first, then destinations). Empty if the node has no fref or mode.
    """
    if node.fref is None or node.mode is None:
        return []

    _, has_const, dest_count = node.mode
    writes: list[tuple[int, int, bool]] = []
    slot = node.fref

    if has_const:
        if isinstance(node.const, int):
            writes.append((slot, node.const & 0xFFFF, False))
        # The constant slot is reserved whenever has_const is set, even if no
        # integer value is available to write. Destinations must never be
        # placed in it (this matches _find_dest_for_slot's layout).
        slot += 1

    for i, frame_dest in enumerate(_resolved_frame_dests(node)[:dest_count]):
        writes.append((slot + i, pack_flit1(frame_dest), True))

    return writes


def _build_iram_for_pe(
    nodes_on_pe: list[IRNode],
    all_nodes: dict[str, IRNode],
    all_edges: list[IREdge],
) -> dict[int, Instruction]:
    """Build IRAM entries as Instruction objects for a single PE.

    Seed nodes, nodes without an IRAM offset, and nodes without a mode are
    skipped.

    Args:
        nodes_on_pe: List of IRNodes on this PE
        all_nodes: All nodes in graph (unused here)
        all_edges: All edges in graph (unused in frame model)

    Returns:
        Dict mapping IRAM offset to Instruction object
    """
    iram: dict[int, Instruction] = {}

    for node in nodes_on_pe:
        if node.seed or node.iram_offset is None or node.mode is None:
            continue

        output_style, has_const, dest_count = node.mode
        iram[node.iram_offset] = Instruction(
            opcode=node.opcode,
            output=output_style,
            has_const=has_const,
            dest_count=dest_count,
            wide=node.wide,
            fref=node.fref or 0,
        )

    return iram


def _find_const_for_slot(
    act_nodes: list[IRNode],
    slot_idx: int,
    frame_layout,
) -> int | None:
    """Find the constant value for a frame slot, if any.

    A node supplies the constant for ``slot_idx`` when its mode has
    ``has_const`` set, its ``fref`` equals ``slot_idx`` and its ``const`` is
    an int.

    Args:
        act_nodes: List of nodes in this activation
        slot_idx: Frame slot index
        frame_layout: FrameLayout from the activation (unused)

    Returns:
        Constant value (0-65535) or None if slot is not a constant slot
    """
    for node in act_nodes:
        if node.fref is None or not isinstance(node.const, int):
            continue
        if not node.mode:
            continue

        _, has_const, _ = node.mode
        if has_const and node.fref == slot_idx:
            return node.const & 0xFFFF

    return None


def _find_dest_for_slot(
    act_nodes: list[IRNode],
    slot_idx: int,
    frame_layout,
    all_nodes: dict[str, IRNode],
    all_edges: list[IREdge],
) -> FrameDest | None:
    """Find the destination routing for a frame slot.

    Each node's destination slots start right after its constant slot (if
    any): index 0 maps to ``dest_l`` and index 1 to ``dest_r``.

    Args:
        act_nodes: List of nodes in this activation
        slot_idx: Frame slot index
        frame_layout: FrameLayout from the activation (unused)
        all_nodes: All nodes in graph (unused)
        all_edges: All edges in graph (unused)

    Returns:
        FrameDest if this slot is a resolved destination slot, None otherwise
    """
    for node in act_nodes:
        if node.fref is None or node.mode is None:
            continue

        _, has_const, dest_count = node.mode
        dest_start = node.fref + int(has_const)
        if not dest_start <= slot_idx < dest_start + dest_count:
            continue

        dest_idx = slot_idx - dest_start
        if dest_idx == 0:
            candidate = node.dest_l
        elif dest_idx == 1:
            candidate = node.dest_r
        else:
            continue

        # An unresolved candidate does not end the search; later nodes may
        # still claim this slot.
        if isinstance(candidate, ResolvedDest) and candidate.frame_dest is not None:
            return candidate.frame_dest

    return None


def _generate_setup_tokens(
    pe_configs: list[PEConfig],
    sm_configs: list[SMConfig],
    nodes_by_pe: dict[int, list[IRNode]],
    all_nodes: dict[str, IRNode],
    all_edges: list[IREdge],
    data_defs: list,
) -> list[Token]:
    """Generate the ordered setup token sequence for frame-based bootstrap.

    Order: SM init → IRAM writes → ALLOC → frame slot writes
    (seed tokens are added separately by the caller).

    Args:
        pe_configs: List of PEConfig objects (with populated iram)
        sm_configs: List of SMConfig objects (unused here)
        nodes_by_pe: Dict mapping PE ID to list of IRNodes
        all_nodes: All nodes in graph (unused here)
        all_edges: All edges in graph (unused here)
        data_defs: List of data definition objects with sm_id, cell_addr, value

    Returns:
        List of setup tokens in bootstrap order
    """
    tokens: list[Token] = []

    # 1. SM init: one WRITE per placed data definition, ordered by (SM, address).
    for data_def in sorted(data_defs, key=lambda d: (d.sm_id or 0, d.cell_addr or 0)):
        if data_def.sm_id is None or data_def.cell_addr is None:
            continue
        tokens.append(SMToken(
            target=data_def.sm_id,
            addr=data_def.cell_addr,
            op=MemOp.WRITE,
            flags=None,
            data=data_def.value,
            ret=None,
        ))

    # 2. IRAM writes via PELocalWriteToken(region=0), in offset order per PE.
    for pe_cfg in pe_configs:
        for offset in sorted(pe_cfg.iram):
            tokens.append(PELocalWriteToken(
                target=pe_cfg.pe_id,
                act_id=0,  # IRAM writes are activation-independent
                region=0,
                slot=offset,
                data=pack_instruction(pe_cfg.iram[offset]),
                is_dest=False,
            ))

    # 3. ALLOC: one per activation per PE.
    for pe_id in sorted(nodes_by_pe):
        for act_id in _activation_ids(nodes_by_pe[pe_id]):
            tokens.append(FrameControlToken(
                target=pe_id,
                act_id=act_id,
                op=FrameOp.ALLOC,
                payload=0,
            ))

    # 4. Frame slot writes via PELocalWriteToken(region=1).
    for pe_id in sorted(nodes_by_pe):
        nodes = nodes_by_pe[pe_id]
        for act_id in _activation_ids(nodes):
            act_nodes = _activation_nodes(nodes, act_id)
            # The first node's frame layout is treated as canonical for the
            # activation; without one, no slots are written.
            if not act_nodes or act_nodes[0].frame_layout is None:
                continue

            for node in act_nodes:
                for slot, value, is_dest in _frame_slot_writes(node):
                    tokens.append(PELocalWriteToken(
                        target=pe_id,
                        act_id=act_id,
                        region=1,
                        slot=slot,
                        data=value,
                        is_dest=is_dest,
                    ))

    return tokens


def _compute_route_restrictions(
    nodes_by_pe: dict[int, list[IRNode]],
    all_edges: list[IREdge],
    all_nodes: dict[str, IRNode],
    pe_id: int,
) -> tuple[set[int], set[int]]:
    """Compute allowed PE and SM routes for a given PE.

    PE routes are the PE itself plus the PE of every node reached by an edge
    whose source lives on this PE. SM routes are the sm_ids of nodes on this
    PE whose opcode is a MemOp.

    Args:
        nodes_by_pe: Dict mapping PE ID to list of nodes on that PE
        all_edges: List of all edges in graph
        all_nodes: Dict of all nodes
        pe_id: The PE we're computing routes for

    Returns:
        Tuple of (allowed_pe_routes set, allowed_sm_routes set)
    """
    local_nodes = nodes_by_pe.get(pe_id, [])
    local_names = {node.name for node in local_nodes}

    pe_routes = {pe_id}  # Always include self-route
    for edge in all_edges:
        if edge.source not in local_names:
            continue
        dest_node = all_nodes.get(edge.dest)
        if dest_node is not None and dest_node.pe is not None:
            pe_routes.add(dest_node.pe)

    sm_routes = {
        node.sm_id
        for node in local_nodes
        if isinstance(node.opcode, MemOp) and node.sm_id is not None
    }

    return pe_routes, sm_routes


def _build_initial_frames(
    nodes_on_pe: list[IRNode],
) -> tuple[dict[int, dict[int, int]], dict[int, tuple[int, int]]]:
    """Build the initial frame contents and tag store for one PE.

    Activations are numbered in ascending act_id order; that index is the
    frame ID. Each frame is a sparse dict of slot index to value.

    Args:
        nodes_on_pe: List of IRNodes on this PE

    Returns:
        Tuple of (initial_frames, initial_tag_store), where initial_tag_store
        maps act_id to ``(frame_id, 0)``
    """
    initial_frames: dict[int, dict[int, int]] = {}
    initial_tag_store: dict[int, tuple[int, int]] = {}

    for frame_id, act_id in enumerate(_activation_ids(nodes_on_pe)):
        act_nodes = _activation_nodes(nodes_on_pe, act_id)
        if not act_nodes:
            continue

        frame: dict[int, int] = {}
        # An activation without a frame layout still gets an (empty) frame.
        if act_nodes[0].frame_layout is not None:
            for node in act_nodes:
                for slot, value, _ in _frame_slot_writes(node):
                    frame[slot] = value

        initial_frames[frame_id] = frame
        initial_tag_store[act_id] = (frame_id, 0)

    return initial_frames, initial_tag_store


def _build_sm_configs(graph: IRGraph, data_defs: list) -> list[SMConfig]:
    """Build one SMConfig per SM, pre-filled from the placed data definitions.

    Every SM in ``range(sm_count)`` gets a config (at least one SM always
    exists), plus any SM referenced by a data definition.
    """
    cells_by_sm: dict[int, dict[int, tuple[Presence, int]]] = {}
    for data_def in data_defs:
        if data_def.sm_id is None or data_def.cell_addr is None:
            continue
        cells = cells_by_sm.setdefault(data_def.sm_id, {})
        cells[data_def.cell_addr] = (Presence.FULL, data_def.value)

    sm_count = max(1, graph.system.sm_count if graph.system else 1)
    for sm_id in range(sm_count):
        cells_by_sm.setdefault(sm_id, {})

    return [
        SMConfig(sm_id=sm_id, initial_cells=cells_by_sm[sm_id] or None)
        for sm_id in sorted(cells_by_sm)
    ]


def _build_seed_tokens(
    all_nodes: dict[str, IRNode],
    all_edges: list[IREdge],
) -> list[Token]:
    """Build the seed tokens that start execution.

    Two sources produce seeds:
    - Seed nodes: one token per outgoing edge, addressed to the edge's
      destination node (DyadToken if the destination is dyadic, else
      MonadToken). Destinations that are unknown or not placed are skipped.
    - Triggerable constants: non-seed CONST nodes with no incoming edges get
      a MonadToken addressed to themselves.
    """
    edges_by_source: dict[str, list[IREdge]] = defaultdict(list)
    nodes_with_inputs: set[str] = set()
    for edge in all_edges:
        edges_by_source[edge.source].append(edge)
        nodes_with_inputs.add(edge.dest)

    seed_tokens: list[Token] = []
    for node in all_nodes.values():
        data = _or_zero(node.const)

        if node.seed:
            for edge in edges_by_source.get(node.name, []):
                dest_node = all_nodes.get(edge.dest)
                if dest_node is None or dest_node.pe is None:
                    continue

                offset = _or_zero(dest_node.iram_offset)
                act_id = _or_zero(dest_node.act_id)
                if is_dyadic(dest_node.opcode, dest_node.const):
                    token = DyadToken(
                        target=dest_node.pe,
                        offset=offset,
                        act_id=act_id,
                        data=data,
                        port=edge.port,
                    )
                else:
                    token = MonadToken(
                        target=dest_node.pe,
                        offset=offset,
                        act_id=act_id,
                        data=data,
                        inline=False,
                    )
                seed_tokens.append(token)
        elif node.opcode == RoutingOp.CONST and node.name not in nodes_with_inputs:
            seed_tokens.append(MonadToken(
                target=_or_zero(node.pe),
                offset=_or_zero(node.iram_offset),
                act_id=_or_zero(node.act_id),
                data=data,
                inline=False,
            ))

    return seed_tokens


def generate_direct(graph: IRGraph) -> AssemblyResult:
    """Generate PEConfig, SMConfig, seed tokens, and setup tokens from an allocated IRGraph.

    Args:
        graph: A fully allocated IRGraph (after allocate pass)

    Returns:
        AssemblyResult with pe_configs, sm_configs, seed_tokens, and setup_tokens
    """
    all_nodes, all_edges = collect_all_nodes_and_edges(graph)
    all_data_defs = collect_all_data_defs(graph)

    # Group placed nodes by PE.
    nodes_by_pe: dict[int, list[IRNode]] = defaultdict(list)
    for node in all_nodes.values():
        if node.pe is not None:
            nodes_by_pe[node.pe].append(node)

    # Frame configuration is the same for every PE, so resolve it once.
    system = graph.system
    if system:
        frame_count = system.frame_count
        frame_slots = getattr(system, "frame_slots", _DEFAULT_FRAME_SLOTS)
        matchable_offsets = getattr(system, "matchable_offsets", _DEFAULT_MATCHABLE_OFFSETS)
    else:
        frame_count = DEFAULT_FRAME_COUNT
        frame_slots = _DEFAULT_FRAME_SLOTS
        matchable_offsets = _DEFAULT_MATCHABLE_OFFSETS

    pe_configs = []
    for pe_id in sorted(nodes_by_pe):
        nodes_on_pe = nodes_by_pe[pe_id]

        iram = _build_iram_for_pe(nodes_on_pe, all_nodes, all_edges)
        allowed_pe_routes, allowed_sm_routes = _compute_route_restrictions(
            nodes_by_pe, all_edges, all_nodes, pe_id
        )
        initial_frames, initial_tag_store = _build_initial_frames(nodes_on_pe)

        pe_configs.append(PEConfig(
            pe_id=pe_id,
            iram=iram,
            frame_count=frame_count,
            frame_slots=frame_slots,
            matchable_offsets=matchable_offsets,
            # Empty mappings are passed as None.
            initial_frames=initial_frames or None,
            initial_tag_store=initial_tag_store or None,
            allowed_pe_routes=allowed_pe_routes,
            allowed_sm_routes=allowed_sm_routes,
        ))

    sm_configs = _build_sm_configs(graph, all_data_defs)

    setup_tokens = _generate_setup_tokens(
        pe_configs,
        sm_configs,
        nodes_by_pe,
        all_nodes,
        all_edges,
        all_data_defs,
    )

    seed_tokens = _build_seed_tokens(all_nodes, all_edges)

    return AssemblyResult(
        pe_configs=pe_configs,
        sm_configs=sm_configs,
        seed_tokens=seed_tokens,
        setup_tokens=setup_tokens,
    )


def generate_tokens(graph: IRGraph) -> list[Token]:
    """Generate bootstrap token sequence from an allocated IRGraph.

    Produces tokens in order: SM init → IRAM writes → ALLOC → frame slot writes → seeds

    Args:
        graph: A fully allocated IRGraph (after allocate pass)

    Returns:
        List of tokens in bootstrap order
    """
    # Direct mode already computes both halves of the stream; setup tokens
    # must precede the seeds that start execution.
    result = generate_direct(graph)
    return [*result.setup_tokens, *result.seed_tokens]