OR-1 dataflow CPU sketch
1// Dataflow Graph Assembly — Lark EBNF Grammar v0.2
2// Parser: Earley (LALR blocked by macro_ref vs macro_call_stmt reduce/reduce conflict: both are #IDENT prefix)
3
// Entry rule: zero or more statements, each optionally preceded by _NL runs
// (newlines and/or comments), followed by optional trailing _NL runs.
// An empty input (or one containing only newlines/comments) is accepted.
// NOTE(review): the separator is `_NL*` (zero or more), so this rule does not by
// itself force consecutive statements onto separate lines — confirm intended.
4start: (_NL* statement)* _NL*
5
// One statement. Every alternative is a single rule, and the leading `?` makes
// Lark inline a rule that matches exactly one child, so the parse tree holds the
// concrete node (func_def, inst_def, ...) rather than a `statement` wrapper.
// The same rule is reused for the bodies of func_def, macro_def and
// repetition_block, so all of these forms are syntactically legal when nested.
6?statement: func_def
7 | macro_def
8 | inst_def
9 | strong_edge
10 | weak_edge
11 | plain_edge
12 | data_def
13 | system_pragma
14 | location_dir
15 | macro_call_stmt
16 | call_stmt
17 | repetition_block
18
19// --- Function / subgraph definition ---
20// $name |> { body }
// The name is a func_ref ("$" IDENT). The body is a brace-delimited statement
// list with the same shape as `start`; it may be empty.
21func_def: func_ref FLOW_OUT "{" (_NL* statement)* _NL* "}"
22
23// --- Macro definition ---
24// #name [param, param, ...] |> { body }
// The square brackets above mark the parameter list as optional; they are not
// literal syntax. Parameters are a bare comma-separated list of identifiers.
// A parameter prefixed with `*` is tagged variadic_param, any other is
// regular_param. The grammar places no limit on how many variadic parameters
// appear or on their position; any such rule must be checked after parsing.
25macro_def: "#" IDENT macro_params? FLOW_OUT "{" (_NL* statement)* _NL* "}"
26macro_params: macro_param ("," macro_param)*
27macro_param: VARIADIC IDENT -> variadic_param
28 | IDENT -> regular_param
29
30// Variadic marker for macro parameters
31VARIADIC: "*"
32
33// --- Repetition block in macro body ---
34// $( body ),* expands body once per variadic argument
// The opener "$(" and the closer ")," are each a single literal, followed by a
// separate "*" literal. The body is a statement list and may be empty.
// Because repetition_block is an ordinary `statement` alternative, the grammar
// also accepts it outside a macro body; restricting it to macro bodies is not
// done at this level.
35repetition_block: "$(" (_NL* statement)* _NL* ")," "*"
36
37// --- Instruction definition (named node) ---
38// &label <| opcode [inline_const] [, arg ...]
39// inline_const allows e.g. "&foo <| add 7" as shorthand for "&foo <| add, 7"
// The left-hand side is parsed as any qualified_ref, so forms other than a bare
// &label (including placement / context-slot / port qualifiers) are accepted
// syntactically. Comma-separated arguments after the opcode are optional.
40inst_def: qualified_ref FLOW_IN opcode inline_const? ("," argument)*
41
// The comma-free inline constant is limited to a decimal literal, a hex literal,
// or a ${param} reference; any other argument kind needs the comma form.
42inline_const: DEC_LIT | HEX_LIT | param_ref
43
44// --- Strong inline edge (internal route, anonymous node) ---
45// opcode input [, input ...] |> output [, output ...]
// Starts with an opcode; needs at least one input argument and one output ref.
46strong_edge: opcode argument ("," argument)* FLOW_OUT ref_list
47
48// --- Weak inline edge (token output, anonymous node) ---
49// output [, output ...] opcode <| input [, input ...]
// Starts with the output ref list; needs at least one output and one input.
50weak_edge: ref_list opcode FLOW_IN argument ("," argument)*
51
52// --- Plain edge (wiring between named nodes) ---
53// source |> dest [, dest ...]
// Exactly one source ref, no opcode, and one or more destination refs.
54plain_edge: qualified_ref FLOW_OUT ref_list
55
56// --- Data / initialisation ---
57// ref = value [, value ...] | ref = #macro args
// The right-hand side is either a macro_call or a comma-separated list of
// literal values; references are not accepted as plain data values here.
58data_def: qualified_ref "=" (macro_call | value_list)
59
60// --- Location directive (bare qualified ref with trailing colon) ---
61// Sets location context for subsequent definitions.
62location_dir: qualified_ref ":"
63
64// --- System pragma (hardware configuration) ---
65// @system pe=4, sm=1, iram=128, ctx=2
// One or more comma-separated key=value pairs. Keys are arbitrary identifiers
// (the grammar does not fix the set of names); values are decimal or hex literals.
66system_pragma: "@system" system_param ("," system_param)*
67system_param: IDENT "=" (DEC_LIT | HEX_LIT)
68
69// === Shared productions ===
70
// One or more comma-separated qualified refs; used by the three edge rules.
71ref_list: qualified_ref ("," qualified_ref)*
72
73// === References ===
74// Qualifier chain: at most one placement (|ident), one context slot ([n] or [a..b]) and one port (:spec), in that order.
75// @name — node reference
76// &name — local label reference
77// $name — function / subgraph reference
78// #name — macro reference
// $name.&label, #name.@node — scoped reference (function or macro, then label or node)
// ${name} — macro parameter reference
79// Chaining: @sum|pe0:L (placement + port)
80
81qualified_ref: (node_ref | label_ref | func_ref | macro_ref | scoped_ref | param_ref) placement? ctx_slot? port?
82
// Base reference forms: a one-character sigil followed by an identifier.
83node_ref: "@" IDENT
84label_ref: "&" IDENT
85func_ref: "$" IDENT
86macro_ref: "#" IDENT
87
// Scoped reference: a function or macro name, a dot, then a label or node ref.
88scoped_ref: (func_ref | macro_ref) "." (label_ref | node_ref)
89
90// ${name} — macro parameter reference (substituted during expansion)
// The opener "${" is its own terminal with priority 3, so it is preferred over
// the bare "$" sigil used by func_ref.
91param_ref: PARAM_REF_START IDENT "}"
92PARAM_REF_START.3: "${"
93
// Optional qualifiers that may follow a base reference (see qualified_ref).
// Each one also accepts a ${param} reference in place of its concrete value.
//   placement: "|" then an identifier
//   ctx_slot:  "[" then a decimal literal or a range "lo..hi", then "]"
//   port:      ":" then a PORT_SPEC
94placement: "|" (IDENT | param_ref)
95ctx_slot: "[" (DEC_LIT | ctx_range | param_ref) "]"
96ctx_range: DEC_LIT ".." DEC_LIT
97port: ":" (PORT_SPEC | param_ref)
98
// A port may be given as an identifier, a hex literal, or a decimal literal.
99PORT_SPEC: IDENT | HEX_LIT | DEC_LIT
100
101// === Arguments ===
102// An argument is a value, a qualified ref, a bare opcode keyword or identifier, or a named key=value pair.
103// Named args are syntactically valid on any instruction.
104// Semantic validation (which ops accept named args) is deferred to the assembler.
// `argument` and `positional_arg` carry the `?` prefix, so a single-child match
// is inlined and the tree shows the underlying node directly. The value of a
// named_arg is a positional_arg, so named arguments cannot nest.
105
106?argument: named_arg | positional_arg
107named_arg: IDENT "=" positional_arg
108?positional_arg: value | qualified_ref | OPCODE | IDENT
109
110// === Values (literals) ===
// Each alternative carries an alias (`-> name`), so the resulting tree node is
// named after the literal kind (hex_literal, dec_literal, ...).
111
112?value: HEX_LIT -> hex_literal
113 | DEC_LIT -> dec_literal
114 | CHAR_LIT -> char_literal
115 | STRING_LIT -> string_literal
116 | RAW_STRING_LIT -> raw_string_literal
117 | BYTE_STRING_LIT -> byte_string_literal
118
// One or more comma-separated literals; used on the right-hand side of data_def.
119value_list: value ("," value)*
120
121// === Macros ===
122// #name arg [arg ...] — expanded in a later pass, not during parsing.
// Expression form (used as the right-hand side of data_def). Arguments are
// juxtaposed with no comma separator, may be omitted entirely, and are limited
// to literal values and qualified refs.
123
124macro_call: "#" IDENT (value | qualified_ref)*
125
126// #name arg [, arg ...] [|> output, ...] — standalone macro invocation (as statement)
// Statement form. Arguments are comma-separated and use the general `argument`
// rule (so named key=value arguments are allowed); both the argument list and
// the output list are optional.
127macro_call_stmt: "#" IDENT (argument ("," argument)*)? (FLOW_OUT call_output_list)?
128
129// --- Function call ---
130// $func a=&x, b=&y |> @output [, name=@output2]
// Needs at least one argument and a FLOW_OUT output list; unlike
// macro_call_stmt, neither part is optional.
131call_stmt: func_ref argument ("," argument)* FLOW_OUT call_output_list
132
// Outputs are comma-separated; each is either `name=ref` (named_output) or a
// bare ref (positional_output). The two kinds may be mixed in one list.
133call_output_list: call_output ("," call_output)*
134call_output: IDENT "=" qualified_ref -> named_output
135 | qualified_ref -> positional_output
136
137// === Opcodes ===
138// Exhaustive keyword terminal. Priority 2 ensures opcodes win over IDENT
139// at the lexer level. Semantic validation (monadic/dyadic arity, valid
140// argument combinations) is deferred to the assembler.
// The `opcode` rule accepts either a keyword from OPCODE or a ${param} reference
// in its place. Keywords are lowercase string literals and matched as written.
141
142opcode: OPCODE | param_ref
143
144OPCODE.2: "add" | "sub" | "inc" | "dec"
145 | "shl" | "shr" | "asr"
146 | "and" | "or" | "xor" | "not"
147 | "eq" | "lt" | "lte" | "gt" | "gte"
148 | "breq" | "brgt" | "brge" | "brof" | "brty"
149 | "sweq" | "swgt" | "swge" | "swof" | "swty"
150 | "gate" | "sel" | "merge"
151 | "pass" | "const" | "free_frame" | "extract_tag" | "alloc_remote"
152 | "read" | "write" | "clear" | "exec" | "alloc" | "free" | "rd_inc" | "rd_dec" | "cmp_sw"
153 | "ior" | "iow" | "iorw"
154 | "load_inst" | "route_set"
155
156// === Flow operators ===
157// Priority 3 to win over any partial match of | or < or >
// "<|" (FLOW_IN) is used by inst_def and weak_edge; "|>" (FLOW_OUT) is used by
// definitions, strong/plain edges and call outputs. The bare "|" is also the
// placement qualifier prefix, which is why the two-character forms need priority.
158
159FLOW_IN.3: "<|"
160FLOW_OUT.3: "|>"
161
162// === Terminals ===
// Numeric literals are unsigned. HEX_LIT requires a lowercase "0x" prefix
// followed by one or more hex digits of either case; DEC_LIT is one or more
// decimal digits.
163
164HEX_LIT: /0x[0-9a-fA-F]+/
165DEC_LIT: /[0-9]+/
166
167// Character literals: single char or escape sequence.
168// Supported escapes: \n \t \r \0 \\ \' \xNN
169CHAR_LIT: /'([^'\\]|\\[ntr0\\']|\\x[0-9a-fA-F]{2})'/
170
171// String literals — Rust-style semantics.
172// "..." regular string, escape sequences processed by assembler
173// r"..." raw string, no escape processing
174// b"..." byte string, semantic difference only (raw byte values)
175// Multi-line strings are permitted: /s flag makes . match \n.
// Regular and byte strings accept a backslash followed by any character, so the
// lexer does not validate escape names. A raw string body is any run of
// characters other than the double quote, so it cannot contain a double quote.
176STRING_LIT: /\"([^\"\\]|\\.)*\"/s
177RAW_STRING_LIT: /r\"[^\"]*\"/s
178BYTE_STRING_LIT: /b\"([^\"\\]|\\.)*\"/s
179
// Identifiers: an ASCII letter or underscore, then ASCII letters, digits or underscores.
180IDENT: /[a-zA-Z_][a-zA-Z0-9_]*/
181
182// === Whitespace & Comments ===
183// ; starts a comment to end of line (traditional asm behaviour).
184// Newlines are significant as statement separators.
// _NL is one or more NEWLINE / COMMENT tokens in any order; its leading
// underscore tells Lark to drop it from the parse tree. COMMENT is also listed
// under %ignore at the end of the grammar, so a comment can be consumed either
// as part of _NL or as ignored input.
185
186COMMENT: /;[^\n]*/
187_NL: (NEWLINE | COMMENT) (NEWLINE | COMMENT)*
188
189%import common.NEWLINE
190%import common.WS_INLINE
191%ignore WS_INLINE
192%ignore COMMENT