Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
1// Copyright (C) 2025 Thom Hayward.
2//
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the GNU General Public License as published by the Free Software
5// Foundation, version 3.
6//
7// This program is distributed in the hope that it will be useful, but WITHOUT
8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10// details.
11//
12// You should have received a copy of the GNU General Public License along with
13// this program. If not, see <https://www.gnu.org/licenses/>.
14//
15
16mod lexer;
17mod parse;
18
19use crate::reg::Register;
20use lexer::Token;
21use parse::{Instruction, Node, NodeType, PragmaType};
22use std::collections::HashMap;
23
/// The part of the assembled image a node is emitted into.
///
/// Used as a `HashMap` key while laying out the program and as the tag stored
/// alongside each label's offset.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum Section {
    /// Holds instruction nodes.
    Text,
    /// Holds pragma nodes (embedded data).
    Data,
}
29
30/// Assemble a Universal Machine program.
31///
32/// # Panics
33///
34/// Panics if `source` cannot be parsed.
35///
36#[must_use]
37#[allow(clippy::too_many_lines)]
38pub fn assemble<'s>(source: &'s str) -> Vec<u32> {
39 let parsed = parse::parse("", source).unwrap();
40
41 let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new();
42 let mut offsets: HashMap<Section, usize> = HashMap::new();
43 let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new();
44 for node in parsed.nodes() {
45 match node.entity {
46 NodeType::Pragma(_) => {
47 let loc = *offsets
48 .entry(Section::Data)
49 .and_modify(|loc| *loc += node.size())
50 .or_default();
51
52 sections
53 .entry(Section::Data)
54 .and_modify(|section| section.push(node))
55 .or_insert_with(|| vec![node]);
56
57 for label in &node.labels {
58 label_locations.insert(label, (Section::Data, loc));
59 }
60 }
61 NodeType::Instruction(_) => {
62 let loc = *offsets
63 .entry(Section::Text)
64 .and_modify(|loc| *loc += node.size())
65 .or_default();
66
67 sections
68 .entry(Section::Text)
69 .and_modify(|section| section.push(node))
70 .or_insert_with(|| vec![node]);
71
72 for label in &node.labels {
73 label_locations.insert(label, (Section::Text, loc));
74 }
75 }
76 NodeType::Comment(_) => {}
77 }
78 }
79
80 let text = sections.remove(&Section::Text).unwrap();
81 let data_offset = text.len();
82
83 let mut program = vec![];
84 for node in text {
85 let NodeType::Instruction(instruction) = &node.entity else {
86 panic!("invalid node in .text section");
87 };
88
89 let encoded = match *instruction {
90 Instruction::ConditionalMove {
91 destination,
92 source,
93 condition,
94 } => encode_standard(0x00, destination, source, condition),
95 Instruction::Load {
96 destination,
97 address,
98 } => {
99 let parse::Location { block, offset } = address;
100 encode_standard(0x01, destination, block, offset)
101 }
102 Instruction::Store { source, address } => {
103 let parse::Location { block, offset } = address;
104 encode_standard(0x02, block, offset, source)
105 }
106 Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b),
107 Instruction::AddAssign { destination, a } => {
108 encode_standard(0x03, destination, destination, a)
109 }
110 Instruction::AddSelf { destination } => {
111 encode_standard(0x03, destination, destination, destination)
112 }
113 Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b),
114 Instruction::MulAssign { destination, a } => {
115 encode_standard(0x04, destination, destination, a)
116 }
117 Instruction::MulSelf { destination } => {
118 encode_standard(0x04, destination, destination, destination)
119 }
120 Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b),
121 Instruction::DivAssign { destination, a } => {
122 encode_standard(0x05, destination, destination, a)
123 }
124 Instruction::DivSelf { destination } => {
125 encode_standard(0x05, destination, destination, destination)
126 }
127 Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b),
128 Instruction::NandAssign { destination, a } => {
129 encode_standard(0x06, destination, destination, a)
130 }
131 Instruction::NandSelf { destination } => {
132 encode_standard(0x06, destination, destination, destination)
133 }
134 Instruction::Halt => encode_standard(
135 0x07,
136 Register::default(),
137 Register::default(),
138 Register::default(),
139 ),
140 Instruction::Alloc {
141 destination,
142 length,
143 } => encode_standard(0x08, Register::default(), destination, length),
144 Instruction::Free { block } => {
145 encode_standard(0x09, Register::default(), Register::default(), block)
146 }
147 Instruction::Out { source } => {
148 encode_standard(0x0a, Register::default(), Register::default(), source)
149 }
150 Instruction::In { destination } => {
151 encode_standard(0x0b, Register::default(), Register::default(), destination)
152 }
153 Instruction::Jmp { location } => {
154 let parse::Location { block, offset } = location;
155 encode_standard(0x0c, Register::default(), block, offset)
156 }
157 Instruction::Address {
158 destination,
159 ref reference,
160 } => {
161 // lookup reference
162 let Some((section, offset)) = label_locations.get(reference.label) else {
163 panic!("failed to resolve {}", reference.label);
164 };
165
166 let value = match section {
167 Section::Text => *offset,
168 Section::Data => data_offset + *offset,
169 };
170
171 0xd000_0000
172 | destination.encode_a_ortho()
173 | encode_literal(u32::try_from(value).unwrap())
174 }
175 Instruction::LiteralMove {
176 destination,
177 literal,
178 } => 0xd000_0000 | destination.encode_a_ortho() | encode_literal(literal),
179 };
180
181 program.push(encoded);
182 }
183
184 if let Some(data) = sections.remove(&Section::Data) {
185 for node in data {
186 let NodeType::Pragma(pragma) = &node.entity else {
187 panic!("invalid node in .data section. {node:?}");
188 };
189
190 let encoded = match &pragma.payload {
191 PragmaType::WideString { value } => {
192 for &byte in value.as_bytes() {
193 program.push(u32::from(byte));
194 }
195 Some(0) // terminating byte.
196 }
197 PragmaType::U32 { value } => Some(*value),
198 };
199
200 if let Some(encoded) = encoded {
201 program.push(encoded);
202 }
203 }
204 }
205
206 program
207}
208
/// Check that `value` fits into the literal field of a literal-load
/// instruction and return it unchanged.
///
/// # Panics
///
/// Panics if `value` needs more than 25 bits, i.e. is larger than `0x1ff_ffff`.
fn encode_literal(value: u32) -> u32 {
    /// Width of the literal field in bits.
    const LITERAL_BITS: u32 = 25;
    /// Largest value representable in `LITERAL_BITS` bits.
    const LITERAL_MAX: u32 = (1 << LITERAL_BITS) - 1;

    assert!(
        value <= LITERAL_MAX,
        "literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})"
    );

    value
}
214
215const fn encode_standard(op: u32, a: Register, b: Register, c: Register) -> u32 {
216 (op << 28) | a.encode_a() | b.encode_b() | c.encode_c()
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ops::Operation;
    use crate::reg::Register::*;

    /// A wide string is embedded after the code and `adr` yields its position.
    #[test]
    fn wide_str() {
        let source = r#"
            adr r0, msg
            msg: .wstr "Hello"
        "#;
        let program = assemble(source);

        // The single instruction loads the address of the string, which
        // starts right behind it.
        let ops = crate::ops::decode(&program);
        assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 });

        // Everything after the instruction is the string, one platter per
        // character, closed by a zero platter.
        let expected = [
            'H' as u32,
            'e' as u32,
            'l' as u32,
            'l' as u32,
            'o' as u32,
            0,
        ];
        assert_eq!(&program[1..], &expected[..]);
    }

    /// Labels in the text section resolve to the index of the instruction
    /// they precede; loads and stores decode with the expected operands.
    #[test]
    fn addresses() {
        let source = r#"
            halt
            start:
            ldr r2, [r0, r1]
            str r2, [r0, r1]
            adr r3, start
            halt
        "#;
        let program = assemble(source);

        let decoded: Vec<_> = crate::ops::decode(&program).into_iter().collect();
        let expected = vec![
            Operation::Halt,
            Operation::ArrayIndex {
                a: R2,
                b: R0,
                c: R1,
            },
            Operation::ArrayAmendment {
                a: R0,
                b: R1,
                c: R2,
            },
            Operation::Orthography { a: R3, value: 1 },
            Operation::Halt,
        ];
        assert_eq!(decoded, expected);
    }

    /// A value can be read from, and written back to, a labelled data word.
    #[test]
    fn load_store() {
        let program = assemble(
            r#"
            adr r1, loc
            ldr r2, [r0, r1]
            mov r3, 56
            str r3, [r0, r1]
            halt
            loc:.u32 42
            "#,
        );
        let state = crate::Um::new(program).run();

        // The load saw the initial value ...
        assert_eq!(state.registers[R2], 42);
        // ... and the store replaced it (five instructions precede the word).
        assert_eq!(state.memory[0][5], 56);
    }

    /// All three operand forms of `add` produce the documented sums.
    #[test]
    fn addition() {
        let program = assemble(
            r#"
            mov r0, 42
            mov r1, 64
            mov r2, 8192

            add r3, r0, r1 ; r3 = r0 + r1 = 106
            add r1, r2 ; r1 = r1 + r2 = 8256
            add r0 ; r0 = r0 + r0 = 84

            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R0], 84);
        assert_eq!(state.registers[R1], 8256);
        assert_eq!(state.registers[R2], 8192);
        assert_eq!(state.registers[R3], 106);
    }

    /// `alloc` stores a non-zero block identifier and creates a block of the
    /// requested length.
    #[test]
    fn alloc() {
        let program = assemble(
            r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let block = state.registers[R1] as usize;
        assert_eq!(state.memory[block].len(), 1000);
    }

    /// `free` leaves the previously allocated block empty.
    #[test]
    fn free() {
        let program = assemble(
            r#"
            ; Allocate 1000 bytes.
            mov r0, 1000
            alloc r1, r0
            free r1
            halt
            "#,
        );
        let state = crate::Um::new(program).run();

        assert_eq!(state.registers[R0], 1000);
        assert_ne!(state.registers[R1], 0);

        let block = state.registers[R1] as usize;
        assert_eq!(state.memory[block].len(), 0, "memory not free'd");
    }
}
361}