Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
1// Copyright (C) 2025 Thom Hayward.
2//
3// This program is free software: you can redistribute it and/or modify it under
4// the terms of the GNU General Public License as published by the Free Software
5// Foundation, version 3.
6//
7// This program is distributed in the hope that it will be useful, but WITHOUT
8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
10// details.
11//
12// You should have received a copy of the GNU General Public License along with
13// this program. If not, see <https://www.gnu.org/licenses/>.
14//
15
16use std::io::Read;
17use std::io::Write;
18
19use smallvec::SmallVec;
20
21use crate::ops::Operation;
22use crate::reg::{Page, Register};
23
24pub const SMALLVEC_SIZE: usize = 24;
25
26/// Lossless conversion to `usize`.
27///
28/// This should only be implemented on types which can be losslessly
29/// cast to a `usize`.
30trait IntoIndex: Sized + Copy {
31 fn into_index(self) -> usize;
32}
33
34macro_rules! impl_into_index {
35 ($t:ty) => {
36 impl IntoIndex for $t {
37 fn into_index(self) -> usize {
38 self as usize
39 }
40 }
41 };
42}
43
44#[cfg(target_pointer_width = "16")]
45compile_error!("16 bit architectures are unsupported");
46
47// usize *may* be 16 bits, so only implement if it is 32 or 64 bits.
48#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))]
49impl_into_index!(u32);
50
51#[derive(Default)]
52pub struct Um<'a> {
53 pub program_counter: u32,
54 pub registers: Page,
55 /// Program memory, modelled as a `Vec` of `SmallVec`.
56 ///
57 /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory
58 /// indirection penalty for every memory access within that block.
59 pub memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>,
60 pub free_blocks: Vec<u32>,
61 /// Partially decoded operations cache.
62 pub ops: Vec<Operation>,
63 pub stdin: Option<&'a mut dyn Read>,
64 pub stdout: Option<&'a mut dyn Write>,
65}
66
67impl<'a> Um<'a> {
68 /// Initialise a Universal Machine with the specified program scroll.
69 #[must_use]
70 pub fn new(program: Vec<u32>) -> Self {
71 let ops = crate::ops::decode(&program);
72 Self {
73 memory: vec![program.into()],
74 ops,
75 ..Default::default()
76 }
77 }
78
79 /// Sets the output for the universal machine.
80 #[must_use]
81 pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self {
82 self.stdout.replace(stdout);
83 self
84 }
85
86 /// Sets the input for the universal machine.
87 #[must_use]
88 pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self {
89 self.stdin.replace(stdin);
90 self
91 }
92
93 /// Begin the spin-cycle of the Universal Machine.
94 ///
95 /// # Panics
96 ///
97 /// Panics if the machine encounters an illegal instruction.
98 ///
99 #[inline(never)]
100 #[allow(clippy::return_self_not_must_use, clippy::must_use_candidate)]
101 pub fn run(mut self) -> Self {
102 loop {
103 // println!(
104 // "{:?}, pc: {:08x}, r: {:08x?}",
105 // self.ops[self.program_counter as usize], self.program_counter, self.registers
106 // );
107 match self.ops[self.program_counter as usize] {
108 Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
109 Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
110 Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
111 Operation::Addition { a, b, c } => self.addition(a, b, c),
112 Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
113 Operation::Division { a, b, c } => self.division(a, b, c),
114 Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
115 Operation::Halt => break,
116 Operation::Allocation { b, c } => self.allocation(b, c),
117 Operation::Abandonment { c } => self.abandonment(c),
118 Operation::Output { c } => self.output(c),
119 Operation::Input { c } => self.input(c),
120 Operation::LoadProgram { b, c } => {
121 self.load_program(b, c);
122 continue;
123 }
124 Operation::Orthography { a, value } => self.orthography(a, value),
125 Operation::IllegalInstruction => self.illegal_instruction(),
126 }
127 self.program_counter += 1;
128 }
129
130 self
131 }
132
133 // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even
134 // though it has *no* interaction with the code path in Um::run().
135 //
136 // /// Steps one instruction.
137 // #[inline(never)]
138 // pub fn step(&mut self) -> bool {
139 // match self.ops[self.program_counter as usize] {
140 // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c),
141 // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c),
142 // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c),
143 // Operation::Addition { a, b, c } => self.addition(a, b, c),
144 // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c),
145 // Operation::Division { a, b, c } => self.division(a, b, c),
146 // Operation::NotAnd { a, b, c } => self.not_and(a, b, c),
147 // Operation::Halt => return false,
148 // Operation::Allocation { b, c } => self.allocation(b, c),
149 // Operation::Abandonment { c } => self.abandonment(c),
150 // Operation::Output { c } => self.output(c),
151 // Operation::Input { c } => self.input(c),
152 // Operation::LoadProgram { b, c } => {
153 // self.load_program(b, c);
154 // return true;
155 // }
156 // Operation::Orthography { a, value } => self.orthography(a, value),
157 // Operation::IllegalInstruction => self.illegal_instruction(),
158 // }
159 // self.program_counter += 1;
160 // true
161 // }
162
163 /// Loads the value from the specified register.
164 #[must_use]
165 pub fn load_register(&self, register: Register) -> u32 {
166 self.registers[register]
167 }
168
169 /// Saves a value to the specified register.
170 pub fn save_register(&mut self, register: Register, value: u32) {
171 self.registers[register] = value;
172 }
173
174 pub fn conditional_move(&mut self, a: Register, b: Register, c: Register) {
175 if self.load_register(c) != 0 {
176 self.save_register(a, self.load_register(b));
177 }
178 }
179
180 pub fn array_index(&mut self, a: Register, b: Register, c: Register) {
181 let block = self.load_register(b);
182 let offset = self.load_register(c);
183 self.save_register(a, self.load_memory(block, offset));
184 }
185
186 pub fn array_amendment(&mut self, a: Register, b: Register, c: Register) {
187 let block = self.load_register(a);
188 let offset = self.load_register(b);
189 let value = self.load_register(c);
190 self.store_memory(block, offset, value);
191 }
192
193 pub fn addition(&mut self, a: Register, b: Register, c: Register) {
194 self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c)));
195 }
196
197 pub fn multiplication(&mut self, a: Register, b: Register, c: Register) {
198 self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c)));
199 }
200
201 pub fn division(&mut self, a: Register, b: Register, c: Register) {
202 self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c)));
203 }
204
205 pub fn not_and(&mut self, a: Register, b: Register, c: Register) {
206 self.save_register(a, !(self.load_register(b) & self.load_register(c)));
207 }
208
209 pub fn allocation(&mut self, b: Register, c: Register) {
210 let length = self.load_register(c);
211 let index = self.allocate_memory(length);
212 self.save_register(b, index);
213 }
214
215 pub fn abandonment(&mut self, c: Register) {
216 let block = self.load_register(c);
217 self.free_memory(block);
218 }
219
220 /// Write the value in the specified register to stdout.
221 ///
222 /// # Panics
223 ///
224 /// Panics if writing to stdout fails.
225 ///
226 pub fn output(&mut self, c: Register) {
227 let value = self.load_register(c);
228 if let Some(stdout) = self.stdout.as_mut() {
229 let buffer = [(value & 0xff) as u8];
230 stdout.write_all(&buffer).unwrap();
231 }
232 }
233
234 /// Read a value from stdin into the specifed register.
235 //
236 // The `as` cast below benchmarks faster than using u32::from.
237 #[allow(clippy::cast_lossless)]
238 pub fn input(&mut self, c: Register) {
239 if let Some(stdin) = self.stdin.as_mut() {
240 let mut buffer = vec![0];
241 match stdin.read_exact(&mut buffer) {
242 Ok(()) => self.save_register(c, buffer[0] as u32),
243 Err(_) => self.save_register(c, u32::MAX),
244 }
245 } else {
246 self.save_register(c, u32::MAX);
247 }
248 }
249
250 pub fn load_program(&mut self, b: Register, c: Register) {
251 let block = self.load_register(b);
252
253 // Source array is always copied to array[0], but there
254 // is no point copying array[0] to array[0].
255 if block != 0 {
256 let duplicated = self.duplicate_memory(block);
257 let ops = crate::ops::decode(duplicated);
258 self.ops = ops;
259 }
260
261 self.program_counter = self.load_register(c);
262 }
263
264 pub fn orthography(&mut self, a: Register, value: u32) {
265 self.save_register(a, value);
266 }
267
268 /// Print the current instruction, program counter, and registers to stderr and quit.
269 ///
270 /// # Panics
271 ///
272 /// Panics when called. Every time.
273 ///
274 #[cold]
275 #[inline(never)]
276 pub fn illegal_instruction(&self) -> ! {
277 panic!(
278 "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}",
279 self.memory[0][self.program_counter.into_index()],
280 self.program_counter,
281 self.registers
282 )
283 }
284
285 /// Load a value from `offset` in the specified memory `block`.
286 ///
287 /// # Panics
288 ///
289 /// Panics if the `block` is not an allocated block, or if `offset` overflows the current
290 /// memory block.
291 ///
292 #[must_use]
293 pub fn load_memory(&self, block: u32, offset: u32) -> u32 {
294 let block = block.into_index();
295 let offset = offset.into_index();
296 assert!(block < self.memory.len() && offset < self.memory[block].len());
297 self.memory[block][offset]
298 }
299
300 /// Store a `value` at `offset` in the specified memory `block`.
301 ///
302 /// # Panics
303 ///
304 /// Panics if the `block` is not an allocated block, or if `offset` overflows the current
305 /// memory block.
306 ///
307 pub fn store_memory(&mut self, block: u32, offset: u32, value: u32) {
308 let block = block.into_index();
309 let offset = offset.into_index();
310 assert!(block < self.memory.len() && offset < self.memory[block].len());
311 self.memory[block][offset] = value;
312 }
313
314 /// Duplicates a block of memory.
315 ///
316 /// The block is copied to the first block of memory.
317 ///
318 /// # Panics
319 ///
320 /// Panics if the `block` is not an allocated block of memory.
321 ///
322 pub fn duplicate_memory(&mut self, block: u32) -> &[u32] {
323 let block = block.into_index();
324 assert!(block < self.memory.len());
325 self.memory[0] = self.memory[block].clone();
326 &self.memory[0]
327 }
328
329 /// Allocates a block of memory of the specified length.
330 #[allow(clippy::cast_possible_truncation)]
331 pub fn allocate_memory(&mut self, length: u32) -> u32 {
332 if let Some(index) = self.free_blocks.pop() {
333 self.memory[index.into_index()] = Self::new_block(length.into_index());
334 index
335 } else {
336 self.memory.push(Self::new_block(length.into_index()));
337
338 // The Universal Machine only deals with 32-bit values, so truncation here is
339 // impossible.
340 (self.memory.len() - 1) as u32
341 }
342 }
343
344 /// Free a block of memory.
345 ///
346 /// # Panics
347 ///
348 /// Panics if `block` is not an allocated block of memory.
349 ///
350 pub fn free_memory(&mut self, block: u32) {
351 assert!(block.into_index() < self.memory.len());
352 self.free_blocks.push(block);
353 self.memory[block.into_index()] = Self::new_block(0);
354 }
355
356 /// Create a new block of memory.
357 ///
358 /// The block is initialised with `len` zeroes.
359 ///
360 #[must_use]
361 pub fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> {
362 smallvec::smallvec![0; len]
363 }
364}
365
366#[cfg(test)]
367mod tests {
368 use super::*;
369
370 #[test]
371 #[should_panic]
372 fn empty_program() {
373 Um::new(vec![]).run();
374 }
375
376 #[test]
377 fn just_halt() {
378 Um::new(vec![0x70000000]).run();
379 }
380
381 #[test]
382 #[cfg(feature = "asm")]
383 fn hello_world() {
384 let program = crate::asm::assemble(include_str!("../files/hello-world.asm"));
385 let mut buffer = Vec::new();
386 Um::new(program).stdout(&mut buffer).run();
387 assert_eq!(&buffer, b"Hello, world!\n");
388 }
389
390 #[test]
391 #[cfg(feature = "asm")]
392 fn cat() {
393 let program = crate::asm::assemble(include_str!("../files/cat.asm"));
394 let input = include_bytes!("lib.rs");
395
396 let mut reader = std::io::Cursor::new(input);
397 let mut buffer = Vec::new();
398
399 Um::new(program)
400 .stdin(&mut reader)
401 .stdout(&mut buffer)
402 .run();
403
404 assert_eq!(&buffer, &input);
405 }
406}