Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
at main 406 lines 14 kB view raw
1// Copyright (C) 2025 Thom Hayward. 2// 3// This program is free software: you can redistribute it and/or modify it under 4// the terms of the GNU General Public License as published by the Free Software 5// Foundation, version 3. 6// 7// This program is distributed in the hope that it will be useful, but WITHOUT 8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10// details. 11// 12// You should have received a copy of the GNU General Public License along with 13// this program. If not, see <https://www.gnu.org/licenses/>. 14// 15 16use std::io::Read; 17use std::io::Write; 18 19use smallvec::SmallVec; 20 21use crate::ops::Operation; 22use crate::reg::{Page, Register}; 23 24pub const SMALLVEC_SIZE: usize = 24; 25 26/// Lossless conversion to `usize`. 27/// 28/// This should only be implemented on types which can be losslessly 29/// cast to a `usize`. 30trait IntoIndex: Sized + Copy { 31 fn into_index(self) -> usize; 32} 33 34macro_rules! impl_into_index { 35 ($t:ty) => { 36 impl IntoIndex for $t { 37 fn into_index(self) -> usize { 38 self as usize 39 } 40 } 41 }; 42} 43 44#[cfg(target_pointer_width = "16")] 45compile_error!("16 bit architectures are unsupported"); 46 47// usize *may* be 16 bits, so only implement if it is 32 or 64 bits. 48#[cfg(any(target_pointer_width = "64", target_pointer_width = "32"))] 49impl_into_index!(u32); 50 51#[derive(Default)] 52pub struct Um<'a> { 53 pub program_counter: u32, 54 pub registers: Page, 55 /// Program memory, modelled as a `Vec` of `SmallVec`. 56 /// 57 /// Memory allocations greater than `SMALLVEC_SIZE` will incur a memory 58 /// indirection penalty for every memory access within that block. 59 pub memory: Vec<SmallVec<[u32; SMALLVEC_SIZE]>>, 60 pub free_blocks: Vec<u32>, 61 /// Partially decoded operations cache. 62 pub ops: Vec<Operation>, 63 pub stdin: Option<&'a mut dyn Read>, 64 pub stdout: Option<&'a mut dyn Write>, 65} 66 67impl<'a> Um<'a> { 68 /// Initialise a Universal Machine with the specified program scroll. 69 #[must_use] 70 pub fn new(program: Vec<u32>) -> Self { 71 let ops = crate::ops::decode(&program); 72 Self { 73 memory: vec![program.into()], 74 ops, 75 ..Default::default() 76 } 77 } 78 79 /// Sets the output for the universal machine. 80 #[must_use] 81 pub fn stdout<T: Write>(mut self, stdout: &'a mut T) -> Self { 82 self.stdout.replace(stdout); 83 self 84 } 85 86 /// Sets the input for the universal machine. 87 #[must_use] 88 pub fn stdin<T: Read>(mut self, stdin: &'a mut T) -> Self { 89 self.stdin.replace(stdin); 90 self 91 } 92 93 /// Begin the spin-cycle of the Universal Machine. 94 /// 95 /// # Panics 96 /// 97 /// Panics if the machine encounters an illegal instruction. 98 /// 99 #[inline(never)] 100 #[allow(clippy::return_self_not_must_use, clippy::must_use_candidate)] 101 pub fn run(mut self) -> Self { 102 loop { 103 // println!( 104 // "{:?}, pc: {:08x}, r: {:08x?}", 105 // self.ops[self.program_counter as usize], self.program_counter, self.registers 106 // ); 107 match self.ops[self.program_counter as usize] { 108 Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 109 Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 110 Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 111 Operation::Addition { a, b, c } => self.addition(a, b, c), 112 Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 113 Operation::Division { a, b, c } => self.division(a, b, c), 114 Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 115 Operation::Halt => break, 116 Operation::Allocation { b, c } => self.allocation(b, c), 117 Operation::Abandonment { c } => self.abandonment(c), 118 Operation::Output { c } => self.output(c), 119 Operation::Input { c } => self.input(c), 120 Operation::LoadProgram { b, c } => { 121 self.load_program(b, c); 122 continue; 123 } 124 Operation::Orthography { a, value } => self.orthography(a, value), 125 Operation::IllegalInstruction => self.illegal_instruction(), 126 } 127 self.program_counter += 1; 128 } 129 130 self 131 } 132 133 // Un-commenting step() slows down the sandmark benchmark by ~3-5 seconds, even 134 // though it has *no* interaction with the code path in Um::run(). 135 // 136 // /// Steps one instruction. 137 // #[inline(never)] 138 // pub fn step(&mut self) -> bool { 139 // match self.ops[self.program_counter as usize] { 140 // Operation::ConditionalMove { a, b, c } => self.conditional_move(a, b, c), 141 // Operation::ArrayIndex { a, b, c } => self.array_index(a, b, c), 142 // Operation::ArrayAmendment { a, b, c } => self.array_amendment(a, b, c), 143 // Operation::Addition { a, b, c } => self.addition(a, b, c), 144 // Operation::Multiplication { a, b, c } => self.multiplication(a, b, c), 145 // Operation::Division { a, b, c } => self.division(a, b, c), 146 // Operation::NotAnd { a, b, c } => self.not_and(a, b, c), 147 // Operation::Halt => return false, 148 // Operation::Allocation { b, c } => self.allocation(b, c), 149 // Operation::Abandonment { c } => self.abandonment(c), 150 // Operation::Output { c } => self.output(c), 151 // Operation::Input { c } => self.input(c), 152 // Operation::LoadProgram { b, c } => { 153 // self.load_program(b, c); 154 // return true; 155 // } 156 // Operation::Orthography { a, value } => self.orthography(a, value), 157 // Operation::IllegalInstruction => self.illegal_instruction(), 158 // } 159 // self.program_counter += 1; 160 // true 161 // } 162 163 /// Loads the value from the specified register. 164 #[must_use] 165 pub fn load_register(&self, register: Register) -> u32 { 166 self.registers[register] 167 } 168 169 /// Saves a value to the specified register. 170 pub fn save_register(&mut self, register: Register, value: u32) { 171 self.registers[register] = value; 172 } 173 174 pub fn conditional_move(&mut self, a: Register, b: Register, c: Register) { 175 if self.load_register(c) != 0 { 176 self.save_register(a, self.load_register(b)); 177 } 178 } 179 180 pub fn array_index(&mut self, a: Register, b: Register, c: Register) { 181 let block = self.load_register(b); 182 let offset = self.load_register(c); 183 self.save_register(a, self.load_memory(block, offset)); 184 } 185 186 pub fn array_amendment(&mut self, a: Register, b: Register, c: Register) { 187 let block = self.load_register(a); 188 let offset = self.load_register(b); 189 let value = self.load_register(c); 190 self.store_memory(block, offset, value); 191 } 192 193 pub fn addition(&mut self, a: Register, b: Register, c: Register) { 194 self.save_register(a, self.load_register(b).wrapping_add(self.load_register(c))); 195 } 196 197 pub fn multiplication(&mut self, a: Register, b: Register, c: Register) { 198 self.save_register(a, self.load_register(b).wrapping_mul(self.load_register(c))); 199 } 200 201 pub fn division(&mut self, a: Register, b: Register, c: Register) { 202 self.save_register(a, self.load_register(b).wrapping_div(self.load_register(c))); 203 } 204 205 pub fn not_and(&mut self, a: Register, b: Register, c: Register) { 206 self.save_register(a, !(self.load_register(b) & self.load_register(c))); 207 } 208 209 pub fn allocation(&mut self, b: Register, c: Register) { 210 let length = self.load_register(c); 211 let index = self.allocate_memory(length); 212 self.save_register(b, index); 213 } 214 215 pub fn abandonment(&mut self, c: Register) { 216 let block = self.load_register(c); 217 self.free_memory(block); 218 } 219 220 /// Write the value in the specified register to stdout. 221 /// 222 /// # Panics 223 /// 224 /// Panics if writing to stdout fails. 225 /// 226 pub fn output(&mut self, c: Register) { 227 let value = self.load_register(c); 228 if let Some(stdout) = self.stdout.as_mut() { 229 let buffer = [(value & 0xff) as u8]; 230 stdout.write_all(&buffer).unwrap(); 231 } 232 } 233 234 /// Read a value from stdin into the specifed register. 235 // 236 // The `as` cast below benchmarks faster than using u32::from. 237 #[allow(clippy::cast_lossless)] 238 pub fn input(&mut self, c: Register) { 239 if let Some(stdin) = self.stdin.as_mut() { 240 let mut buffer = vec![0]; 241 match stdin.read_exact(&mut buffer) { 242 Ok(()) => self.save_register(c, buffer[0] as u32), 243 Err(_) => self.save_register(c, u32::MAX), 244 } 245 } else { 246 self.save_register(c, u32::MAX); 247 } 248 } 249 250 pub fn load_program(&mut self, b: Register, c: Register) { 251 let block = self.load_register(b); 252 253 // Source array is always copied to array[0], but there 254 // is no point copying array[0] to array[0]. 255 if block != 0 { 256 let duplicated = self.duplicate_memory(block); 257 let ops = crate::ops::decode(duplicated); 258 self.ops = ops; 259 } 260 261 self.program_counter = self.load_register(c); 262 } 263 264 pub fn orthography(&mut self, a: Register, value: u32) { 265 self.save_register(a, value); 266 } 267 268 /// Print the current instruction, program counter, and registers to stderr and quit. 269 /// 270 /// # Panics 271 /// 272 /// Panics when called. Every time. 273 /// 274 #[cold] 275 #[inline(never)] 276 pub fn illegal_instruction(&self) -> ! { 277 panic!( 278 "illegal instruction: {:08x}, pc: {:08x}, r: {:08x?}", 279 self.memory[0][self.program_counter.into_index()], 280 self.program_counter, 281 self.registers 282 ) 283 } 284 285 /// Load a value from `offset` in the specified memory `block`. 286 /// 287 /// # Panics 288 /// 289 /// Panics if the `block` is not an allocated block, or if `offset` overflows the current 290 /// memory block. 291 /// 292 #[must_use] 293 pub fn load_memory(&self, block: u32, offset: u32) -> u32 { 294 let block = block.into_index(); 295 let offset = offset.into_index(); 296 assert!(block < self.memory.len() && offset < self.memory[block].len()); 297 self.memory[block][offset] 298 } 299 300 /// Store a `value` at `offset` in the specified memory `block`. 301 /// 302 /// # Panics 303 /// 304 /// Panics if the `block` is not an allocated block, or if `offset` overflows the current 305 /// memory block. 306 /// 307 pub fn store_memory(&mut self, block: u32, offset: u32, value: u32) { 308 let block = block.into_index(); 309 let offset = offset.into_index(); 310 assert!(block < self.memory.len() && offset < self.memory[block].len()); 311 self.memory[block][offset] = value; 312 } 313 314 /// Duplicates a block of memory. 315 /// 316 /// The block is copied to the first block of memory. 317 /// 318 /// # Panics 319 /// 320 /// Panics if the `block` is not an allocated block of memory. 321 /// 322 pub fn duplicate_memory(&mut self, block: u32) -> &[u32] { 323 let block = block.into_index(); 324 assert!(block < self.memory.len()); 325 self.memory[0] = self.memory[block].clone(); 326 &self.memory[0] 327 } 328 329 /// Allocates a block of memory of the specified length. 330 #[allow(clippy::cast_possible_truncation)] 331 pub fn allocate_memory(&mut self, length: u32) -> u32 { 332 if let Some(index) = self.free_blocks.pop() { 333 self.memory[index.into_index()] = Self::new_block(length.into_index()); 334 index 335 } else { 336 self.memory.push(Self::new_block(length.into_index())); 337 338 // The Universal Machine only deals with 32-bit values, so truncation here is 339 // impossible. 340 (self.memory.len() - 1) as u32 341 } 342 } 343 344 /// Free a block of memory. 345 /// 346 /// # Panics 347 /// 348 /// Panics if `block` is not an allocated block of memory. 349 /// 350 pub fn free_memory(&mut self, block: u32) { 351 assert!(block.into_index() < self.memory.len()); 352 self.free_blocks.push(block); 353 self.memory[block.into_index()] = Self::new_block(0); 354 } 355 356 /// Create a new block of memory. 357 /// 358 /// The block is initialised with `len` zeroes. 359 /// 360 #[must_use] 361 pub fn new_block(len: usize) -> SmallVec<[u32; SMALLVEC_SIZE]> { 362 smallvec::smallvec![0; len] 363 } 364} 365 366#[cfg(test)] 367mod tests { 368 use super::*; 369 370 #[test] 371 #[should_panic] 372 fn empty_program() { 373 Um::new(vec![]).run(); 374 } 375 376 #[test] 377 fn just_halt() { 378 Um::new(vec![0x70000000]).run(); 379 } 380 381 #[test] 382 #[cfg(feature = "asm")] 383 fn hello_world() { 384 let program = crate::asm::assemble(include_str!("../files/hello-world.asm")); 385 let mut buffer = Vec::new(); 386 Um::new(program).stdout(&mut buffer).run(); 387 assert_eq!(&buffer, b"Hello, world!\n"); 388 } 389 390 #[test] 391 #[cfg(feature = "asm")] 392 fn cat() { 393 let program = crate::asm::assemble(include_str!("../files/cat.asm")); 394 let input = include_bytes!("lib.rs"); 395 396 let mut reader = std::io::Cursor::new(input); 397 let mut buffer = Vec::new(); 398 399 Um::new(program) 400 .stdin(&mut reader) 401 .stdout(&mut buffer) 402 .run(); 403 404 assert_eq!(&buffer, &input); 405 } 406}