Implementation of the UM-32 "Universal Machine" as described by the Cult of the Bound Variable
at main 361 lines 12 kB view raw
1// Copyright (C) 2025 Thom Hayward. 2// 3// This program is free software: you can redistribute it and/or modify it under 4// the terms of the GNU General Public License as published by the Free Software 5// Foundation, version 3. 6// 7// This program is distributed in the hope that it will be useful, but WITHOUT 8// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 9// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 10// details. 11// 12// You should have received a copy of the GNU General Public License along with 13// this program. If not, see <https://www.gnu.org/licenses/>. 14// 15 16mod lexer; 17mod parse; 18 19use crate::reg::Register; 20use lexer::Token; 21use parse::{Instruction, Node, NodeType, PragmaType}; 22use std::collections::HashMap; 23 24#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] 25enum Section { 26 Text, 27 Data, 28} 29 30/// Assemble a Universal Machine program. 31/// 32/// # Panics 33/// 34/// Panics if `source` cannot be parsed. 35/// 36#[must_use] 37#[allow(clippy::too_many_lines)] 38pub fn assemble<'s>(source: &'s str) -> Vec<u32> { 39 let parsed = parse::parse("", source).unwrap(); 40 41 let mut sections: HashMap<Section, Vec<&Node<'s>>> = HashMap::new(); 42 let mut offsets: HashMap<Section, usize> = HashMap::new(); 43 let mut label_locations: HashMap<&'s str, (Section, usize)> = HashMap::new(); 44 for node in parsed.nodes() { 45 match node.entity { 46 NodeType::Pragma(_) => { 47 let loc = *offsets 48 .entry(Section::Data) 49 .and_modify(|loc| *loc += node.size()) 50 .or_default(); 51 52 sections 53 .entry(Section::Data) 54 .and_modify(|section| section.push(node)) 55 .or_insert_with(|| vec![node]); 56 57 for label in &node.labels { 58 label_locations.insert(label, (Section::Data, loc)); 59 } 60 } 61 NodeType::Instruction(_) => { 62 let loc = *offsets 63 .entry(Section::Text) 64 .and_modify(|loc| *loc += node.size()) 65 .or_default(); 66 67 sections 68 .entry(Section::Text) 69 .and_modify(|section| section.push(node)) 70 .or_insert_with(|| vec![node]); 71 72 for label in &node.labels { 73 label_locations.insert(label, (Section::Text, loc)); 74 } 75 } 76 NodeType::Comment(_) => {} 77 } 78 } 79 80 let text = sections.remove(&Section::Text).unwrap(); 81 let data_offset = text.len(); 82 83 let mut program = vec![]; 84 for node in text { 85 let NodeType::Instruction(instruction) = &node.entity else { 86 panic!("invalid node in .text section"); 87 }; 88 89 let encoded = match *instruction { 90 Instruction::ConditionalMove { 91 destination, 92 source, 93 condition, 94 } => encode_standard(0x00, destination, source, condition), 95 Instruction::Load { 96 destination, 97 address, 98 } => { 99 let parse::Location { block, offset } = address; 100 encode_standard(0x01, destination, block, offset) 101 } 102 Instruction::Store { source, address } => { 103 let parse::Location { block, offset } = address; 104 encode_standard(0x02, block, offset, source) 105 } 106 Instruction::Add { destination, a, b } => encode_standard(0x03, destination, a, b), 107 Instruction::AddAssign { destination, a } => { 108 encode_standard(0x03, destination, destination, a) 109 } 110 Instruction::AddSelf { destination } => { 111 encode_standard(0x03, destination, destination, destination) 112 } 113 Instruction::Mul { destination, a, b } => encode_standard(0x04, destination, a, b), 114 Instruction::MulAssign { destination, a } => { 115 encode_standard(0x04, destination, destination, a) 116 } 117 Instruction::MulSelf { destination } => { 118 encode_standard(0x04, destination, destination, destination) 119 } 120 Instruction::Div { destination, a, b } => encode_standard(0x05, destination, a, b), 121 Instruction::DivAssign { destination, a } => { 122 encode_standard(0x05, destination, destination, a) 123 } 124 Instruction::DivSelf { destination } => { 125 encode_standard(0x05, destination, destination, destination) 126 } 127 Instruction::Nand { destination, a, b } => encode_standard(0x06, destination, a, b), 128 Instruction::NandAssign { destination, a } => { 129 encode_standard(0x06, destination, destination, a) 130 } 131 Instruction::NandSelf { destination } => { 132 encode_standard(0x06, destination, destination, destination) 133 } 134 Instruction::Halt => encode_standard( 135 0x07, 136 Register::default(), 137 Register::default(), 138 Register::default(), 139 ), 140 Instruction::Alloc { 141 destination, 142 length, 143 } => encode_standard(0x08, Register::default(), destination, length), 144 Instruction::Free { block } => { 145 encode_standard(0x09, Register::default(), Register::default(), block) 146 } 147 Instruction::Out { source } => { 148 encode_standard(0x0a, Register::default(), Register::default(), source) 149 } 150 Instruction::In { destination } => { 151 encode_standard(0x0b, Register::default(), Register::default(), destination) 152 } 153 Instruction::Jmp { location } => { 154 let parse::Location { block, offset } = location; 155 encode_standard(0x0c, Register::default(), block, offset) 156 } 157 Instruction::Address { 158 destination, 159 ref reference, 160 } => { 161 // lookup reference 162 let Some((section, offset)) = label_locations.get(reference.label) else { 163 panic!("failed to resolve {}", reference.label); 164 }; 165 166 let value = match section { 167 Section::Text => *offset, 168 Section::Data => data_offset + *offset, 169 }; 170 171 0xd000_0000 172 | destination.encode_a_ortho() 173 | encode_literal(u32::try_from(value).unwrap()) 174 } 175 Instruction::LiteralMove { 176 destination, 177 literal, 178 } => 0xd000_0000 | destination.encode_a_ortho() | encode_literal(literal), 179 }; 180 181 program.push(encoded); 182 } 183 184 if let Some(data) = sections.remove(&Section::Data) { 185 for node in data { 186 let NodeType::Pragma(pragma) = &node.entity else { 187 panic!("invalid node in .data section. {node:?}"); 188 }; 189 190 let encoded = match &pragma.payload { 191 PragmaType::WideString { value } => { 192 for &byte in value.as_bytes() { 193 program.push(u32::from(byte)); 194 } 195 Some(0) // terminating byte. 196 } 197 PragmaType::U32 { value } => Some(*value), 198 }; 199 200 if let Some(encoded) = encoded { 201 program.push(encoded); 202 } 203 } 204 } 205 206 program 207} 208 209fn encode_literal(value: u32) -> u32 { 210 const LITERAL_MAX: u32 = 0x1ff_ffff; 211 assert!(value <= LITERAL_MAX, "literal value exceeds available bits. value: {value} (0x{value:x}), max: {LITERAL_MAX} (0x{LITERAL_MAX:x})"); 212 value 213} 214 215const fn encode_standard(op: u32, a: Register, b: Register, c: Register) -> u32 { 216 (op << 28) | a.encode_a() | b.encode_b() | c.encode_c() 217} 218 219#[cfg(test)] 220mod tests { 221 use super::*; 222 use crate::ops::Operation; 223 use crate::reg::Register::*; 224 225 #[test] 226 fn wide_str() { 227 // Embed a wide string and get a reference to it. 228 let program = assemble( 229 r#" 230 adr r0, msg 231 msg: .wstr "Hello" 232 "#, 233 ); 234 235 let ops = crate::ops::decode(&program); 236 assert_eq!(ops[0], Operation::Orthography { a: R0, value: 1 }); 237 238 let mut platters = program.into_iter().skip(1); 239 assert_eq!(platters.next(), Some('H' as u32)); 240 assert_eq!(platters.next(), Some('e' as u32)); 241 assert_eq!(platters.next(), Some('l' as u32)); 242 assert_eq!(platters.next(), Some('l' as u32)); 243 assert_eq!(platters.next(), Some('o' as u32)); 244 assert_eq!(platters.next(), Some(0)); 245 assert_eq!(platters.next(), None); 246 } 247 248 #[test] 249 fn addresses() { 250 let program = assemble( 251 r#" 252 halt 253 start: 254 ldr r2, [r0, r1] 255 str r2, [r0, r1] 256 adr r3, start 257 halt 258 "#, 259 ); 260 261 let mut ops = crate::ops::decode(&program).into_iter(); 262 263 assert_eq!(ops.next(), Some(Operation::Halt)); 264 assert_eq!( 265 ops.next(), 266 Some(Operation::ArrayIndex { 267 a: R2, 268 b: R0, 269 c: R1 270 }) 271 ); 272 assert_eq!( 273 ops.next(), 274 Some(Operation::ArrayAmendment { 275 a: R0, 276 b: R1, 277 c: R2 278 }) 279 ); 280 assert_eq!(ops.next(), Some(Operation::Orthography { a: R3, value: 1 })); 281 assert_eq!(ops.next(), Some(Operation::Halt)); 282 assert_eq!(ops.next(), None); 283 } 284 285 #[test] 286 fn load_store() { 287 let state = crate::Um::new(assemble( 288 r#" 289 adr r1, loc 290 ldr r2, [r0, r1] 291 mov r3, 56 292 str r3, [r0, r1] 293 halt 294 loc:.u32 42 295 "#, 296 )) 297 .run(); 298 assert_eq!(state.registers[R2], 42); 299 assert_eq!(state.memory[0][5], 56); 300 } 301 302 #[test] 303 fn addition() { 304 let state = crate::Um::new(assemble( 305 r#" 306 mov r0, 42 307 mov r1, 64 308 mov r2, 8192 309 310 add r3, r0, r1 ; r3 = r0 + r1 = 106 311 add r1, r2 ; r1 = r1 + r2 = 8256 312 add r0 ; r0 = r0 + r0 = 84 313 314 halt 315 "#, 316 )) 317 .run(); 318 319 assert_eq!(state.registers[R0], 84); 320 assert_eq!(state.registers[R1], 8256); 321 assert_eq!(state.registers[R2], 8192); 322 assert_eq!(state.registers[R3], 106); 323 } 324 325 #[test] 326 fn alloc() { 327 let state = crate::Um::new(assemble( 328 r#" 329 ; Allocate 1000 bytes. 330 mov r0, 1000 331 alloc r1, r0 332 halt 333 "#, 334 )) 335 .run(); 336 assert_eq!(state.registers[R0], 1000); 337 assert_ne!(state.registers[R1], 0); 338 assert_eq!(state.memory[state.registers[R1] as usize].len(), 1000); 339 } 340 341 #[test] 342 fn free() { 343 let state = crate::Um::new(assemble( 344 r#" 345 ; Allocate 1000 bytes. 346 mov r0, 1000 347 alloc r1, r0 348 free r1 349 halt 350 "#, 351 )) 352 .run(); 353 assert_eq!(state.registers[R0], 1000); 354 assert_ne!(state.registers[R1], 0); 355 assert_eq!( 356 state.memory[state.registers[R1] as usize].len(), 357 0, 358 "memory not free'd" 359 ); 360 } 361}