pub mod boot_info; pub mod cap; pub mod clock; pub mod cnode; pub mod debug; pub mod dma; pub mod exit; pub mod fb; pub mod frame; pub mod ipc; pub mod irq; pub mod log; pub mod module; pub mod notify; pub mod pci; pub mod proc; pub mod retype; pub mod ring; pub mod sched; use crate::error::KernelError; use crate::proc::context::CpuContext; use crate::types::{MAX_PIDS, Pid}; #[repr(transparent)] #[derive(Debug, Clone, Copy)] pub struct SyscallResult(u64); impl SyscallResult { pub const fn success(val: u64) -> Self { debug_assert!( (val as i64) >= 0, "SyscallResult::success value collides with errno range" ); Self(val) } pub const fn ok() -> Self { Self(0) } pub const fn error(e: KernelError) -> Self { Self(e.to_errno() as u64) } #[allow(dead_code)] pub const fn is_err(self) -> bool { (self.0 as i64) < 0 } pub const fn raw(self) -> u64 { self.0 } } impl From for SyscallResult { fn from(e: KernelError) -> Self { Self::error(e) } } impl From> for SyscallResult where T: Into, { fn from(r: Result) -> Self { match r { Ok(v) => v.into(), Err(e) => Self::error(e), } } } macro_rules! try_syscall { ($ctx:expr, $expr:expr) => { match $expr { Ok(val) => val, Err(e) => { $ctx.rax = SyscallResult::error(e).raw(); return; } } }; } pub(crate) use try_syscall; macro_rules! syscall_enum { ($( $name:ident = $val:expr ),+ $(,)?) => { #[repr(u64)] #[derive(Debug, Clone, Copy)] pub enum SyscallNr { $( $name = $val ),+ } impl TryFrom for SyscallNr { type Error = (); fn try_from(nr: u64) -> Result { match nr { $( $val => Ok(Self::$name), )+ _ => Err(()), } } } }; } syscall_enum! { Nop = 0, DebugPrint = 1, Exit = 2, Getpid = 3, SysLog = 4, GetProcName = 5, CapDerive = 11, CapRevoke = 12, CapIdentify = 13, CapGrant = 14, CnodeCreate = 15, CnodeCopy = 16, CnodeDelete = 17, Send = 20, Recv = 21, Call = 22, ReplyRecv = 23, NbRecv = 24, NotifySignal = 30, NotifyWait = 31, NotifyPoll = 32, NtfnBind = 33, NtfnUnbind = 34, FrameMap = 44, FrameUnmap = 45, FrameMapChild = 46, RingRegister = 50, RingEnter = 51, SchedAttach = 61, SchedYield = 62, SchedConfigure = 63, ProcCreate = 70, ProcSetRegs = 72, ProcStart = 73, ProcDestroy = 74, ProcLoadModule = 75, ProcBindDeathNotif = 76, ThreadCreate = 77, SetFsBase = 78, IrqBind = 80, IrqAck = 81, IoIn8 = 82, IoOut8 = 83, IrqConfigure = 85, ModuleInfo = 90, FbInfo = 91, FbMap = 92, PciDeviceCount = 100, PciDeviceInfo = 101, PciBarMap = 102, PciBarUnmap = 103, PciConfigRead32 = 104, PciConfigWrite32 = 105, PciMsixConfigure = 106, ClockMonotonicMs = 110, DmaAlloc = 120, DmaFree = 121, BlackboxRead = 130, UntypedRetype = 140, BootUntypedInfo = 141, } pub use crate::mem::typed_addr::{MIN_USER_VADDR, USER_ADDR_LIMIT, UserVirtAddr}; const PAGE_SIZE: u64 = 4096; const PT_INDEX_MASK: u64 = 0x1FF; const PTE_ADDR_MASK: u64 = 0x000F_FFFF_FFFF_F000; const PTE_PRESENT: u64 = 1 << 0; const PTE_WRITABLE: u64 = 1 << 1; const PTE_USER: u64 = 1 << 2; const PTE_HUGE: u64 = 1 << 7; const PTE_NX: u64 = 1 << 63; const PTE_SIZE: u64 = 8; fn read_pte(table_phys: u64, index: u64, hhdm: u64) -> u64 { unsafe { core::ptr::read_volatile((table_phys + hhdm + index * PTE_SIZE) as *const u64) } } const L4_SHIFT: u32 = 39; const L3_SHIFT: u32 = 30; const L2_SHIFT: u32 = 21; const L1_SHIFT: u32 = 12; const HUGE_1G_OFFSET_MASK: u64 = (1 << L3_SHIFT) - 1; const HUGE_2M_OFFSET_MASK: u64 = (1 << L2_SHIFT) - 1; pub(crate) fn resolve_user_page( pml4_phys: x86_64::PhysAddr, vaddr: u64, needs_write: bool, needs_execute: bool, ) -> Option { let hhdm = crate::mem::addr::hhdm_offset(); let required_bits: u64 = if needs_write { PTE_PRESENT | PTE_WRITABLE | PTE_USER } else { PTE_PRESENT | PTE_USER }; let indices: [u64; 4] = [ (vaddr >> L4_SHIFT) & PT_INDEX_MASK, (vaddr >> L3_SHIFT) & PT_INDEX_MASK, (vaddr >> L2_SHIFT) & PT_INDEX_MASK, (vaddr >> L1_SHIFT) & PT_INDEX_MASK, ]; #[derive(Clone, Copy)] enum Walk { Next(u64), Huge(u64, u8), Fault, } let result = indices .iter() .enumerate() .fold( Walk::Next(pml4_phys.as_u64()), |state, (level, &idx)| match state { Walk::Next(table_phys) => { let entry = read_pte(table_phys, idx, hhdm); if (entry & required_bits != required_bits) || (needs_execute && (entry & PTE_NX) != 0) { Walk::Fault } else if level > 0 && level < 3 && (entry & PTE_HUGE) != 0 { Walk::Huge(entry & PTE_ADDR_MASK, level as u8) } else { Walk::Next(entry & PTE_ADDR_MASK) } } other => other, }, ); match result { Walk::Next(frame_phys) => Some(frame_phys), Walk::Huge(base_phys, level) => { let offset_within = match level { 1 => vaddr & HUGE_1G_OFFSET_MASK, _ => vaddr & HUGE_2M_OFFSET_MASK, }; Some((base_phys + offset_within) & !0xFFF) } Walk::Fault => None, } } struct PageChunk { frame_offset: u64, len: usize, } fn intersect_page(range_start: u64, range_end: u64, page_base: u64) -> PageChunk { let chunk_start = range_start.max(page_base); let chunk_end = range_end.min(page_base + PAGE_SIZE); PageChunk { frame_offset: chunk_start - page_base, len: (chunk_end - chunk_start) as usize, } } pub fn copy_from_user( src: u64, dst: &mut [u8], len: usize, _proof: &crate::sync::InterruptsDisabledToken, ) -> Result<(), KernelError> { if len == 0 { return Ok(()); } if len > dst.len() || src < MIN_USER_VADDR { return Err(KernelError::InvalidAddress); } let end = src .checked_add(len as u64) .ok_or(KernelError::InvalidAddress)?; if end >= USER_ADDR_LIMIT { return Err(KernelError::InvalidAddress); } let pid = crate::arch::syscall::current_pid(); let pml4 = { let ptable = crate::proc::PROCESSES.lock(); ptable .get(pid) .map(|p| p.pml4_phys.raw()) .ok_or(KernelError::InvalidObject)? }; let hhdm = crate::mem::addr::hhdm_offset(); let max_phys = (crate::mem::phys::BitmapFrameAllocator::total_frames() as u64) * PAGE_SIZE; let start_page = src / PAGE_SIZE; let end_page = (end - 1) / PAGE_SIZE; (start_page..=end_page).try_fold(0usize, |copied, page| { let page_base = page * PAGE_SIZE; let frame_phys = resolve_user_page(pml4, page_base, false, false) .filter(|&fp| fp < max_phys) .ok_or(KernelError::InvalidAddress)?; let chunk = intersect_page(src, end, page_base); let hhdm_src = (frame_phys + hhdm + chunk.frame_offset) as *const u8; unsafe { core::ptr::copy_nonoverlapping(hhdm_src, dst.as_mut_ptr().add(copied), chunk.len); } Ok(copied + chunk.len) })?; Ok(()) } #[allow(dead_code)] pub fn copy_to_user( dst: u64, src: &[u8], len: usize, _proof: &crate::sync::InterruptsDisabledToken, ) -> Result<(), KernelError> { if len == 0 { return Ok(()); } if len > src.len() || dst < MIN_USER_VADDR { return Err(KernelError::InvalidAddress); } let end = dst .checked_add(len as u64) .ok_or(KernelError::InvalidAddress)?; if end >= USER_ADDR_LIMIT { return Err(KernelError::InvalidAddress); } let pid = crate::arch::syscall::current_pid(); let pml4 = { let ptable = crate::proc::PROCESSES.lock(); ptable .get(pid) .map(|p| p.pml4_phys.raw()) .ok_or(KernelError::InvalidObject)? }; let hhdm = crate::mem::addr::hhdm_offset(); let max_phys = (crate::mem::phys::BitmapFrameAllocator::total_frames() as u64) * PAGE_SIZE; let start_page = dst / PAGE_SIZE; let end_page = (end - 1) / PAGE_SIZE; (start_page..=end_page).try_fold(0usize, |copied, page| { let page_base = page * PAGE_SIZE; let frame_phys = resolve_user_page(pml4, page_base, true, false) .filter(|&fp| fp < max_phys) .ok_or(KernelError::InvalidAddress)?; let chunk = intersect_page(dst, end, page_base); let hhdm_dst = (frame_phys + hhdm + chunk.frame_offset) as *mut u8; unsafe { core::ptr::copy_nonoverlapping(src.as_ptr().add(copied), hhdm_dst, chunk.len); } Ok(copied + chunk.len) })?; Ok(()) } pub fn validate_user_vaddr(addr: u64) -> Result { UserVirtAddr::new(addr) } pub fn u8_from_reg(val: u64) -> Result { u8::try_from(val).map_err(|_| KernelError::InvalidParameter) } pub fn u16_from_reg(val: u64) -> Result { u16::try_from(val).map_err(|_| KernelError::InvalidParameter) } pub fn u32_from_reg(val: u64) -> Result { u32::try_from(val).map_err(|_| KernelError::InvalidParameter) } #[allow(dead_code)] pub fn pid_from_u64(val: u64) -> Result { if val < MAX_PIDS as u64 { Ok(Pid::new(val as u16)) } else { Err(KernelError::InvalidParameter) } } #[unsafe(no_mangle)] pub extern "C" fn syscall_dispatch(ctx: *mut CpuContext, nr: u64) { let ctx = unsafe { &mut *ctx }; let pc = unsafe { &mut *crate::arch::syscall::this_cpu() }; pc.syscall_count += 1; let count = pc.syscall_count; match SyscallNr::try_from(nr) { Ok(SyscallNr::Nop) => { if count < 5 { crate::show!(sys, "nop from ring 3 count {}", count); } ctx.rax = 0; } Ok(SyscallNr::DebugPrint) => debug::sys_debug_print(ctx), Ok(SyscallNr::Exit) => exit::sys_exit(ctx), Ok(SyscallNr::SysLog) => log::sys_log(ctx), Ok(SyscallNr::GetProcName) => log::sys_get_proc_name(ctx), Ok(SyscallNr::Getpid) => { ctx.rax = crate::arch::syscall::current_pid().raw() as u64; } Ok(SyscallNr::CapDerive) => cap::sys_cap_derive(ctx), Ok(SyscallNr::CapRevoke) => cap::sys_cap_revoke(ctx), Ok(SyscallNr::CapIdentify) => cap::sys_cap_identify(ctx), Ok(SyscallNr::CapGrant) => cap::sys_cap_grant(ctx), Ok(SyscallNr::CnodeCreate) => cnode::sys_cnode_create(ctx), Ok(SyscallNr::CnodeCopy) => cnode::sys_cnode_copy(ctx), Ok(SyscallNr::CnodeDelete) => cnode::sys_cnode_delete(ctx), Ok(SyscallNr::Send) => ipc::sys_send(ctx), Ok(SyscallNr::Recv) => ipc::sys_recv(ctx), Ok(SyscallNr::NbRecv) => ipc::sys_nb_recv(ctx), Ok(SyscallNr::Call) => ipc::sys_call(ctx), Ok(SyscallNr::ReplyRecv) => ipc::sys_reply_recv(ctx), Ok(SyscallNr::NotifySignal) => notify::sys_notify_signal(ctx), Ok(SyscallNr::NotifyWait) => notify::sys_notify_wait(ctx), Ok(SyscallNr::NotifyPoll) => notify::sys_notify_poll(ctx), Ok(SyscallNr::NtfnBind) => notify::sys_ntfn_bind(ctx), Ok(SyscallNr::NtfnUnbind) => notify::sys_ntfn_unbind(ctx), Ok(SyscallNr::FrameMap) => frame::sys_frame_map(ctx), Ok(SyscallNr::FrameUnmap) => frame::sys_frame_unmap(ctx), Ok(SyscallNr::FrameMapChild) => frame::sys_frame_map_child(ctx), Ok(SyscallNr::RingRegister) => ring::sys_ring_register(ctx), Ok(SyscallNr::RingEnter) => ring::sys_ring_enter(ctx), Ok(SyscallNr::SchedAttach) => sched::sys_sched_attach(ctx), Ok(SyscallNr::SchedYield) => sched::sys_sched_yield(ctx), Ok(SyscallNr::SchedConfigure) => sched::sys_sched_configure(ctx), Ok(SyscallNr::ProcCreate) => proc::sys_proc_create(ctx), Ok(SyscallNr::ProcSetRegs) => proc::sys_proc_set_regs(ctx), Ok(SyscallNr::ProcStart) => proc::sys_proc_start(ctx), Ok(SyscallNr::ProcDestroy) => proc::sys_proc_destroy(ctx), Ok(SyscallNr::ProcLoadModule) => proc::sys_proc_load_module(ctx), Ok(SyscallNr::ProcBindDeathNotif) => proc::sys_proc_bind_death_notif(ctx), Ok(SyscallNr::ThreadCreate) => proc::sys_thread_create(ctx), Ok(SyscallNr::SetFsBase) => proc::sys_set_fsbase(ctx), Ok(SyscallNr::IrqBind) => irq::sys_irq_bind(ctx), Ok(SyscallNr::IrqAck) => irq::sys_irq_ack(ctx), Ok(SyscallNr::IoIn8) => irq::sys_io_in8(ctx), Ok(SyscallNr::IoOut8) => irq::sys_io_out8(ctx), Ok(SyscallNr::IrqConfigure) => irq::sys_irq_configure(ctx), Ok(SyscallNr::ModuleInfo) => module::sys_module_info(ctx), Ok(SyscallNr::FbInfo) => fb::sys_fb_info(ctx), Ok(SyscallNr::FbMap) => fb::sys_fb_map(ctx), Ok(SyscallNr::PciDeviceCount) => pci::sys_pci_device_count(ctx), Ok(SyscallNr::PciDeviceInfo) => pci::sys_pci_device_info(ctx), Ok(SyscallNr::PciBarMap) => pci::sys_pci_bar_map(ctx), Ok(SyscallNr::PciBarUnmap) => pci::sys_pci_bar_unmap(ctx), Ok(SyscallNr::PciConfigRead32) => pci::sys_pci_config_read32(ctx), Ok(SyscallNr::PciConfigWrite32) => pci::sys_pci_config_write32(ctx), Ok(SyscallNr::PciMsixConfigure) => pci::sys_pci_msix_configure(ctx), Ok(SyscallNr::ClockMonotonicMs) => clock::sys_clock_monotonic_ms(ctx), Ok(SyscallNr::DmaAlloc) => dma::sys_dma_alloc(ctx), Ok(SyscallNr::DmaFree) => dma::sys_dma_free(ctx), Ok(SyscallNr::BlackboxRead) => log::sys_blackbox_read(ctx), Ok(SyscallNr::UntypedRetype) => retype::sys_untyped_retype(ctx), Ok(SyscallNr::BootUntypedInfo) => boot_info::sys_boot_untyped_info(ctx), Err(()) => { crate::show!(sys, warn, "unknown syscall nr {}", nr); ctx.rax = SyscallResult::error(KernelError::InvalidParameter).raw(); } } }