use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; pub const BLOCK_SIZE_MIN: u32 = 4096; pub const BLOCK_SIZE_MAX: u32 = 65536; pub const BLOCK_SIZE_MIN_LOG2: u8 = 12; pub const BLOCK_SIZE_MAX_LOG2: u8 = 16; pub const BLOCKREF_SIZE: usize = 64; pub const INODE_SIZE: usize = 1024; pub const BTREE_NODE_SIZE: usize = 4096; pub const SUPERBLOCK_SIZE: usize = 4096; pub const BTREE_MAX_ENTRIES: usize = 63; pub const BTREE_MIN_ENTRIES: usize = 31; pub const INODE_DIRECT_REFS: usize = 4; pub const INODE_INLINE_MAX: usize = INODE_DIRECT_REFS * BLOCKREF_SIZE; pub const FREEMAP_BITS_PER_LEAF: usize = BTREE_NODE_SIZE * 8; pub const FREEMAP_COVERAGE_PER_LEAF: u64 = FREEMAP_BITS_PER_LEAF as u64 * BLOCK_SIZE_MIN as u64; pub const BTREE_NODE_MAGIC: u16 = 0x4C42; pub const SUPERBLOCK_MAGIC: u64 = 0x4C61_6E63_6572_4653; pub const SUPERBLOCK_VERSION: u32 = 2; pub const DEDUP_SHARDS: usize = 16; pub const ADDR_MASK: u64 = !0xF; pub const SIZE_SHIFT_MASK: u64 = 0xF; pub const MAX_SIZE_SHIFT: u8 = BLOCK_SIZE_MAX_LOG2 - BLOCK_SIZE_MIN_LOG2; pub const MAX_SYMLINK_DEPTH: u8 = 8; #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum BlockType { Data = 0, Inode = 1, Indirect = 2, Freemap = 3, Directory = 4, } impl BlockType { pub const fn from_u8(v: u8) -> Option { match v { 0 => Some(Self::Data), 1 => Some(Self::Inode), 2 => Some(Self::Indirect), 3 => Some(Self::Freemap), 4 => Some(Self::Directory), _ => None, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum Compression { None = 0, Lz4 = 1, } impl Compression { pub const fn from_u8(v: u8) -> Option { match v { 0 => Some(Self::None), 1 => Some(Self::Lz4), _ => None, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum InodeType { File = 0, Directory = 1, Symlink = 2, } impl InodeType { pub const fn from_u8(v: u8) -> Option { match v { 0 => Some(Self::File), 1 => Some(Self::Directory), 2 => Some(Self::Symlink), _ => None, } } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum CompressionPolicy { Inherit = 0, Lz4 = 1, Disabled = 2, } impl CompressionPolicy { pub const fn from_u8(v: u8) -> Option { match v { 0 => Some(Self::Inherit), 1 => Some(Self::Lz4), 2 => Some(Self::Disabled), _ => None, } } } pub struct InodeFlags; impl InodeFlags { pub const INLINE: u8 = 0x01; pub const INDIRECT: u8 = 0x02; } #[derive(Clone, Copy, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct BlockRef { pub physical_addr: u64, pub key: u64, pub transaction_id: u64, pub content_hash: [u8; 16], pub integrity_crc: u32, pub logical_size: u32, pub block_type: u8, pub compression: u8, pub flags: u8, _reserved: [u8; 13], } const _: () = assert!(core::mem::size_of::() == BLOCKREF_SIZE); impl BlockRef { pub const ZERO: Self = Self { physical_addr: 0, key: 0, transaction_id: 0, content_hash: [0; 16], integrity_crc: 0, logical_size: 0, block_type: 0, compression: 0, flags: 0, _reserved: [0; 13], }; pub const fn is_null(&self) -> bool { self.physical_addr == 0 && self.key == 0 } pub const fn physical_block_addr(&self) -> u64 { self.physical_addr & ADDR_MASK } pub const fn size_shift(&self) -> u8 { (self.physical_addr & SIZE_SHIFT_MASK) as u8 } pub const fn is_valid_size_shift(&self) -> bool { self.size_shift() <= MAX_SIZE_SHIFT } pub const fn block_size_log2(&self) -> Option { match self.is_valid_size_shift() { true => Some(self.size_shift() + BLOCK_SIZE_MIN_LOG2), false => None, } } pub const fn block_size(&self) -> Option { match self.block_size_log2() { Some(log2) => Some(1u32 << log2), None => None, } } pub const fn block_type_enum(&self) -> Option { BlockType::from_u8(self.block_type) } pub const fn compression_enum(&self) -> Option { Compression::from_u8(self.compression) } pub const fn content_hash_u128(&self) -> u128 { u128::from_le_bytes(self.content_hash) } #[allow(clippy::too_many_arguments)] pub const fn new( addr: u64, size_log2: u8, key: u64, transaction_id: u64, content_hash: u128, integrity_crc: u32, block_type: BlockType, compression: Compression, logical_size: u32, flags: u8, ) -> Self { let size_shift = size_log2 - BLOCK_SIZE_MIN_LOG2; assert!(size_shift <= MAX_SIZE_SHIFT); assert!(addr & 0xF == 0); Self { physical_addr: addr | (size_shift as u64), key, transaction_id, content_hash: content_hash.to_le_bytes(), integrity_crc, logical_size, block_type: block_type as u8, compression: compression as u8, flags, _reserved: [0; 13], } } } impl core::fmt::Debug for BlockRef { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("BlockRef") .field("addr", &self.physical_block_addr()) .field("size", &self.block_size().unwrap_or(0)) .field("key", &self.key) .field("txn", &self.transaction_id) .field("type", &self.block_type_enum()) .field("compression", &self.compression_enum()) .field("crc", &self.integrity_crc) .field("flags", &self.flags) .finish() } } #[derive(Clone, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct Inode { pub object_id: u64, pub generation: u64, pub transaction_id: u64, pub size: u64, pub block_count: u64, pub create_time: u64, pub modify_time: u64, pub subtree_hash: [u8; 16], pub link_count: u32, pub rights_template: u16, pub inode_type: u8, pub compression_policy: u8, pub flags: u8, _reserved: [u8; 7], pub direct: [BlockRef; INODE_DIRECT_REFS], pub extended_attrs: [u8; 680], } const _: () = assert!(core::mem::size_of::() == INODE_SIZE); impl Inode { pub const ZERO: Self = Self { object_id: 0, generation: 0, transaction_id: 0, size: 0, block_count: 0, create_time: 0, modify_time: 0, subtree_hash: [0; 16], link_count: 0, rights_template: 0, inode_type: 0, compression_policy: 0, flags: 0, _reserved: [0; 7], direct: [BlockRef::ZERO; INODE_DIRECT_REFS], extended_attrs: [0; 680], }; pub const fn inode_type_enum(&self) -> Option { InodeType::from_u8(self.inode_type) } pub const fn compression_policy_enum(&self) -> Option { CompressionPolicy::from_u8(self.compression_policy) } pub const fn is_inline(&self) -> bool { self.flags & InodeFlags::INLINE != 0 } pub const fn subtree_hash_u128(&self) -> u128 { u128::from_le_bytes(self.subtree_hash) } pub fn inline_data(&self) -> Option<&[u8]> { match self.is_inline() { true => { let len = core::cmp::min(self.size as usize, INODE_INLINE_MAX); Some(&self.direct.as_bytes()[..len]) } false => None, } } pub fn new_file( object_id: u64, generation: u64, transaction_id: u64, rights_template: u16, create_time: u64, ) -> Self { Self { object_id, generation, transaction_id, inode_type: InodeType::File as u8, compression_policy: CompressionPolicy::Inherit as u8, rights_template, create_time, modify_time: create_time, link_count: 1, ..Self::ZERO } } pub fn new_directory( object_id: u64, generation: u64, transaction_id: u64, rights_template: u16, create_time: u64, ) -> Self { Self { object_id, generation, transaction_id, inode_type: InodeType::Directory as u8, compression_policy: CompressionPolicy::Inherit as u8, rights_template, create_time, modify_time: create_time, link_count: 1, ..Self::ZERO } } pub fn new_symlink( object_id: u64, generation: u64, transaction_id: u64, rights_template: u16, create_time: u64, target: &[u8], ) -> Self { assert!(target.len() <= INODE_INLINE_MAX); let mut inode = Self { object_id, generation, transaction_id, inode_type: InodeType::Symlink as u8, compression_policy: CompressionPolicy::Disabled as u8, rights_template, create_time, modify_time: create_time, size: target.len() as u64, flags: InodeFlags::INLINE, link_count: 1, ..Self::ZERO }; inode.direct.as_mut_bytes()[..target.len()].copy_from_slice(target); inode } } impl core::fmt::Debug for Inode { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("Inode") .field("object_id", &self.object_id) .field("generation", &self.generation) .field("txn", &self.transaction_id) .field("type", &self.inode_type_enum()) .field("size", &self.size) .field("blocks", &self.block_count) .field("links", &self.link_count) .field("inline", &self.is_inline()) .finish() } } #[derive(Clone, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct BTreeNodeHeader { pub magic: u16, pub entry_count: u16, pub level: u8, pub flags: u8, _reserved: [u8; 2], } const _: () = assert!(core::mem::size_of::() == 8); impl BTreeNodeHeader { pub const fn new(level: u8) -> Self { Self { magic: BTREE_NODE_MAGIC, entry_count: 0, level, flags: 0, _reserved: [0; 2], } } pub const fn is_valid(&self) -> bool { self.magic == BTREE_NODE_MAGIC } pub const fn is_leaf(&self) -> bool { self.level == 0 } } impl core::fmt::Debug for BTreeNodeHeader { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("BTreeNodeHeader") .field("magic", &format_args!("{:#06x}", self.magic)) .field("level", &self.level) .field("entries", &self.entry_count) .field("flags", &self.flags) .finish() } } #[derive(Clone, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct BTreeNode { pub header: BTreeNodeHeader, pub entries: [BlockRef; BTREE_MAX_ENTRIES], _padding: [u8; 56], } const _: () = assert!(core::mem::size_of::() == BTREE_NODE_SIZE); impl BTreeNode { pub const ZEROED: Self = Self { header: BTreeNodeHeader { magic: 0, entry_count: 0, level: 0, flags: 0, _reserved: [0; 2], }, entries: [BlockRef::ZERO; BTREE_MAX_ENTRIES], _padding: [0; 56], }; pub const EMPTY_LEAF: Self = Self { header: BTreeNodeHeader { magic: BTREE_NODE_MAGIC, entry_count: 0, level: 0, flags: 0, _reserved: [0; 2], }, entries: [BlockRef::ZERO; BTREE_MAX_ENTRIES], _padding: [0; 56], }; pub const fn new(level: u8) -> Self { Self { header: BTreeNodeHeader { magic: BTREE_NODE_MAGIC, entry_count: 0, level, flags: 0, _reserved: [0; 2], }, entries: [BlockRef::ZERO; BTREE_MAX_ENTRIES], _padding: [0; 56], } } pub const fn is_valid(&self) -> bool { self.header.is_valid() } pub const fn is_leaf(&self) -> bool { self.header.is_leaf() } pub const fn entry_count(&self) -> usize { self.header.entry_count as usize } pub const fn is_full(&self) -> bool { self.entry_count() >= BTREE_MAX_ENTRIES } pub const fn is_underflow(&self) -> bool { self.entry_count() < BTREE_MIN_ENTRIES } pub fn active_entries(&self) -> &[BlockRef] { &self.entries[..self.entry_count()] } pub fn search_by_key(&self, target: u64) -> Result { self.active_entries() .binary_search_by_key(&target, |entry| entry.key) } pub fn find_child_index(&self, target: u64) -> usize { let count = self.entry_count(); match self.search_by_key(target) { Ok(idx) => idx, Err(idx) => idx.min(count.saturating_sub(1)), } } } impl core::fmt::Debug for BTreeNode { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("BTreeNode") .field("header", &self.header) .field("active_entries", &self.entry_count()) .finish() } } #[derive(Clone, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct Superblock { pub magic: u64, pub version: u32, _pad0: u32, pub sequence: u64, pub transaction_id: u64, pub total_blocks: u64, pub block_size: u32, _pad1: u32, pub tree_root: BlockRef, pub freemap_root: BlockRef, pub dedup_roots: [BlockRef; DEDUP_SHARDS], pub snapshot_root: BlockRef, pub scrub_cursor: u64, pub next_object_id: u64, _reserved: [u8; 2812], pub checksum: u32, } const _: () = assert!(core::mem::size_of::() == SUPERBLOCK_SIZE); impl Superblock { pub const fn new(total_blocks: u64, block_size: u32) -> Self { Self { magic: SUPERBLOCK_MAGIC, version: SUPERBLOCK_VERSION, _pad0: 0, sequence: 0, transaction_id: 0, total_blocks, block_size, _pad1: 0, tree_root: BlockRef::ZERO, freemap_root: BlockRef::ZERO, dedup_roots: [BlockRef::ZERO; DEDUP_SHARDS], snapshot_root: BlockRef::ZERO, scrub_cursor: 0, next_object_id: 0, _reserved: [0; 2812], checksum: 0, } } pub const fn is_valid_magic(&self) -> bool { self.magic == SUPERBLOCK_MAGIC && self.version == SUPERBLOCK_VERSION } pub const fn next_sequence(&self) -> Option { self.sequence.checked_add(1) } pub const fn next_transaction(&self) -> Option { self.transaction_id.checked_add(1) } } impl core::fmt::Debug for Superblock { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_struct("Superblock") .field("magic", &format_args!("{:#018x}", self.magic)) .field("version", &self.version) .field("sequence", &self.sequence) .field("txn", &self.transaction_id) .field("total_blocks", &self.total_blocks) .field("block_size", &self.block_size) .field("tree_root", &self.tree_root) .field("freemap_root", &self.freemap_root) .field("checksum", &format_args!("{:#010x}", self.checksum)) .finish() } } pub fn select_superblock<'a>(a: &'a Superblock, b: &'a Superblock) -> &'a Superblock { let a_valid = a.is_valid_magic(); let b_valid = b.is_valid_magic(); match (a_valid, b_valid) { (true, true) => match a.sequence >= b.sequence { true => a, false => b, }, (true, false) => a, (false, true) => b, (false, false) => a, } } pub fn commit_target<'a>(a: &'a Superblock, b: &'a Superblock) -> SuperblockSlot { let a_valid = a.is_valid_magic(); let b_valid = b.is_valid_magic(); match (a_valid, b_valid) { (true, true) => match a.sequence < b.sequence { true => SuperblockSlot::A, false => SuperblockSlot::B, }, (true, false) => SuperblockSlot::B, (false, true) => SuperblockSlot::A, (false, false) => SuperblockSlot::A, } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum SuperblockSlot { A, B, } impl SuperblockSlot { pub const fn block_number(self) -> u64 { match self { Self::A => 0, Self::B => 1, } } } #[derive(Clone, PartialEq, Eq, FromBytes, IntoBytes, KnownLayout, Immutable)] #[repr(C)] pub struct FreemapLeaf { pub bits: [u8; BTREE_NODE_SIZE], } const _: () = assert!(core::mem::size_of::() == BTREE_NODE_SIZE); impl FreemapLeaf { pub const ZERO: Self = Self { bits: [0; BTREE_NODE_SIZE], }; pub const FULL: Self = Self { bits: [0xFF; BTREE_NODE_SIZE], }; pub const fn is_allocated(&self, index: usize) -> bool { let byte = index / 8; let bit = index % 8; (self.bits[byte] & (1 << bit)) != 0 } pub const fn with_bit_set(mut self, index: usize) -> Self { let byte = index / 8; let bit = index % 8; self.bits[byte] |= 1 << bit; self } pub const fn with_bit_cleared(mut self, index: usize) -> Self { let byte = index / 8; let bit = index % 8; self.bits[byte] &= !(1 << bit); self } pub fn set_bit(&mut self, index: usize) { let byte = index / 8; let bit = index % 8; self.bits[byte] |= 1 << bit; } pub fn clear_bit(&mut self, index: usize) { let byte = index / 8; let bit = index % 8; self.bits[byte] &= !(1 << bit); } pub fn set_range(&mut self, start: usize, count: usize) { (start..start + count).for_each(|i| self.set_bit(i)); } pub fn clear_range(&mut self, start: usize, count: usize) { (start..start + count).for_each(|i| self.clear_bit(i)); } pub fn is_range_allocated(&self, start: usize, count: usize) -> bool { (start..start + count).all(|i| self.is_allocated(i)) } pub fn find_contiguous_free(&self, count: usize) -> Option { self.find_contiguous_free_from(0, count) } pub fn find_contiguous_free_from(&self, start_bit: usize, count: usize) -> Option { if count == 0 { return None; } let total_bits = self.bits.len() * 8; let start_word = start_bit / 64; let (mut run_start, mut run_len) = (start_bit, 0usize); self.bits .chunks_exact(8) .enumerate() .skip(start_word) .find_map(|(word_idx, chunk)| { let word = u64::from_le_bytes([ chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6], chunk[7], ]); let skip_bits = match word_idx == start_word { true => (start_bit % 64) as u32, false => 0, }; let masked = match skip_bits { 0 => word, n => word | ((1u64 << n) - 1), }; match masked { u64::MAX => { run_len = 0; run_start = (word_idx + 1) * 64; None } _ if masked == 0 && skip_bits == 0 => { if run_len == 0 { run_start = word_idx * 64; } run_len += 64; match run_len >= count { true => Some(run_start), false => None, } } partial => { let base = word_idx * 64; (skip_bits..64).find_map(|bit| match (partial >> bit) & 1 != 0 { true => { run_len = 0; run_start = base + bit as usize + 1; None } false => { if run_len == 0 { run_start = base + bit as usize; } run_len += 1; match run_len >= count { true => Some(run_start), false => None, } } }) } } }) .filter(|&start| start + count <= total_bits) } } impl core::fmt::Debug for FreemapLeaf { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let allocated = self .bits .iter() .fold(0u32, |acc, &byte| acc + byte.count_ones()); let total = self.bits.len() * 8; f.debug_struct("FreemapLeaf") .field("allocated", &allocated) .field("total", &total) .finish() } }