web engine - experimental web browser

Merge branch 'font-parsing': OTF/TTF font file parsing

+1272
+373
crates/text/src/font/mod.rs
··· 1 + //! OTF/TTF font file parser. 2 + //! 3 + //! Parses the OpenType/TrueType table directory and individual tables needed 4 + //! for text rendering: head, maxp, hhea, hmtx, cmap, name, loca. 5 + 6 + use std::fmt; 7 + 8 + mod parse; 9 + mod tables; 10 + 11 + pub use tables::cmap::CmapTable; 12 + pub use tables::head::HeadTable; 13 + pub use tables::hhea::HheaTable; 14 + pub use tables::hmtx::HmtxTable; 15 + pub use tables::loca::LocaTable; 16 + pub use tables::maxp::MaxpTable; 17 + pub use tables::name::NameTable; 18 + 19 + /// Errors that can occur during font parsing. 20 + #[derive(Debug)] 21 + pub enum FontError { 22 + /// The data is too short to contain the expected structure. 23 + UnexpectedEof, 24 + /// The font file has an unrecognized magic number / sfVersion. 25 + InvalidMagic(u32), 26 + /// A required table is missing. 27 + MissingTable(&'static str), 28 + /// A table's data is malformed. 29 + MalformedTable(&'static str), 30 + } 31 + 32 + impl fmt::Display for FontError { 33 + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 34 + match self { 35 + FontError::UnexpectedEof => write!(f, "unexpected end of font data"), 36 + FontError::InvalidMagic(v) => write!(f, "invalid font magic: 0x{:08X}", v), 37 + FontError::MissingTable(t) => write!(f, "missing required table: {}", t), 38 + FontError::MalformedTable(t) => write!(f, "malformed table: {}", t), 39 + } 40 + } 41 + } 42 + 43 + /// A record in the table directory describing one font table. 44 + #[derive(Debug, Clone)] 45 + pub struct TableRecord { 46 + /// Four-byte tag (e.g. b"head", b"cmap"). 47 + pub tag: [u8; 4], 48 + /// Checksum of the table. 49 + pub checksum: u32, 50 + /// Offset from the beginning of the font file. 51 + pub offset: u32, 52 + /// Length of the table in bytes. 53 + pub length: u32, 54 + } 55 + 56 + impl TableRecord { 57 + /// Return the tag as a string (for display/debugging). 
58 + pub fn tag_str(&self) -> &str { 59 + std::str::from_utf8(&self.tag).unwrap_or("????") 60 + } 61 + } 62 + 63 + /// A parsed OpenType/TrueType font. 64 + #[derive(Debug)] 65 + pub struct Font { 66 + /// Raw font data (owned). 67 + data: Vec<u8>, 68 + /// Offset subtable version (0x00010000 for TrueType, 0x4F54544F for CFF). 69 + pub sf_version: u32, 70 + /// Table directory records. 71 + pub tables: Vec<TableRecord>, 72 + } 73 + 74 + impl Font { 75 + /// Parse a font from raw file bytes. 76 + pub fn parse(data: Vec<u8>) -> Result<Font, FontError> { 77 + let r = parse::Reader::new(&data); 78 + 79 + let sf_version = r.u32(0)?; 80 + match sf_version { 81 + 0x00010000 => {} // TrueType 82 + 0x4F54544F => {} // CFF (OpenType with PostScript outlines) 83 + 0x74727565 => {} // 'true' — old Apple TrueType 84 + _ => return Err(FontError::InvalidMagic(sf_version)), 85 + } 86 + 87 + let num_tables = r.u16(4)? as usize; 88 + // skip searchRange(2), entrySelector(2), rangeShift(2) = 6 bytes 89 + let mut tables = Vec::with_capacity(num_tables); 90 + for i in 0..num_tables { 91 + let base = 12 + i * 16; 92 + let tag = r.tag(base)?; 93 + let checksum = r.u32(base + 4)?; 94 + let offset = r.u32(base + 8)?; 95 + let length = r.u32(base + 12)?; 96 + tables.push(TableRecord { 97 + tag, 98 + checksum, 99 + offset, 100 + length, 101 + }); 102 + } 103 + 104 + Ok(Font { 105 + data, 106 + sf_version, 107 + tables, 108 + }) 109 + } 110 + 111 + /// Load a font from a file path. 112 + pub fn from_file(path: &std::path::Path) -> Result<Font, FontError> { 113 + let data = std::fs::read(path).map_err(|_| FontError::UnexpectedEof)?; 114 + Font::parse(data) 115 + } 116 + 117 + /// Find a table record by its 4-byte tag. 118 + pub fn table_record(&self, tag: &[u8; 4]) -> Option<&TableRecord> { 119 + self.tables.iter().find(|t| &t.tag == tag) 120 + } 121 + 122 + /// Get the raw bytes for a table. 
123 + pub fn table_data(&self, tag: &[u8; 4]) -> Option<&[u8]> { 124 + let rec = self.table_record(tag)?; 125 + let start = rec.offset as usize; 126 + let end = start + rec.length as usize; 127 + if end <= self.data.len() { 128 + Some(&self.data[start..end]) 129 + } else { 130 + None 131 + } 132 + } 133 + 134 + /// Parse the `head` table. 135 + pub fn head(&self) -> Result<HeadTable, FontError> { 136 + let data = self 137 + .table_data(b"head") 138 + .ok_or(FontError::MissingTable("head"))?; 139 + HeadTable::parse(data) 140 + } 141 + 142 + /// Parse the `maxp` table. 143 + pub fn maxp(&self) -> Result<MaxpTable, FontError> { 144 + let data = self 145 + .table_data(b"maxp") 146 + .ok_or(FontError::MissingTable("maxp"))?; 147 + MaxpTable::parse(data) 148 + } 149 + 150 + /// Parse the `hhea` table. 151 + pub fn hhea(&self) -> Result<HheaTable, FontError> { 152 + let data = self 153 + .table_data(b"hhea") 154 + .ok_or(FontError::MissingTable("hhea"))?; 155 + HheaTable::parse(data) 156 + } 157 + 158 + /// Parse the `hmtx` table. 159 + /// 160 + /// Requires `maxp` and `hhea` to determine dimensions. 161 + pub fn hmtx(&self) -> Result<HmtxTable, FontError> { 162 + let maxp = self.maxp()?; 163 + let hhea = self.hhea()?; 164 + let data = self 165 + .table_data(b"hmtx") 166 + .ok_or(FontError::MissingTable("hmtx"))?; 167 + HmtxTable::parse(data, hhea.num_long_hor_metrics, maxp.num_glyphs) 168 + } 169 + 170 + /// Parse the `cmap` table. 171 + pub fn cmap(&self) -> Result<CmapTable, FontError> { 172 + let data = self 173 + .table_data(b"cmap") 174 + .ok_or(FontError::MissingTable("cmap"))?; 175 + CmapTable::parse(data) 176 + } 177 + 178 + /// Parse the `name` table. 179 + pub fn name(&self) -> Result<NameTable, FontError> { 180 + let data = self 181 + .table_data(b"name") 182 + .ok_or(FontError::MissingTable("name"))?; 183 + NameTable::parse(data) 184 + } 185 + 186 + /// Parse the `loca` table. 
187 + /// 188 + /// Requires `head` (for index format) and `maxp` (for glyph count). 189 + pub fn loca(&self) -> Result<LocaTable, FontError> { 190 + let head = self.head()?; 191 + let maxp = self.maxp()?; 192 + let data = self 193 + .table_data(b"loca") 194 + .ok_or(FontError::MissingTable("loca"))?; 195 + LocaTable::parse(data, head.index_to_loc_format, maxp.num_glyphs) 196 + } 197 + 198 + /// Map a Unicode code point to a glyph index using the cmap table. 199 + pub fn glyph_index(&self, codepoint: u32) -> Result<Option<u16>, FontError> { 200 + let cmap = self.cmap()?; 201 + Ok(cmap.glyph_index(codepoint)) 202 + } 203 + 204 + /// Returns true if this is a TrueType font (vs CFF/PostScript outlines). 205 + pub fn is_truetype(&self) -> bool { 206 + self.sf_version == 0x00010000 || self.sf_version == 0x74727565 207 + } 208 + } 209 + 210 + /// Load the first available system font from standard macOS paths. 211 + /// 212 + /// Tries these fonts in order: Geneva.ttf, Helvetica.ttc, Monaco.ttf. 213 + /// For `.ttc` (TrueType Collection) files, only the first font is parsed. 214 + pub fn load_system_font() -> Result<Font, FontError> { 215 + let candidates = [ 216 + "/System/Library/Fonts/Geneva.ttf", 217 + "/System/Library/Fonts/Monaco.ttf", 218 + ]; 219 + for path in &candidates { 220 + let p = std::path::Path::new(path); 221 + if p.exists() { 222 + return Font::from_file(p); 223 + } 224 + } 225 + Err(FontError::MissingTable("no system font found")) 226 + } 227 + 228 + #[cfg(test)] 229 + mod tests { 230 + use super::*; 231 + 232 + fn test_font() -> Font { 233 + // Try several common macOS fonts. 
234 + let paths = [ 235 + "/System/Library/Fonts/Geneva.ttf", 236 + "/System/Library/Fonts/Monaco.ttf", 237 + "/System/Library/Fonts/Keyboard.ttf", 238 + ]; 239 + for path in &paths { 240 + let p = std::path::Path::new(path); 241 + if p.exists() { 242 + return Font::from_file(p).expect("failed to parse font"); 243 + } 244 + } 245 + panic!("no test font found — need a .ttf file in /System/Library/Fonts/"); 246 + } 247 + 248 + #[test] 249 + fn parse_table_directory() { 250 + let font = test_font(); 251 + assert!(font.is_truetype()); 252 + assert!(!font.tables.is_empty()); 253 + // Every font must have these tables. 254 + assert!(font.table_record(b"head").is_some(), "missing head table"); 255 + assert!(font.table_record(b"cmap").is_some(), "missing cmap table"); 256 + assert!(font.table_record(b"maxp").is_some(), "missing maxp table"); 257 + } 258 + 259 + #[test] 260 + fn parse_head_table() { 261 + let font = test_font(); 262 + let head = font.head().expect("failed to parse head"); 263 + assert!( 264 + head.units_per_em > 0, 265 + "units_per_em should be positive: {}", 266 + head.units_per_em 267 + ); 268 + assert!( 269 + head.units_per_em >= 16 && head.units_per_em <= 16384, 270 + "units_per_em out of range: {}", 271 + head.units_per_em 272 + ); 273 + } 274 + 275 + #[test] 276 + fn parse_maxp_table() { 277 + let font = test_font(); 278 + let maxp = font.maxp().expect("failed to parse maxp"); 279 + assert!(maxp.num_glyphs > 0, "font should have at least one glyph"); 280 + } 281 + 282 + #[test] 283 + fn parse_hhea_table() { 284 + let font = test_font(); 285 + let hhea = font.hhea().expect("failed to parse hhea"); 286 + assert!(hhea.ascent > 0, "ascent should be positive"); 287 + assert!(hhea.num_long_hor_metrics > 0, "should have metrics"); 288 + } 289 + 290 + #[test] 291 + fn parse_hmtx_table() { 292 + let font = test_font(); 293 + let hmtx = font.hmtx().expect("failed to parse hmtx"); 294 + let maxp = font.maxp().unwrap(); 295 + assert_eq!( 296 + 
hmtx.advances.len(), 297 + maxp.num_glyphs as usize, 298 + "should have one advance per glyph" 299 + ); 300 + assert_eq!( 301 + hmtx.lsbs.len(), 302 + maxp.num_glyphs as usize, 303 + "should have one lsb per glyph" 304 + ); 305 + // Glyph 0 (.notdef) typically has a nonzero advance. 306 + assert!(hmtx.advances[0] > 0, "glyph 0 advance should be nonzero"); 307 + } 308 + 309 + #[test] 310 + fn parse_cmap_table() { 311 + let font = test_font(); 312 + let cmap = font.cmap().expect("failed to parse cmap"); 313 + 314 + // Look up ASCII 'A' (U+0041) — every Latin font should have it. 315 + let glyph_a = cmap.glyph_index(0x0041); 316 + assert!( 317 + glyph_a.is_some() && glyph_a.unwrap() > 0, 318 + "should find a glyph for 'A'" 319 + ); 320 + 321 + // Look up space (U+0020). 322 + let glyph_space = cmap.glyph_index(0x0020); 323 + assert!(glyph_space.is_some(), "should find a glyph for space"); 324 + } 325 + 326 + #[test] 327 + fn parse_name_table() { 328 + let font = test_font(); 329 + let name = font.name().expect("failed to parse name"); 330 + let family = name.family_name(); 331 + assert!(family.is_some(), "should have a family name"); 332 + let family = family.unwrap(); 333 + assert!(!family.is_empty(), "family name should not be empty"); 334 + } 335 + 336 + #[test] 337 + fn parse_loca_table() { 338 + let font = test_font(); 339 + let loca = font.loca().expect("failed to parse loca"); 340 + let maxp = font.maxp().unwrap(); 341 + // loca has num_glyphs + 1 entries. 342 + assert_eq!( 343 + loca.offsets.len(), 344 + maxp.num_glyphs as usize + 1, 345 + "loca should have num_glyphs + 1 entries" 346 + ); 347 + } 348 + 349 + #[test] 350 + fn glyph_index_lookup() { 351 + let font = test_font(); 352 + // 'A' should map to a nonzero glyph. 353 + let gid = font.glyph_index(0x0041).expect("glyph_index failed"); 354 + assert!(gid.is_some() && gid.unwrap() > 0); 355 + 356 + // A private-use code point likely has no glyph. 
357 + let gid_pua = font.glyph_index(0xFFFD).expect("glyph_index failed"); 358 + // FFFD (replacement char) might or might not exist — just check no crash. 359 + let _ = gid_pua; 360 + } 361 + 362 + #[test] 363 + fn load_system_font_works() { 364 + // This test may fail in CI where no fonts are installed, 365 + // but should pass on macOS. 366 + if std::path::Path::new("/System/Library/Fonts/Geneva.ttf").exists() 367 + || std::path::Path::new("/System/Library/Fonts/Monaco.ttf").exists() 368 + { 369 + let font = load_system_font().expect("should load a system font"); 370 + assert!(!font.tables.is_empty()); 371 + } 372 + } 373 + }
+79
crates/text/src/font/parse.rs
··· 1 + //! Binary parsing utilities for reading big-endian font data. 2 + 3 + use super::FontError; 4 + 5 + /// A zero-copy reader over a byte slice, for big-endian binary parsing. 6 + pub struct Reader<'a> { 7 + data: &'a [u8], 8 + } 9 + 10 + impl<'a> Reader<'a> { 11 + pub fn new(data: &'a [u8]) -> Self { 12 + Reader { data } 13 + } 14 + 15 + pub fn len(&self) -> usize { 16 + self.data.len() 17 + } 18 + 19 + fn check(&self, offset: usize, size: usize) -> Result<(), FontError> { 20 + if offset + size > self.data.len() { 21 + Err(FontError::UnexpectedEof) 22 + } else { 23 + Ok(()) 24 + } 25 + } 26 + 27 + pub fn u16(&self, offset: usize) -> Result<u16, FontError> { 28 + self.check(offset, 2)?; 29 + Ok(u16::from_be_bytes([ 30 + self.data[offset], 31 + self.data[offset + 1], 32 + ])) 33 + } 34 + 35 + pub fn i16(&self, offset: usize) -> Result<i16, FontError> { 36 + self.check(offset, 2)?; 37 + Ok(i16::from_be_bytes([ 38 + self.data[offset], 39 + self.data[offset + 1], 40 + ])) 41 + } 42 + 43 + pub fn u32(&self, offset: usize) -> Result<u32, FontError> { 44 + self.check(offset, 4)?; 45 + Ok(u32::from_be_bytes([ 46 + self.data[offset], 47 + self.data[offset + 1], 48 + self.data[offset + 2], 49 + self.data[offset + 3], 50 + ])) 51 + } 52 + 53 + pub fn i32(&self, offset: usize) -> Result<i32, FontError> { 54 + self.check(offset, 4)?; 55 + Ok(i32::from_be_bytes([ 56 + self.data[offset], 57 + self.data[offset + 1], 58 + self.data[offset + 2], 59 + self.data[offset + 3], 60 + ])) 61 + } 62 + 63 + /// Read a 4-byte tag (e.g., table tags like b"head"). 64 + pub fn tag(&self, offset: usize) -> Result<[u8; 4], FontError> { 65 + self.check(offset, 4)?; 66 + Ok([ 67 + self.data[offset], 68 + self.data[offset + 1], 69 + self.data[offset + 2], 70 + self.data[offset + 3], 71 + ]) 72 + } 73 + 74 + /// Get a sub-slice of the data. 
75 + pub fn slice(&self, offset: usize, len: usize) -> Result<&'a [u8], FontError> { 76 + self.check(offset, len)?; 77 + Ok(&self.data[offset..offset + len]) 78 + } 79 + }
+285
crates/text/src/font/tables/cmap.rs
··· 1 + //! `cmap` — Character to Glyph Index Mapping table. 2 + //! 3 + //! Maps Unicode code points to glyph indices. Supports format 4 (BMP) and 4 + //! format 12 (full Unicode). 5 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/cmap> 6 + 7 + use crate::font::parse::Reader; 8 + use crate::font::FontError; 9 + 10 + /// Parsed `cmap` table. 11 + #[derive(Debug)] 12 + pub struct CmapTable { 13 + /// The best subtable we found (preferring format 12 over format 4). 14 + subtable: CmapSubtable, 15 + } 16 + 17 + #[derive(Debug)] 18 + enum CmapSubtable { 19 + Format4(Format4), 20 + Format12(Format12), 21 + } 22 + 23 + /// cmap format 4: Segment mapping to delta values (BMP only). 24 + #[derive(Debug)] 25 + struct Format4 { 26 + /// Parallel arrays defining segments. 27 + end_codes: Vec<u16>, 28 + start_codes: Vec<u16>, 29 + id_deltas: Vec<i16>, 30 + id_range_offsets: Vec<u16>, 31 + /// The raw glyph index array following the segments. 32 + glyph_indices: Vec<u16>, 33 + } 34 + 35 + /// cmap format 12: Segmented coverage for the full Unicode range. 36 + #[derive(Debug)] 37 + struct Format12 { 38 + groups: Vec<SequentialMapGroup>, 39 + } 40 + 41 + #[derive(Debug)] 42 + struct SequentialMapGroup { 43 + start_char: u32, 44 + end_char: u32, 45 + start_glyph: u32, 46 + } 47 + 48 + impl CmapTable { 49 + /// Parse the `cmap` table from raw bytes. 50 + /// 51 + /// Selects the best available subtable: 52 + /// 1. Platform 3 (Windows), Encoding 10 (Unicode full) — format 12 53 + /// 2. Platform 0 (Unicode), Encoding 4 (Unicode full) — format 12 54 + /// 3. Platform 3 (Windows), Encoding 1 (Unicode BMP) — format 4 55 + /// 4. Platform 0 (Unicode), Encoding 3 (Unicode BMP) — format 4 56 + /// 5. 
First platform 0 or 3 subtable that parses successfully 57 + pub fn parse(data: &[u8]) -> Result<CmapTable, FontError> { 58 + let r = Reader::new(data); 59 + if r.len() < 4 { 60 + return Err(FontError::MalformedTable("cmap")); 61 + } 62 + 63 + let num_tables = r.u16(2)? as usize; 64 + 65 + // Collect encoding records. 66 + struct EncodingRecord { 67 + platform_id: u16, 68 + encoding_id: u16, 69 + offset: u32, 70 + } 71 + 72 + let mut records = Vec::with_capacity(num_tables); 73 + for i in 0..num_tables { 74 + let base = 4 + i * 8; 75 + records.push(EncodingRecord { 76 + platform_id: r.u16(base)?, 77 + encoding_id: r.u16(base + 2)?, 78 + offset: r.u32(base + 4)?, 79 + }); 80 + } 81 + 82 + // Try subtables in preference order. 83 + // Priority: (3,10) > (0,4) > (0,6) > (3,1) > (0,3) > (0,*) > (3,*) 84 + let priority = |pid: u16, eid: u16| -> u8 { 85 + match (pid, eid) { 86 + (3, 10) => 0, 87 + (0, 4) => 1, 88 + (0, 6) => 2, 89 + (3, 1) => 3, 90 + (0, 3) => 4, 91 + (0, _) => 5, 92 + (3, _) => 6, 93 + _ => 255, 94 + } 95 + }; 96 + 97 + let mut best: Option<(u8, CmapSubtable)> = None; 98 + 99 + for rec in &records { 100 + let p = priority(rec.platform_id, rec.encoding_id); 101 + if p == 255 { 102 + continue; 103 + } 104 + if let Some((bp, _)) = &best { 105 + if p >= *bp { 106 + continue; 107 + } 108 + } 109 + 110 + let offset = rec.offset as usize; 111 + if offset + 2 > data.len() { 112 + continue; 113 + } 114 + let format = r.u16(offset)?; 115 + 116 + match format { 117 + 4 => { 118 + if let Ok(st) = parse_format4(data, offset) { 119 + best = Some((p, CmapSubtable::Format4(st))); 120 + } 121 + } 122 + 12 => { 123 + if let Ok(st) = parse_format12(data, offset) { 124 + best = Some((p, CmapSubtable::Format12(st))); 125 + } 126 + } 127 + _ => {} 128 + } 129 + } 130 + 131 + match best { 132 + Some((_, subtable)) => Ok(CmapTable { subtable }), 133 + None => Err(FontError::MalformedTable("cmap: no usable subtable")), 134 + } 135 + } 136 + 137 + /// Look up a Unicode code 
point and return the corresponding glyph index. 138 + /// 139 + /// Returns `None` if the code point is not mapped (maps to glyph 0). 140 + pub fn glyph_index(&self, codepoint: u32) -> Option<u16> { 141 + let gid = match &self.subtable { 142 + CmapSubtable::Format4(f4) => lookup_format4(f4, codepoint), 143 + CmapSubtable::Format12(f12) => lookup_format12(f12, codepoint), 144 + }; 145 + if gid == 0 { 146 + None 147 + } else { 148 + Some(gid) 149 + } 150 + } 151 + } 152 + 153 + fn parse_format4(data: &[u8], offset: usize) -> Result<Format4, FontError> { 154 + let r = Reader::new(data); 155 + // format(2) + length(2) + language(2) + segCountX2(2) 156 + if offset + 14 > data.len() { 157 + return Err(FontError::MalformedTable("cmap format 4")); 158 + } 159 + 160 + let seg_count_x2 = r.u16(offset + 6)? as usize; 161 + let seg_count = seg_count_x2 / 2; 162 + // skip searchRange(2) + entrySelector(2) + rangeShift(2) 163 + let end_codes_offset = offset + 14; 164 + // After endCodes there is a reservedPad(2), then startCodes. 165 + let start_codes_offset = end_codes_offset + seg_count_x2 + 2; 166 + let id_delta_offset = start_codes_offset + seg_count_x2; 167 + let id_range_offset = id_delta_offset + seg_count_x2; 168 + 169 + let mut end_codes = Vec::with_capacity(seg_count); 170 + let mut start_codes = Vec::with_capacity(seg_count); 171 + let mut id_deltas = Vec::with_capacity(seg_count); 172 + let mut id_range_offsets = Vec::with_capacity(seg_count); 173 + 174 + for i in 0..seg_count { 175 + end_codes.push(r.u16(end_codes_offset + i * 2)?); 176 + start_codes.push(r.u16(start_codes_offset + i * 2)?); 177 + id_deltas.push(r.i16(id_delta_offset + i * 2)?); 178 + id_range_offsets.push(r.u16(id_range_offset + i * 2)?); 179 + } 180 + 181 + // Everything after idRangeOffset is the glyphIdArray. 
182 + let glyph_array_offset = id_range_offset + seg_count_x2; 183 + let remaining_bytes = data.len().saturating_sub(glyph_array_offset); 184 + let num_glyph_indices = remaining_bytes / 2; 185 + let mut glyph_indices = Vec::with_capacity(num_glyph_indices); 186 + for i in 0..num_glyph_indices { 187 + glyph_indices.push(r.u16(glyph_array_offset + i * 2)?); 188 + } 189 + 190 + Ok(Format4 { 191 + end_codes, 192 + start_codes, 193 + id_deltas, 194 + id_range_offsets, 195 + glyph_indices, 196 + }) 197 + } 198 + 199 + fn lookup_format4(f4: &Format4, codepoint: u32) -> u16 { 200 + if codepoint > 0xFFFF { 201 + return 0; 202 + } 203 + let cp = codepoint as u16; 204 + 205 + for i in 0..f4.end_codes.len() { 206 + if cp > f4.end_codes[i] { 207 + continue; 208 + } 209 + if cp < f4.start_codes[i] { 210 + return 0; 211 + } 212 + 213 + if f4.id_range_offsets[i] == 0 { 214 + // Use delta. 215 + return (cp as i32 + f4.id_deltas[i] as i32) as u16; 216 + } 217 + 218 + // Use range offset into glyphIdArray. 219 + // The offset is relative to the position of idRangeOffset[i] in the data. 220 + // index = idRangeOffset[i]/2 + (cp - startCode[i]) - segCount + i 221 + let range_offset = f4.id_range_offsets[i] as usize; 222 + let seg_count = f4.end_codes.len(); 223 + let idx = range_offset / 2 + (cp - f4.start_codes[i]) as usize; 224 + // idx is relative to position of idRangeOffset[i], which is at 225 + // range_offset_base + i*2 in the original data. We need to convert 226 + // to an index into our glyph_indices array. 227 + // The glyph_indices array starts at range_offset_base + seg_count*2. 
228 + // So the array index = idx - seg_count + i 229 + let array_idx = idx.wrapping_sub(seg_count).wrapping_add(i); 230 + if array_idx < f4.glyph_indices.len() { 231 + let gid = f4.glyph_indices[array_idx]; 232 + if gid == 0 { 233 + return 0; 234 + } 235 + return (gid as i32 + f4.id_deltas[i] as i32) as u16; 236 + } 237 + 238 + return 0; 239 + } 240 + 241 + 0 242 + } 243 + 244 + fn parse_format12(data: &[u8], offset: usize) -> Result<Format12, FontError> { 245 + let r = Reader::new(data); 246 + // format(2) + reserved(2) + length(4) + language(4) + numGroups(4) 247 + if offset + 16 > data.len() { 248 + return Err(FontError::MalformedTable("cmap format 12")); 249 + } 250 + 251 + let num_groups = r.u32(offset + 12)? as usize; 252 + let groups_offset = offset + 16; 253 + 254 + let mut groups = Vec::with_capacity(num_groups); 255 + for i in 0..num_groups { 256 + let base = groups_offset + i * 12; 257 + groups.push(SequentialMapGroup { 258 + start_char: r.u32(base)?, 259 + end_char: r.u32(base + 4)?, 260 + start_glyph: r.u32(base + 8)?, 261 + }); 262 + } 263 + 264 + Ok(Format12 { groups }) 265 + } 266 + 267 + fn lookup_format12(f12: &Format12, codepoint: u32) -> u16 { 268 + // Binary search for the group containing codepoint. 269 + let mut lo = 0usize; 270 + let mut hi = f12.groups.len(); 271 + while lo < hi { 272 + let mid = lo + (hi - lo) / 2; 273 + let group = &f12.groups[mid]; 274 + if codepoint < group.start_char { 275 + hi = mid; 276 + } else if codepoint > group.end_char { 277 + lo = mid + 1; 278 + } else { 279 + // Found it. 280 + let gid = group.start_glyph + (codepoint - group.start_char); 281 + return gid as u16; 282 + } 283 + } 284 + 0 285 + }
+74
crates/text/src/font/tables/head.rs
··· 1 + //! `head` — Font Header table. 2 + //! 3 + //! Contains global font metrics and flags. 4 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/head> 5 + 6 + use crate::font::parse::Reader; 7 + use crate::font::FontError; 8 + 9 + /// Parsed `head` table. 10 + #[derive(Debug)] 11 + pub struct HeadTable { 12 + /// Major version (should be 1). 13 + pub major_version: u16, 14 + /// Minor version (should be 0). 15 + pub minor_version: u16, 16 + /// Font revision (fixed-point 16.16). 17 + pub font_revision: i32, 18 + /// Units per em (typically 1000 or 2048). 19 + pub units_per_em: u16, 20 + /// Bounding box: minimum x. 21 + pub x_min: i16, 22 + /// Bounding box: minimum y. 23 + pub y_min: i16, 24 + /// Bounding box: maximum x. 25 + pub x_max: i16, 26 + /// Bounding box: maximum y. 27 + pub y_max: i16, 28 + /// Mac style flags (bit 0 = bold, bit 1 = italic). 29 + pub mac_style: u16, 30 + /// Smallest readable size in pixels. 31 + pub lowest_rec_ppem: u16, 32 + /// 0 = short offsets in loca, 1 = long offsets. 33 + pub index_to_loc_format: i16, 34 + } 35 + 36 + impl HeadTable { 37 + /// Parse the `head` table from raw bytes. 38 + pub fn parse(data: &[u8]) -> Result<HeadTable, FontError> { 39 + let r = Reader::new(data); 40 + // Minimum head table size is 54 bytes. 
41 + if r.len() < 54 { 42 + return Err(FontError::MalformedTable("head")); 43 + } 44 + 45 + let major_version = r.u16(0)?; 46 + let minor_version = r.u16(2)?; 47 + let font_revision = r.i32(4)?; 48 + // skip checksumAdjustment(4) + magicNumber(4) + flags(2) 49 + let units_per_em = r.u16(18)?; 50 + // skip created(8) + modified(8) 51 + let x_min = r.i16(36)?; 52 + let y_min = r.i16(38)?; 53 + let x_max = r.i16(40)?; 54 + let y_max = r.i16(42)?; 55 + let mac_style = r.u16(44)?; 56 + let lowest_rec_ppem = r.u16(46)?; 57 + // skip fontDirectionHint(2) 58 + let index_to_loc_format = r.i16(50)?; 59 + 60 + Ok(HeadTable { 61 + major_version, 62 + minor_version, 63 + font_revision, 64 + units_per_em, 65 + x_min, 66 + y_min, 67 + x_max, 68 + y_max, 69 + mac_style, 70 + lowest_rec_ppem, 71 + index_to_loc_format, 72 + }) 73 + } 74 + }
+60
crates/text/src/font/tables/hhea.rs
··· 1 + //! `hhea` — Horizontal Header table. 2 + //! 3 + //! Contains global horizontal layout metrics. 4 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/hhea> 5 + 6 + use crate::font::parse::Reader; 7 + use crate::font::FontError; 8 + 9 + /// Parsed `hhea` table. 10 + #[derive(Debug)] 11 + pub struct HheaTable { 12 + /// Typographic ascent (in font units). 13 + pub ascent: i16, 14 + /// Typographic descent (typically negative, in font units). 15 + pub descent: i16, 16 + /// Typographic line gap (in font units). 17 + pub line_gap: i16, 18 + /// Maximum advance width across all glyphs. 19 + pub advance_width_max: u16, 20 + /// Minimum left side bearing across all glyphs. 21 + pub min_left_side_bearing: i16, 22 + /// Minimum right side bearing across all glyphs. 23 + pub min_right_side_bearing: i16, 24 + /// Maximum x extent (max(lsb + (xMax - xMin))). 25 + pub x_max_extent: i16, 26 + /// Number of entries in the hmtx table's longHorMetric array. 27 + pub num_long_hor_metrics: u16, 28 + } 29 + 30 + impl HheaTable { 31 + /// Parse the `hhea` table from raw bytes. 32 + pub fn parse(data: &[u8]) -> Result<HheaTable, FontError> { 33 + let r = Reader::new(data); 34 + if r.len() < 36 { 35 + return Err(FontError::MalformedTable("hhea")); 36 + } 37 + 38 + // skip version(4) 39 + let ascent = r.i16(4)?; 40 + let descent = r.i16(6)?; 41 + let line_gap = r.i16(8)?; 42 + let advance_width_max = r.u16(10)?; 43 + let min_left_side_bearing = r.i16(12)?; 44 + let min_right_side_bearing = r.i16(14)?; 45 + let x_max_extent = r.i16(16)?; 46 + // skip caretSlopeRise(2), caretSlopeRun(2), caretOffset(2), reserved(8), metricDataFormat(2) 47 + let num_long_hor_metrics = r.u16(34)?; 48 + 49 + Ok(HheaTable { 50 + ascent, 51 + descent, 52 + line_gap, 53 + advance_width_max, 54 + min_left_side_bearing, 55 + min_right_side_bearing, 56 + x_max_extent, 57 + num_long_hor_metrics, 58 + }) 59 + } 60 + }
+55
crates/text/src/font/tables/hmtx.rs
··· 1 + //! `hmtx` — Horizontal Metrics table. 2 + //! 3 + //! Contains per-glyph horizontal metrics (advance width + left side bearing). 4 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/hmtx> 5 + 6 + use crate::font::parse::Reader; 7 + use crate::font::FontError; 8 + 9 + /// Parsed `hmtx` table. 10 + /// 11 + /// Both `advances` and `lsbs` are indexed by glyph ID and have exactly 12 + /// `num_glyphs` entries. 13 + #[derive(Debug)] 14 + pub struct HmtxTable { 15 + /// Advance widths for each glyph (in font units). 16 + pub advances: Vec<u16>, 17 + /// Left side bearings for each glyph (in font units). 18 + pub lsbs: Vec<i16>, 19 + } 20 + 21 + impl HmtxTable { 22 + /// Parse the `hmtx` table from raw bytes. 23 + /// 24 + /// `num_long_hor_metrics` comes from `hhea`, `num_glyphs` from `maxp`. 25 + pub fn parse( 26 + data: &[u8], 27 + num_long_hor_metrics: u16, 28 + num_glyphs: u16, 29 + ) -> Result<HmtxTable, FontError> { 30 + let r = Reader::new(data); 31 + let n_long = num_long_hor_metrics as usize; 32 + let n_glyphs = num_glyphs as usize; 33 + 34 + let mut advances = Vec::with_capacity(n_glyphs); 35 + let mut lsbs = Vec::with_capacity(n_glyphs); 36 + 37 + // First n_long entries are (advance_width: u16, lsb: i16) pairs. 38 + for i in 0..n_long { 39 + let offset = i * 4; 40 + advances.push(r.u16(offset)?); 41 + lsbs.push(r.i16(offset + 2)?); 42 + } 43 + 44 + // Remaining glyphs share the last advance width, but have individual lsbs. 45 + let last_advance = advances.last().copied().unwrap_or(0); 46 + let remaining = n_glyphs.saturating_sub(n_long); 47 + let lsb_offset = n_long * 4; 48 + for i in 0..remaining { 49 + advances.push(last_advance); 50 + lsbs.push(r.i16(lsb_offset + i * 2)?); 51 + } 52 + 53 + Ok(HmtxTable { advances, lsbs }) 54 + } 55 + }
+79
crates/text/src/font/tables/loca.rs
··· 1 + //! `loca` — Index to Location table. 2 + //! 3 + //! Maps glyph IDs to byte offsets within the `glyf` table. 4 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/loca> 5 + 6 + use crate::font::parse::Reader; 7 + use crate::font::FontError; 8 + 9 + /// Parsed `loca` table. 10 + /// 11 + /// Contains `num_glyphs + 1` offsets. The glyph data for glyph `i` starts at 12 + /// `offsets[i]` and ends at `offsets[i + 1]`. If they are equal, the glyph 13 + /// has no outline (e.g., a space character). 14 + #[derive(Debug)] 15 + pub struct LocaTable { 16 + /// Byte offsets into the `glyf` table, one per glyph plus a sentinel. 17 + pub offsets: Vec<u32>, 18 + } 19 + 20 + impl LocaTable { 21 + /// Parse the `loca` table from raw bytes. 22 + /// 23 + /// `index_to_loc_format` comes from the `head` table (0 = short, 1 = long). 24 + /// `num_glyphs` comes from the `maxp` table. 25 + pub fn parse( 26 + data: &[u8], 27 + index_to_loc_format: i16, 28 + num_glyphs: u16, 29 + ) -> Result<LocaTable, FontError> { 30 + let r = Reader::new(data); 31 + let count = num_glyphs as usize + 1; 32 + let mut offsets = Vec::with_capacity(count); 33 + 34 + match index_to_loc_format { 35 + 0 => { 36 + // Short format: offsets are u16 values divided by 2. 37 + for i in 0..count { 38 + let raw = r.u16(i * 2)? as u32; 39 + offsets.push(raw * 2); 40 + } 41 + } 42 + 1 => { 43 + // Long format: offsets are u32 values. 44 + for i in 0..count { 45 + offsets.push(r.u32(i * 4)?); 46 + } 47 + } 48 + _ => return Err(FontError::MalformedTable("loca: invalid index format")), 49 + } 50 + 51 + Ok(LocaTable { offsets }) 52 + } 53 + 54 + /// Returns true if the glyph has outline data (non-empty in glyf). 55 + pub fn has_outline(&self, glyph_id: u16) -> bool { 56 + let i = glyph_id as usize; 57 + if i + 1 < self.offsets.len() { 58 + self.offsets[i] != self.offsets[i + 1] 59 + } else { 60 + false 61 + } 62 + } 63 + 64 + /// Get the byte range for a glyph within the `glyf` table. 
65 + pub fn glyph_range(&self, glyph_id: u16) -> Option<(u32, u32)> { 66 + let i = glyph_id as usize; 67 + if i + 1 < self.offsets.len() { 68 + let start = self.offsets[i]; 69 + let end = self.offsets[i + 1]; 70 + if start < end { 71 + Some((start, end)) 72 + } else { 73 + None 74 + } 75 + } else { 76 + None 77 + } 78 + } 79 + }
+35
crates/text/src/font/tables/maxp.rs
··· 1 + //! `maxp` — Maximum Profile table. 2 + //! 3 + //! Contains the number of glyphs in the font plus (for TrueType) various 4 + //! maximum values used for memory allocation. 5 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/maxp> 6 + 7 + use crate::font::parse::Reader; 8 + use crate::font::FontError; 9 + 10 + /// Parsed `maxp` table. 11 + #[derive(Debug)] 12 + pub struct MaxpTable { 13 + /// Version (0x00005000 for CFF, 0x00010000 for TrueType). 14 + pub version: u32, 15 + /// Total number of glyphs in the font. 16 + pub num_glyphs: u16, 17 + } 18 + 19 + impl MaxpTable { 20 + /// Parse the `maxp` table from raw bytes. 21 + pub fn parse(data: &[u8]) -> Result<MaxpTable, FontError> { 22 + let r = Reader::new(data); 23 + if r.len() < 6 { 24 + return Err(FontError::MalformedTable("maxp")); 25 + } 26 + 27 + let version = r.u32(0)?; 28 + let num_glyphs = r.u16(4)?; 29 + 30 + Ok(MaxpTable { 31 + version, 32 + num_glyphs, 33 + }) 34 + } 35 + }
+9
crates/text/src/font/tables/mod.rs
··· 1 + //! Individual font table parsers. 2 + 3 + pub mod cmap; 4 + pub mod head; 5 + pub mod hhea; 6 + pub mod hmtx; 7 + pub mod loca; 8 + pub mod maxp; 9 + pub mod name;
+221
crates/text/src/font/tables/name.rs
··· 1 + //! `name` — Naming table. 2 + //! 3 + //! Contains human-readable strings like family name, style name, copyright, etc. 4 + //! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/name> 5 + 6 + use crate::font::parse::Reader; 7 + use crate::font::FontError; 8 + 9 + /// Parsed `name` table. 10 + #[derive(Debug)] 11 + pub struct NameTable { 12 + /// All name records extracted from the table. 13 + pub records: Vec<NameRecord>, 14 + } 15 + 16 + /// A single name record. 17 + #[derive(Debug)] 18 + pub struct NameRecord { 19 + /// Platform ID (0 = Unicode, 1 = Macintosh, 3 = Windows). 20 + pub platform_id: u16, 21 + /// Encoding ID (platform-specific). 22 + pub encoding_id: u16, 23 + /// Language ID. 24 + pub language_id: u16, 25 + /// Name ID (1 = family, 2 = subfamily, 4 = full name, 6 = PostScript name, etc.). 26 + pub name_id: u16, 27 + /// The decoded string value. 28 + pub value: String, 29 + } 30 + 31 + impl NameTable { 32 + /// Parse the `name` table from raw bytes. 33 + pub fn parse(data: &[u8]) -> Result<NameTable, FontError> { 34 + let r = Reader::new(data); 35 + if r.len() < 6 { 36 + return Err(FontError::MalformedTable("name")); 37 + } 38 + 39 + // format(2) + count(2) + stringOffset(2) 40 + let count = r.u16(2)? as usize; 41 + let string_offset = r.u16(4)? as usize; 42 + 43 + let mut records = Vec::with_capacity(count); 44 + 45 + for i in 0..count { 46 + let base = 6 + i * 12; 47 + if base + 12 > data.len() { 48 + break; 49 + } 50 + 51 + let platform_id = r.u16(base)?; 52 + let encoding_id = r.u16(base + 2)?; 53 + let language_id = r.u16(base + 4)?; 54 + let name_id = r.u16(base + 6)?; 55 + let length = r.u16(base + 8)? as usize; 56 + let offset = r.u16(base + 10)? 
as usize; 57 + 58 + let str_start = string_offset + offset; 59 + if str_start + length > data.len() { 60 + continue; 61 + } 62 + 63 + let raw = r.slice(str_start, length)?; 64 + let value = decode_name_string(platform_id, encoding_id, raw); 65 + 66 + records.push(NameRecord { 67 + platform_id, 68 + encoding_id, 69 + language_id, 70 + name_id, 71 + value, 72 + }); 73 + } 74 + 75 + Ok(NameTable { records }) 76 + } 77 + 78 + /// Get the font family name (name ID 1). 79 + /// 80 + /// Prefers Windows/Unicode platform, falls back to any platform. 81 + pub fn family_name(&self) -> Option<&str> { 82 + self.get_name(1) 83 + } 84 + 85 + /// Get the font subfamily/style name (name ID 2, e.g. "Regular", "Bold"). 86 + pub fn subfamily_name(&self) -> Option<&str> { 87 + self.get_name(2) 88 + } 89 + 90 + /// Get the full font name (name ID 4). 91 + pub fn full_name(&self) -> Option<&str> { 92 + self.get_name(4) 93 + } 94 + 95 + /// Get a name string by name ID. 96 + /// 97 + /// Prefers Windows platform (3) with English, then any platform. 98 + fn get_name(&self, name_id: u16) -> Option<&str> { 99 + // Prefer Windows platform (3), English (language_id 0x0409). 100 + let win_en = self 101 + .records 102 + .iter() 103 + .find(|r| r.name_id == name_id && r.platform_id == 3 && r.language_id == 0x0409); 104 + if let Some(rec) = win_en { 105 + if !rec.value.is_empty() { 106 + return Some(&rec.value); 107 + } 108 + } 109 + 110 + // Fall back to any Windows platform record. 111 + let win = self 112 + .records 113 + .iter() 114 + .find(|r| r.name_id == name_id && r.platform_id == 3); 115 + if let Some(rec) = win { 116 + if !rec.value.is_empty() { 117 + return Some(&rec.value); 118 + } 119 + } 120 + 121 + // Fall back to any record. 122 + self.records 123 + .iter() 124 + .find(|r| r.name_id == name_id && !r.value.is_empty()) 125 + .map(|r| r.value.as_str()) 126 + } 127 + } 128 + 129 + /// Decode a name string based on platform/encoding. 
fn decode_name_string(platform_id: u16, encoding_id: u16, data: &[u8]) -> String {
    match (platform_id, encoding_id) {
        // Unicode platform (0) is always UTF-16BE. Windows platform (3) strings
        // are UTF-16BE for every encoding ID as well: the encoding ID there
        // describes the font's cmap, not the byte encoding of the name strings.
        (0, _) | (3, _) => decode_utf16be(data),
        // Macintosh platform, Roman script.
        (1, 0) => decode_mac_roman(data),
        // Other Macintosh scripts and unknown platforms: no decoder is
        // available, so fall back to a lossy UTF-8 read (exact for ASCII).
        _ => String::from_utf8_lossy(data).into_owned(),
    }
}

/// Decode big-endian UTF-16 bytes into a `String`.
///
/// Surrogate pairs are combined into a single character. Unpaired surrogates
/// are dropped, and a trailing odd byte is ignored.
fn decode_utf16be(data: &[u8]) -> String {
    let units = data
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]));
    // `decode_utf16` yields `Err` for each unpaired surrogate; those are skipped.
    char::decode_utf16(units).filter_map(Result::ok).collect()
}

/// Decode Mac OS Roman bytes into a `String`.
///
/// Bytes 0x00-0x7F are ASCII; bytes 0x80-0xFF are looked up in a fixed table
/// of Unicode code points.
fn decode_mac_roman(data: &[u8]) -> String {
    // Mac Roman: 0x00-0x7F are ASCII, 0x80-0xFF map to specific Unicode code points.
    static MAC_ROMAN_HIGH: [u16; 128] = [
        0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4,
        0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF,
        0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 0x2020,
        0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4,
        0x00A8, 0x2260, 0x00C6, 0x00D8, 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202,
        0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 0x00BF, 0x00A1,
        0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3,
        0x00D5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
        0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 0x2021, 0x00B7, 0x201A,
        0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC,
        0x00D3, 0x00D4, 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF,
        0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
    ];

    let mut s = String::with_capacity(data.len());
    s.extend(data.iter().filter_map(|&b| {
        if b < 0x80 {
            Some(char::from(b))
        } else {
            char::from_u32(u32::from(MAC_ROMAN_HIGH[usize::from(b - 0x80)]))
        }
    }));
    s
}
+2
crates/text/src/lib.rs
··· 1 1 //! Font parsing (OTF/TTF), shaping, rasterization, and line breaking — pure Rust. 2 + 3 + pub mod font;