web engine - experimental web browser

Implement font discovery: enumerate and load system fonts

- FontRegistry scans /System/Library/Fonts/ and /Library/Fonts/ on init
- TTC collection parsing (ttcf header, per-font offsets)
- Font selection by family name (case-insensitive)
- Style-aware selection (bold/italic via macStyle, OS/2 weight, subfamily name)
- Fallback mechanism: Helvetica -> Arial -> Geneva -> Lucida Grande -> any available
- FontEntry metadata: path, offset, family, subfamily, bold, italic
- 10 new tests for registry discovery, TTC parsing, style selection, fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+483
+2
crates/text/src/font/mod.rs
··· 7 7 8 8 mod parse; 9 9 pub mod rasterizer; 10 + pub mod registry; 10 11 mod tables; 11 12 12 13 pub use rasterizer::GlyphBitmap; 14 + pub use registry::{FontEntry, FontRegistry}; 13 15 pub use tables::cmap::CmapTable; 14 16 pub use tables::glyf::{Contour, GlyphOutline, Point}; 15 17 pub use tables::head::HeadTable;
+481
crates/text/src/font/registry.rs
··· 1 + //! Font discovery and registry. 2 + //! 3 + //! Scans system font directories, builds an index by family name and style, 4 + //! and provides font selection with fallback. 5 + 6 + use super::{Font, FontError}; 7 + use crate::font::parse::Reader; 8 + use std::collections::HashMap; 9 + use std::path::{Path, PathBuf}; 10 + 11 + /// Metadata about a single font face discovered on the system. 12 + #[derive(Debug, Clone)] 13 + pub struct FontEntry { 14 + /// File path to the font. 15 + pub path: PathBuf, 16 + /// Byte offset within the file (0 for standalone .ttf/.otf, nonzero for .ttc fonts). 17 + pub offset: u32, 18 + /// Font family name (e.g., "Helvetica"). 19 + pub family: String, 20 + /// Subfamily name (e.g., "Regular", "Bold", "Italic", "Bold Italic"). 21 + pub subfamily: String, 22 + /// True if this face is bold (from macStyle bit 0 or weight class >= 700). 23 + pub bold: bool, 24 + /// True if this face is italic (from macStyle bit 1 or subfamily heuristic). 25 + pub italic: bool, 26 + } 27 + 28 + /// A registry of system fonts, indexed by family name. 29 + pub struct FontRegistry { 30 + /// Map from lowercase family name to a list of font entries. 31 + families: HashMap<String, Vec<FontEntry>>, 32 + } 33 + 34 + impl Default for FontRegistry { 35 + fn default() -> Self { 36 + Self::new() 37 + } 38 + } 39 + 40 + impl FontRegistry { 41 + /// Scan system font directories and build the registry. 42 + /// 43 + /// Scans `/System/Library/Fonts/` and `/Library/Fonts/` for .ttf, .otf, 44 + /// and .ttc files. Errors in individual files are silently skipped. 
45 + pub fn new() -> FontRegistry { 46 + let mut families: HashMap<String, Vec<FontEntry>> = HashMap::new(); 47 + 48 + let dirs = [ 49 + Path::new("/System/Library/Fonts"), 50 + Path::new("/Library/Fonts"), 51 + ]; 52 + 53 + for dir in &dirs { 54 + let entries = match std::fs::read_dir(dir) { 55 + Ok(e) => e, 56 + Err(_) => continue, 57 + }; 58 + 59 + for entry in entries { 60 + let entry = match entry { 61 + Ok(e) => e, 62 + Err(_) => continue, 63 + }; 64 + 65 + let path = entry.path(); 66 + let ext = path 67 + .extension() 68 + .and_then(|e| e.to_str()) 69 + .unwrap_or("") 70 + .to_ascii_lowercase(); 71 + 72 + match ext.as_str() { 73 + "ttf" | "otf" => { 74 + if let Some(fe) = probe_single_font(&path, 0) { 75 + let key = fe.family.to_ascii_lowercase(); 76 + families.entry(key).or_default().push(fe); 77 + } 78 + } 79 + "ttc" => { 80 + if let Ok(offsets) = parse_ttc_offsets(&path) { 81 + for offset in offsets { 82 + if let Some(fe) = probe_single_font(&path, offset) { 83 + let key = fe.family.to_ascii_lowercase(); 84 + families.entry(key).or_default().push(fe); 85 + } 86 + } 87 + } 88 + } 89 + _ => {} 90 + } 91 + } 92 + } 93 + 94 + FontRegistry { families } 95 + } 96 + 97 + /// Find a font by family name. Returns the first match (prefers Regular). 98 + /// 99 + /// The family name match is case-insensitive. 100 + pub fn find_font(&self, family: &str) -> Option<Font> { 101 + let key = family.to_ascii_lowercase(); 102 + let entries = self.families.get(&key)?; 103 + 104 + // Prefer the Regular face. 105 + let entry = entries 106 + .iter() 107 + .find(|e| !e.bold && !e.italic) 108 + .or_else(|| entries.first())?; 109 + 110 + load_font_at_offset(&entry.path, entry.offset).ok() 111 + } 112 + 113 + /// Find a font by family name with bold/italic style preference. 114 + /// 115 + /// Falls back through: exact match -> any with same bold -> any in family. 
116 + pub fn find_font_with_style(&self, family: &str, bold: bool, italic: bool) -> Option<Font> { 117 + let key = family.to_ascii_lowercase(); 118 + let entries = self.families.get(&key)?; 119 + 120 + // Exact style match. 121 + let entry = entries 122 + .iter() 123 + .find(|e| e.bold == bold && e.italic == italic) 124 + // Fallback: match bold, ignore italic. 125 + .or_else(|| entries.iter().find(|e| e.bold == bold)) 126 + // Fallback: any face in the family. 127 + .or_else(|| entries.first())?; 128 + 129 + load_font_at_offset(&entry.path, entry.offset).ok() 130 + } 131 + 132 + /// List all discovered font family names (sorted alphabetically). 133 + pub fn list_families(&self) -> Vec<String> { 134 + // Return the original-case family name from the first entry of each family. 135 + let mut names: Vec<String> = self 136 + .families 137 + .values() 138 + .filter_map(|entries| entries.first().map(|e| e.family.clone())) 139 + .collect(); 140 + names.sort(); 141 + names 142 + } 143 + 144 + /// Get all font entries for a given family name (case-insensitive). 145 + pub fn family_entries(&self, family: &str) -> Option<&[FontEntry]> { 146 + let key = family.to_ascii_lowercase(); 147 + self.families.get(&key).map(|v| v.as_slice()) 148 + } 149 + 150 + /// Find any available font, preferring common system defaults. 151 + /// 152 + /// Tries: Helvetica, Arial, Geneva, then any available font. 153 + pub fn find_fallback(&self) -> Option<Font> { 154 + for preferred in &["Helvetica", "Arial", "Geneva", "Lucida Grande"] { 155 + if let Some(font) = self.find_font(preferred) { 156 + return Some(font); 157 + } 158 + } 159 + 160 + // Last resort: pick the first font in the registry. 161 + for entries in self.families.values() { 162 + if let Some(entry) = entries.first() { 163 + if let Ok(font) = load_font_at_offset(&entry.path, entry.offset) { 164 + return Some(font); 165 + } 166 + } 167 + } 168 + 169 + None 170 + } 171 + 172 + /// Number of distinct font families in the registry. 
173 + pub fn family_count(&self) -> usize { 174 + self.families.len() 175 + } 176 + } 177 + 178 + /// Parse a TTC (TrueType Collection) file header to get individual font offsets. 179 + fn parse_ttc_offsets(path: &Path) -> Result<Vec<u32>, FontError> { 180 + let data = std::fs::read(path).map_err(|_| FontError::UnexpectedEof)?; 181 + if data.len() < 12 { 182 + return Err(FontError::UnexpectedEof); 183 + } 184 + 185 + let r = Reader::new(&data); 186 + let tag = r.tag(0)?; 187 + 188 + // TTC header: tag must be "ttcf". 189 + if &tag != b"ttcf" { 190 + return Err(FontError::InvalidMagic(u32::from_be_bytes(tag))); 191 + } 192 + 193 + // version(4) + numFonts(4) 194 + let num_fonts = r.u32(8)? as usize; 195 + let mut offsets = Vec::with_capacity(num_fonts); 196 + 197 + for i in 0..num_fonts { 198 + let offset = r.u32(12 + i * 4)?; 199 + offsets.push(offset); 200 + } 201 + 202 + Ok(offsets) 203 + } 204 + 205 + /// Probe a single font at the given byte offset within a file. 206 + /// 207 + /// Reads just enough to extract family name, subfamily, and style flags. 208 + /// Returns `None` if the font can't be parsed. 209 + fn probe_single_font(path: &Path, offset: u32) -> Option<FontEntry> { 210 + let data = std::fs::read(path).ok()?; 211 + let font = parse_font_at_offset(data, offset).ok()?; 212 + 213 + let name = font.name().ok()?; 214 + let family = name.family_name()?.to_owned(); 215 + let subfamily = name.subfamily_name().unwrap_or("Regular").to_owned(); 216 + 217 + // Determine bold/italic from head.macStyle and name heuristics. 218 + let (bold, italic) = detect_style(&font, &subfamily); 219 + 220 + Some(FontEntry { 221 + path: path.to_owned(), 222 + offset, 223 + family, 224 + subfamily, 225 + bold, 226 + italic, 227 + }) 228 + } 229 + 230 + /// Detect bold/italic from head.macStyle flags, OS/2 weight class, and subfamily name. 
231 + fn detect_style(font: &Font, subfamily: &str) -> (bool, bool) { 232 + let sub_lower = subfamily.to_ascii_lowercase(); 233 + 234 + // Start with head.macStyle bits. 235 + let (mut bold, mut italic) = if let Ok(head) = font.head() { 236 + (head.mac_style & 1 != 0, head.mac_style & 2 != 0) 237 + } else { 238 + (false, false) 239 + }; 240 + 241 + // Also consider OS/2 weight class. 242 + if let Ok(os2) = font.os2() { 243 + if os2.us_weight_class >= 700 { 244 + bold = true; 245 + } 246 + } 247 + 248 + // Subfamily name heuristics as fallback. 249 + if sub_lower.contains("bold") { 250 + bold = true; 251 + } 252 + if sub_lower.contains("italic") || sub_lower.contains("oblique") { 253 + italic = true; 254 + } 255 + 256 + (bold, italic) 257 + } 258 + 259 + /// Parse a font from raw data at a given byte offset. 260 + /// 261 + /// For standalone fonts, offset is 0. For TTC fonts, offset points to 262 + /// the individual font's offset table within the collection. 263 + fn parse_font_at_offset(data: Vec<u8>, offset: u32) -> Result<Font, FontError> { 264 + if offset == 0 { 265 + return Font::parse(data); 266 + } 267 + 268 + // For TTC: we need to parse the table directory starting at `offset`. 269 + let off = offset as usize; 270 + let r = Reader::new(&data); 271 + 272 + let sf_version = r.u32(off)?; 273 + match sf_version { 274 + 0x00010000 | 0x4F54544F | 0x74727565 => {} 275 + _ => return Err(FontError::InvalidMagic(sf_version)), 276 + } 277 + 278 + let num_tables = r.u16(off + 4)? 
as usize; 279 + let mut tables = Vec::with_capacity(num_tables); 280 + 281 + for i in 0..num_tables { 282 + let base = off + 12 + i * 16; 283 + let tag = r.tag(base)?; 284 + let checksum = r.u32(base + 4)?; 285 + let table_offset = r.u32(base + 8)?; 286 + let length = r.u32(base + 12)?; 287 + tables.push(super::TableRecord { 288 + tag, 289 + checksum, 290 + offset: table_offset, 291 + length, 292 + }); 293 + } 294 + 295 + Ok(Font { 296 + data, 297 + sf_version, 298 + tables, 299 + }) 300 + } 301 + 302 + /// Load a font from a file at the given byte offset. 303 + pub fn load_font_at_offset(path: &Path, offset: u32) -> Result<Font, FontError> { 304 + let data = std::fs::read(path).map_err(|_| FontError::UnexpectedEof)?; 305 + parse_font_at_offset(data, offset) 306 + } 307 + 308 + #[cfg(test)] 309 + mod tests { 310 + use super::*; 311 + 312 + fn has_system_fonts() -> bool { 313 + Path::new("/System/Library/Fonts").exists() 314 + } 315 + 316 + #[test] 317 + fn registry_discovers_fonts() { 318 + if !has_system_fonts() { 319 + return; 320 + } 321 + let reg = FontRegistry::new(); 322 + assert!( 323 + reg.family_count() > 0, 324 + "should discover at least one font family" 325 + ); 326 + } 327 + 328 + #[test] 329 + fn registry_list_families() { 330 + if !has_system_fonts() { 331 + return; 332 + } 333 + let reg = FontRegistry::new(); 334 + let families = reg.list_families(); 335 + assert!(!families.is_empty(), "should list font families"); 336 + 337 + // Families should be sorted. 338 + for i in 1..families.len() { 339 + assert!( 340 + families[i] >= families[i - 1], 341 + "families should be sorted: '{}' < '{}'", 342 + families[i], 343 + families[i - 1] 344 + ); 345 + } 346 + } 347 + 348 + #[test] 349 + fn registry_find_font_case_insensitive() { 350 + if !has_system_fonts() { 351 + return; 352 + } 353 + let reg = FontRegistry::new(); 354 + let families = reg.list_families(); 355 + 356 + // Find the first family and try a case-insensitive lookup. 
357 + if let Some(family) = families.first() { 358 + let upper = family.to_ascii_uppercase(); 359 + let font = reg.find_font(&upper); 360 + assert!( 361 + font.is_some(), 362 + "should find '{}' via uppercase '{}'", 363 + family, 364 + upper 365 + ); 366 + } 367 + } 368 + 369 + #[test] 370 + fn registry_find_fallback() { 371 + if !has_system_fonts() { 372 + return; 373 + } 374 + let reg = FontRegistry::new(); 375 + let font = reg.find_fallback(); 376 + assert!(font.is_some(), "should find at least one fallback font"); 377 + } 378 + 379 + #[test] 380 + fn registry_ttc_parsing() { 381 + // Check that TTC files in /System/Library/Fonts/ are parsed. 382 + if !has_system_fonts() { 383 + return; 384 + } 385 + let reg = FontRegistry::new(); 386 + 387 + // Courier.ttc exists on all macOS versions. 388 + let courier_path = Path::new("/System/Library/Fonts/Courier.ttc"); 389 + if courier_path.exists() { 390 + let font = reg.find_font("Courier"); 391 + assert!(font.is_some(), "should find Courier from .ttc file"); 392 + } 393 + } 394 + 395 + #[test] 396 + fn registry_find_with_style() { 397 + if !has_system_fonts() { 398 + return; 399 + } 400 + let reg = FontRegistry::new(); 401 + 402 + // Try to find a font family that has multiple styles. 403 + let families = reg.list_families(); 404 + for family in &families { 405 + if let Some(entries) = reg.family_entries(family) { 406 + if entries.len() > 1 { 407 + // This family has multiple faces — test style selection. 408 + let _regular = reg.find_font_with_style(family, false, false); 409 + let _bold = reg.find_font_with_style(family, true, false); 410 + // Just verify no crash. 
411 + return; 412 + } 413 + } 414 + } 415 + } 416 + 417 + #[test] 418 + fn parse_ttc_offsets_courier() { 419 + let path = Path::new("/System/Library/Fonts/Courier.ttc"); 420 + if !path.exists() { 421 + return; 422 + } 423 + let offsets = parse_ttc_offsets(path).expect("should parse TTC offsets"); 424 + assert!( 425 + !offsets.is_empty(), 426 + "Courier.ttc should contain at least one font" 427 + ); 428 + // First offset should be a valid position (after the TTC header). 429 + assert!( 430 + offsets[0] >= 12, 431 + "first font offset should be past TTC header" 432 + ); 433 + } 434 + 435 + #[test] 436 + fn font_entry_has_valid_metadata() { 437 + if !has_system_fonts() { 438 + return; 439 + } 440 + let reg = FontRegistry::new(); 441 + let families = reg.list_families(); 442 + if let Some(family) = families.first() { 443 + let entries = reg.family_entries(family).unwrap(); 444 + for entry in entries { 445 + assert!(!entry.family.is_empty(), "family name should not be empty"); 446 + assert!( 447 + !entry.subfamily.is_empty(), 448 + "subfamily name should not be empty" 449 + ); 450 + assert!(entry.path.exists(), "font file should exist"); 451 + } 452 + } 453 + } 454 + 455 + #[test] 456 + fn load_font_at_offset_ttc() { 457 + let path = Path::new("/System/Library/Fonts/Courier.ttc"); 458 + if !path.exists() { 459 + return; 460 + } 461 + let offsets = parse_ttc_offsets(path).expect("should parse TTC"); 462 + if let Some(&offset) = offsets.first() { 463 + let font = load_font_at_offset(path, offset).expect("should load font from TTC"); 464 + // Verify we can parse basic tables. 
465 + let name = font.name().expect("should parse name table"); 466 + assert!(name.family_name().is_some(), "should have a family name"); 467 + } 468 + } 469 + 470 + #[test] 471 + fn registry_nonexistent_family_returns_none() { 472 + if !has_system_fonts() { 473 + return; 474 + } 475 + let reg = FontRegistry::new(); 476 + assert!( 477 + reg.find_font("NonexistentFontFamily12345").is_none(), 478 + "should return None for nonexistent family" 479 + ); 480 + } 481 + }