···11+//! OTF/TTF font file parser.
22+//!
33+//! Parses the OpenType/TrueType table directory and individual tables needed
44+//! for text rendering: head, maxp, hhea, hmtx, cmap, name, loca.
55+66+use std::fmt;
77+88+mod parse;
99+mod tables;
1010+1111+pub use tables::cmap::CmapTable;
1212+pub use tables::head::HeadTable;
1313+pub use tables::hhea::HheaTable;
1414+pub use tables::hmtx::HmtxTable;
1515+pub use tables::loca::LocaTable;
1616+pub use tables::maxp::MaxpTable;
1717+pub use tables::name::NameTable;
/// Errors that can occur during font parsing.
///
/// Implements [`std::fmt::Display`] for user-facing messages and
/// [`std::error::Error`] so it composes with `?` and `Box<dyn Error>`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FontError {
    /// The data is too short to contain the expected structure.
    UnexpectedEof,
    /// The font file has an unrecognized magic number / sfVersion.
    InvalidMagic(u32),
    /// A required table is missing.
    MissingTable(&'static str),
    /// A table's data is malformed.
    MalformedTable(&'static str),
}

impl fmt::Display for FontError {
    /// Write a short, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FontError::UnexpectedEof => f.write_str("unexpected end of font data"),
            // The magic is printed as eight upper-case hex digits.
            FontError::InvalidMagic(magic) => write!(f, "invalid font magic: 0x{:08X}", magic),
            FontError::MissingTable(table) => write!(f, "missing required table: {}", table),
            FontError::MalformedTable(table) => write!(f, "malformed table: {}", table),
        }
    }
}

impl std::error::Error for FontError {}
/// One entry of the font's table directory.
///
/// Each record names a table by its four-byte tag and says where the table's
/// bytes live inside the font file.
#[derive(Debug, Clone)]
pub struct TableRecord {
    /// Four-byte tag (e.g. b"head", b"cmap").
    pub tag: [u8; 4],
    /// Checksum of the table as stored in the directory.
    pub checksum: u32,
    /// Offset of the table, measured from the beginning of the font file.
    pub offset: u32,
    /// Length of the table in bytes.
    pub length: u32,
}

impl TableRecord {
    /// Return the tag as text, for display and debugging.
    ///
    /// Yields `"????"` when the four tag bytes are not valid UTF-8.
    pub fn tag_str(&self) -> &str {
        match std::str::from_utf8(&self.tag) {
            Ok(text) => text,
            Err(_) => "????",
        }
    }
}
6262+6363+/// A parsed OpenType/TrueType font.
6464+#[derive(Debug)]
6565+pub struct Font {
6666+ /// Raw font data (owned).
6767+ data: Vec<u8>,
6868+ /// Offset subtable version (0x00010000 for TrueType, 0x4F54544F for CFF).
6969+ pub sf_version: u32,
7070+ /// Table directory records.
7171+ pub tables: Vec<TableRecord>,
7272+}
7373+7474+impl Font {
7575+ /// Parse a font from raw file bytes.
7676+ pub fn parse(data: Vec<u8>) -> Result<Font, FontError> {
7777+ let r = parse::Reader::new(&data);
7878+7979+ let sf_version = r.u32(0)?;
8080+ match sf_version {
8181+ 0x00010000 => {} // TrueType
8282+ 0x4F54544F => {} // CFF (OpenType with PostScript outlines)
8383+ 0x74727565 => {} // 'true' — old Apple TrueType
8484+ _ => return Err(FontError::InvalidMagic(sf_version)),
8585+ }
8686+8787+ let num_tables = r.u16(4)? as usize;
8888+ // skip searchRange(2), entrySelector(2), rangeShift(2) = 6 bytes
8989+ let mut tables = Vec::with_capacity(num_tables);
9090+ for i in 0..num_tables {
9191+ let base = 12 + i * 16;
9292+ let tag = r.tag(base)?;
9393+ let checksum = r.u32(base + 4)?;
9494+ let offset = r.u32(base + 8)?;
9595+ let length = r.u32(base + 12)?;
9696+ tables.push(TableRecord {
9797+ tag,
9898+ checksum,
9999+ offset,
100100+ length,
101101+ });
102102+ }
103103+104104+ Ok(Font {
105105+ data,
106106+ sf_version,
107107+ tables,
108108+ })
109109+ }
110110+111111+ /// Load a font from a file path.
112112+ pub fn from_file(path: &std::path::Path) -> Result<Font, FontError> {
113113+ let data = std::fs::read(path).map_err(|_| FontError::UnexpectedEof)?;
114114+ Font::parse(data)
115115+ }
116116+117117+ /// Find a table record by its 4-byte tag.
118118+ pub fn table_record(&self, tag: &[u8; 4]) -> Option<&TableRecord> {
119119+ self.tables.iter().find(|t| &t.tag == tag)
120120+ }
121121+122122+ /// Get the raw bytes for a table.
123123+ pub fn table_data(&self, tag: &[u8; 4]) -> Option<&[u8]> {
124124+ let rec = self.table_record(tag)?;
125125+ let start = rec.offset as usize;
126126+ let end = start + rec.length as usize;
127127+ if end <= self.data.len() {
128128+ Some(&self.data[start..end])
129129+ } else {
130130+ None
131131+ }
132132+ }
133133+134134+ /// Parse the `head` table.
135135+ pub fn head(&self) -> Result<HeadTable, FontError> {
136136+ let data = self
137137+ .table_data(b"head")
138138+ .ok_or(FontError::MissingTable("head"))?;
139139+ HeadTable::parse(data)
140140+ }
141141+142142+ /// Parse the `maxp` table.
143143+ pub fn maxp(&self) -> Result<MaxpTable, FontError> {
144144+ let data = self
145145+ .table_data(b"maxp")
146146+ .ok_or(FontError::MissingTable("maxp"))?;
147147+ MaxpTable::parse(data)
148148+ }
149149+150150+ /// Parse the `hhea` table.
151151+ pub fn hhea(&self) -> Result<HheaTable, FontError> {
152152+ let data = self
153153+ .table_data(b"hhea")
154154+ .ok_or(FontError::MissingTable("hhea"))?;
155155+ HheaTable::parse(data)
156156+ }
157157+158158+ /// Parse the `hmtx` table.
159159+ ///
160160+ /// Requires `maxp` and `hhea` to determine dimensions.
161161+ pub fn hmtx(&self) -> Result<HmtxTable, FontError> {
162162+ let maxp = self.maxp()?;
163163+ let hhea = self.hhea()?;
164164+ let data = self
165165+ .table_data(b"hmtx")
166166+ .ok_or(FontError::MissingTable("hmtx"))?;
167167+ HmtxTable::parse(data, hhea.num_long_hor_metrics, maxp.num_glyphs)
168168+ }
169169+170170+ /// Parse the `cmap` table.
171171+ pub fn cmap(&self) -> Result<CmapTable, FontError> {
172172+ let data = self
173173+ .table_data(b"cmap")
174174+ .ok_or(FontError::MissingTable("cmap"))?;
175175+ CmapTable::parse(data)
176176+ }
177177+178178+ /// Parse the `name` table.
179179+ pub fn name(&self) -> Result<NameTable, FontError> {
180180+ let data = self
181181+ .table_data(b"name")
182182+ .ok_or(FontError::MissingTable("name"))?;
183183+ NameTable::parse(data)
184184+ }
185185+186186+ /// Parse the `loca` table.
187187+ ///
188188+ /// Requires `head` (for index format) and `maxp` (for glyph count).
189189+ pub fn loca(&self) -> Result<LocaTable, FontError> {
190190+ let head = self.head()?;
191191+ let maxp = self.maxp()?;
192192+ let data = self
193193+ .table_data(b"loca")
194194+ .ok_or(FontError::MissingTable("loca"))?;
195195+ LocaTable::parse(data, head.index_to_loc_format, maxp.num_glyphs)
196196+ }
197197+198198+ /// Map a Unicode code point to a glyph index using the cmap table.
199199+ pub fn glyph_index(&self, codepoint: u32) -> Result<Option<u16>, FontError> {
200200+ let cmap = self.cmap()?;
201201+ Ok(cmap.glyph_index(codepoint))
202202+ }
203203+204204+ /// Returns true if this is a TrueType font (vs CFF/PostScript outlines).
205205+ pub fn is_truetype(&self) -> bool {
206206+ self.sf_version == 0x00010000 || self.sf_version == 0x74727565
207207+ }
208208+}
209209+210210+/// Load the first available system font from standard macOS paths.
211211+///
212212+/// Tries these fonts in order: Geneva.ttf, Helvetica.ttc, Monaco.ttf.
213213+/// For `.ttc` (TrueType Collection) files, only the first font is parsed.
214214+pub fn load_system_font() -> Result<Font, FontError> {
215215+ let candidates = [
216216+ "/System/Library/Fonts/Geneva.ttf",
217217+ "/System/Library/Fonts/Monaco.ttf",
218218+ ];
219219+ for path in &candidates {
220220+ let p = std::path::Path::new(path);
221221+ if p.exists() {
222222+ return Font::from_file(p);
223223+ }
224224+ }
225225+ Err(FontError::MissingTable("no system font found"))
226226+}
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    /// Load the first font that exists among a few common macOS fonts.
    ///
    /// Panics when none of them is present, so every test in this module
    /// needs a macOS-style `/System/Library/Fonts/` directory.
    fn test_font() -> Font {
        let paths = [
            "/System/Library/Fonts/Geneva.ttf",
            "/System/Library/Fonts/Monaco.ttf",
            "/System/Library/Fonts/Keyboard.ttf",
        ];
        for path in paths.iter().map(|p| Path::new(*p)) {
            if path.exists() {
                return Font::from_file(path).expect("failed to parse font");
            }
        }
        panic!("no test font found — need a .ttf file in /System/Library/Fonts/");
    }

    #[test]
    fn parse_table_directory() {
        let font = test_font();
        assert!(font.is_truetype());
        assert!(!font.tables.is_empty());
        // Every font must have these tables.
        assert!(font.table_record(b"head").is_some(), "missing head table");
        assert!(font.table_record(b"cmap").is_some(), "missing cmap table");
        assert!(font.table_record(b"maxp").is_some(), "missing maxp table");
    }

    #[test]
    fn parse_head_table() {
        let font = test_font();
        let head = font.head().expect("failed to parse head");
        assert!(
            head.units_per_em > 0,
            "units_per_em should be positive: {}",
            head.units_per_em
        );
        assert!(
            (16..=16384).contains(&head.units_per_em),
            "units_per_em out of range: {}",
            head.units_per_em
        );
    }

    #[test]
    fn parse_maxp_table() {
        let font = test_font();
        let maxp = font.maxp().expect("failed to parse maxp");
        assert!(maxp.num_glyphs > 0, "font should have at least one glyph");
    }

    #[test]
    fn parse_hhea_table() {
        let font = test_font();
        let hhea = font.hhea().expect("failed to parse hhea");
        assert!(hhea.ascent > 0, "ascent should be positive");
        assert!(hhea.num_long_hor_metrics > 0, "should have metrics");
    }

    #[test]
    fn parse_hmtx_table() {
        let font = test_font();
        let hmtx = font.hmtx().expect("failed to parse hmtx");
        let glyph_count = font.maxp().unwrap().num_glyphs as usize;
        assert_eq!(
            hmtx.advances.len(),
            glyph_count,
            "should have one advance per glyph"
        );
        assert_eq!(hmtx.lsbs.len(), glyph_count, "should have one lsb per glyph");
        // Glyph 0 (.notdef) typically has a nonzero advance.
        assert!(hmtx.advances[0] > 0, "glyph 0 advance should be nonzero");
    }

    #[test]
    fn parse_cmap_table() {
        let font = test_font();
        let cmap = font.cmap().expect("failed to parse cmap");

        // Look up ASCII 'A' (U+0041) — every Latin font should have it.
        let glyph_a = cmap.glyph_index(0x0041);
        assert!(
            matches!(glyph_a, Some(gid) if gid > 0),
            "should find a glyph for 'A'"
        );

        // Look up space (U+0020).
        let glyph_space = cmap.glyph_index(0x0020);
        assert!(glyph_space.is_some(), "should find a glyph for space");
    }

    #[test]
    fn parse_name_table() {
        let font = test_font();
        let name = font.name().expect("failed to parse name");
        let family = name.family_name();
        assert!(family.is_some(), "should have a family name");
        let family = family.unwrap();
        assert!(!family.is_empty(), "family name should not be empty");
    }

    #[test]
    fn parse_loca_table() {
        let font = test_font();
        let loca = font.loca().expect("failed to parse loca");
        let maxp = font.maxp().unwrap();
        // loca has num_glyphs + 1 entries.
        assert_eq!(
            loca.offsets.len(),
            maxp.num_glyphs as usize + 1,
            "loca should have num_glyphs + 1 entries"
        );
    }

    #[test]
    fn glyph_index_lookup() {
        let font = test_font();
        // 'A' should map to a nonzero glyph.
        let gid = font.glyph_index(0x0041).expect("glyph_index failed");
        assert!(matches!(gid, Some(g) if g > 0));

        // U+FFFD (REPLACEMENT CHARACTER) may or may not be mapped by the
        // font, so only check that the lookup itself succeeds.
        let gid_replacement = font.glyph_index(0xFFFD).expect("glyph_index failed");
        let _ = gid_replacement;
    }

    #[test]
    fn load_system_font_works() {
        // Only meaningful where one of the candidate fonts is installed
        // (i.e. on macOS); elsewhere the test is a no-op.
        let any_candidate_present = Path::new("/System/Library/Fonts/Geneva.ttf").exists()
            || Path::new("/System/Library/Fonts/Monaco.ttf").exists();
        if any_candidate_present {
            let font = load_system_font().expect("should load a system font");
            assert!(!font.tables.is_empty());
        }
    }
}
···11+//! `cmap` — Character to Glyph Index Mapping table.
22+//!
33+//! Maps Unicode code points to glyph indices. Supports format 4 (BMP) and
44+//! format 12 (full Unicode).
55+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/cmap>
66+77+use crate::font::parse::Reader;
88+use crate::font::FontError;
99+1010+/// Parsed `cmap` table.
1111+#[derive(Debug)]
1212+pub struct CmapTable {
1313+ /// The best subtable we found (preferring format 12 over format 4).
1414+ subtable: CmapSubtable,
1515+}
1616+1717+#[derive(Debug)]
1818+enum CmapSubtable {
1919+ Format4(Format4),
2020+ Format12(Format12),
2121+}
/// cmap format 4: segment mapping to delta values (BMP only).
///
/// The four segment vectors are parallel: index `i` in each describes the
/// same segment.
#[derive(Debug)]
struct Format4 {
    /// Last character code of each segment.
    end_codes: Vec<u16>,
    /// First character code of each segment.
    start_codes: Vec<u16>,
    /// Delta added to the code (or to the looked-up glyph) for each segment.
    id_deltas: Vec<i16>,
    /// Per-segment byte offset into the glyph index array; 0 means
    /// "use the delta directly".
    id_range_offsets: Vec<u16>,
    /// The raw glyph index array following the segments.
    glyph_indices: Vec<u16>,
}
3434+3535+/// cmap format 12: Segmented coverage for the full Unicode range.
3636+#[derive(Debug)]
3737+struct Format12 {
3838+ groups: Vec<SequentialMapGroup>,
3939+}
/// One group of a format 12 subtable: a run of consecutive code points
/// mapped to consecutive glyph indices.
#[derive(Debug)]
struct SequentialMapGroup {
    /// First code point of the run.
    start_char: u32,
    /// Last code point of the run (inclusive).
    end_char: u32,
    /// Glyph index assigned to `start_char`.
    start_glyph: u32,
}
4747+4848+impl CmapTable {
4949+ /// Parse the `cmap` table from raw bytes.
5050+ ///
5151+ /// Selects the best available subtable:
5252+ /// 1. Platform 3 (Windows), Encoding 10 (Unicode full) — format 12
5353+ /// 2. Platform 0 (Unicode), Encoding 4 (Unicode full) — format 12
5454+ /// 3. Platform 3 (Windows), Encoding 1 (Unicode BMP) — format 4
5555+ /// 4. Platform 0 (Unicode), Encoding 3 (Unicode BMP) — format 4
5656+ /// 5. First platform 0 or 3 subtable that parses successfully
5757+ pub fn parse(data: &[u8]) -> Result<CmapTable, FontError> {
5858+ let r = Reader::new(data);
5959+ if r.len() < 4 {
6060+ return Err(FontError::MalformedTable("cmap"));
6161+ }
6262+6363+ let num_tables = r.u16(2)? as usize;
6464+6565+ // Collect encoding records.
6666+ struct EncodingRecord {
6767+ platform_id: u16,
6868+ encoding_id: u16,
6969+ offset: u32,
7070+ }
7171+7272+ let mut records = Vec::with_capacity(num_tables);
7373+ for i in 0..num_tables {
7474+ let base = 4 + i * 8;
7575+ records.push(EncodingRecord {
7676+ platform_id: r.u16(base)?,
7777+ encoding_id: r.u16(base + 2)?,
7878+ offset: r.u32(base + 4)?,
7979+ });
8080+ }
8181+8282+ // Try subtables in preference order.
8383+ // Priority: (3,10) > (0,4) > (0,6) > (3,1) > (0,3) > (0,*) > (3,*)
8484+ let priority = |pid: u16, eid: u16| -> u8 {
8585+ match (pid, eid) {
8686+ (3, 10) => 0,
8787+ (0, 4) => 1,
8888+ (0, 6) => 2,
8989+ (3, 1) => 3,
9090+ (0, 3) => 4,
9191+ (0, _) => 5,
9292+ (3, _) => 6,
9393+ _ => 255,
9494+ }
9595+ };
9696+9797+ let mut best: Option<(u8, CmapSubtable)> = None;
9898+9999+ for rec in &records {
100100+ let p = priority(rec.platform_id, rec.encoding_id);
101101+ if p == 255 {
102102+ continue;
103103+ }
104104+ if let Some((bp, _)) = &best {
105105+ if p >= *bp {
106106+ continue;
107107+ }
108108+ }
109109+110110+ let offset = rec.offset as usize;
111111+ if offset + 2 > data.len() {
112112+ continue;
113113+ }
114114+ let format = r.u16(offset)?;
115115+116116+ match format {
117117+ 4 => {
118118+ if let Ok(st) = parse_format4(data, offset) {
119119+ best = Some((p, CmapSubtable::Format4(st)));
120120+ }
121121+ }
122122+ 12 => {
123123+ if let Ok(st) = parse_format12(data, offset) {
124124+ best = Some((p, CmapSubtable::Format12(st)));
125125+ }
126126+ }
127127+ _ => {}
128128+ }
129129+ }
130130+131131+ match best {
132132+ Some((_, subtable)) => Ok(CmapTable { subtable }),
133133+ None => Err(FontError::MalformedTable("cmap: no usable subtable")),
134134+ }
135135+ }
136136+137137+ /// Look up a Unicode code point and return the corresponding glyph index.
138138+ ///
139139+ /// Returns `None` if the code point is not mapped (maps to glyph 0).
140140+ pub fn glyph_index(&self, codepoint: u32) -> Option<u16> {
141141+ let gid = match &self.subtable {
142142+ CmapSubtable::Format4(f4) => lookup_format4(f4, codepoint),
143143+ CmapSubtable::Format12(f12) => lookup_format12(f12, codepoint),
144144+ };
145145+ if gid == 0 {
146146+ None
147147+ } else {
148148+ Some(gid)
149149+ }
150150+ }
151151+}
152152+153153+fn parse_format4(data: &[u8], offset: usize) -> Result<Format4, FontError> {
154154+ let r = Reader::new(data);
155155+ // format(2) + length(2) + language(2) + segCountX2(2)
156156+ if offset + 14 > data.len() {
157157+ return Err(FontError::MalformedTable("cmap format 4"));
158158+ }
159159+160160+ let seg_count_x2 = r.u16(offset + 6)? as usize;
161161+ let seg_count = seg_count_x2 / 2;
162162+ // skip searchRange(2) + entrySelector(2) + rangeShift(2)
163163+ let end_codes_offset = offset + 14;
164164+ // After endCodes there is a reservedPad(2), then startCodes.
165165+ let start_codes_offset = end_codes_offset + seg_count_x2 + 2;
166166+ let id_delta_offset = start_codes_offset + seg_count_x2;
167167+ let id_range_offset = id_delta_offset + seg_count_x2;
168168+169169+ let mut end_codes = Vec::with_capacity(seg_count);
170170+ let mut start_codes = Vec::with_capacity(seg_count);
171171+ let mut id_deltas = Vec::with_capacity(seg_count);
172172+ let mut id_range_offsets = Vec::with_capacity(seg_count);
173173+174174+ for i in 0..seg_count {
175175+ end_codes.push(r.u16(end_codes_offset + i * 2)?);
176176+ start_codes.push(r.u16(start_codes_offset + i * 2)?);
177177+ id_deltas.push(r.i16(id_delta_offset + i * 2)?);
178178+ id_range_offsets.push(r.u16(id_range_offset + i * 2)?);
179179+ }
180180+181181+ // Everything after idRangeOffset is the glyphIdArray.
182182+ let glyph_array_offset = id_range_offset + seg_count_x2;
183183+ let remaining_bytes = data.len().saturating_sub(glyph_array_offset);
184184+ let num_glyph_indices = remaining_bytes / 2;
185185+ let mut glyph_indices = Vec::with_capacity(num_glyph_indices);
186186+ for i in 0..num_glyph_indices {
187187+ glyph_indices.push(r.u16(glyph_array_offset + i * 2)?);
188188+ }
189189+190190+ Ok(Format4 {
191191+ end_codes,
192192+ start_codes,
193193+ id_deltas,
194194+ id_range_offsets,
195195+ glyph_indices,
196196+ })
197197+}
198198+199199+fn lookup_format4(f4: &Format4, codepoint: u32) -> u16 {
200200+ if codepoint > 0xFFFF {
201201+ return 0;
202202+ }
203203+ let cp = codepoint as u16;
204204+205205+ for i in 0..f4.end_codes.len() {
206206+ if cp > f4.end_codes[i] {
207207+ continue;
208208+ }
209209+ if cp < f4.start_codes[i] {
210210+ return 0;
211211+ }
212212+213213+ if f4.id_range_offsets[i] == 0 {
214214+ // Use delta.
215215+ return (cp as i32 + f4.id_deltas[i] as i32) as u16;
216216+ }
217217+218218+ // Use range offset into glyphIdArray.
219219+ // The offset is relative to the position of idRangeOffset[i] in the data.
220220+ // index = idRangeOffset[i]/2 + (cp - startCode[i]) - segCount + i
221221+ let range_offset = f4.id_range_offsets[i] as usize;
222222+ let seg_count = f4.end_codes.len();
223223+ let idx = range_offset / 2 + (cp - f4.start_codes[i]) as usize;
224224+ // idx is relative to position of idRangeOffset[i], which is at
225225+ // range_offset_base + i*2 in the original data. We need to convert
226226+ // to an index into our glyph_indices array.
227227+ // The glyph_indices array starts at range_offset_base + seg_count*2.
228228+ // So the array index = idx - seg_count + i
229229+ let array_idx = idx.wrapping_sub(seg_count).wrapping_add(i);
230230+ if array_idx < f4.glyph_indices.len() {
231231+ let gid = f4.glyph_indices[array_idx];
232232+ if gid == 0 {
233233+ return 0;
234234+ }
235235+ return (gid as i32 + f4.id_deltas[i] as i32) as u16;
236236+ }
237237+238238+ return 0;
239239+ }
240240+241241+ 0
242242+}
243243+244244+fn parse_format12(data: &[u8], offset: usize) -> Result<Format12, FontError> {
245245+ let r = Reader::new(data);
246246+ // format(2) + reserved(2) + length(4) + language(4) + numGroups(4)
247247+ if offset + 16 > data.len() {
248248+ return Err(FontError::MalformedTable("cmap format 12"));
249249+ }
250250+251251+ let num_groups = r.u32(offset + 12)? as usize;
252252+ let groups_offset = offset + 16;
253253+254254+ let mut groups = Vec::with_capacity(num_groups);
255255+ for i in 0..num_groups {
256256+ let base = groups_offset + i * 12;
257257+ groups.push(SequentialMapGroup {
258258+ start_char: r.u32(base)?,
259259+ end_char: r.u32(base + 4)?,
260260+ start_glyph: r.u32(base + 8)?,
261261+ });
262262+ }
263263+264264+ Ok(Format12 { groups })
265265+}
266266+267267+fn lookup_format12(f12: &Format12, codepoint: u32) -> u16 {
268268+ // Binary search for the group containing codepoint.
269269+ let mut lo = 0usize;
270270+ let mut hi = f12.groups.len();
271271+ while lo < hi {
272272+ let mid = lo + (hi - lo) / 2;
273273+ let group = &f12.groups[mid];
274274+ if codepoint < group.start_char {
275275+ hi = mid;
276276+ } else if codepoint > group.end_char {
277277+ lo = mid + 1;
278278+ } else {
279279+ // Found it.
280280+ let gid = group.start_glyph + (codepoint - group.start_char);
281281+ return gid as u16;
282282+ }
283283+ }
284284+ 0
285285+}
+74
crates/text/src/font/tables/head.rs
···11+//! `head` — Font Header table.
22+//!
33+//! Contains global font metrics and flags.
44+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/head>
55+66+use crate::font::parse::Reader;
77+use crate::font::FontError;
88+99+/// Parsed `head` table.
1010+#[derive(Debug)]
1111+pub struct HeadTable {
1212+ /// Major version (should be 1).
1313+ pub major_version: u16,
1414+ /// Minor version (should be 0).
1515+ pub minor_version: u16,
1616+ /// Font revision (fixed-point 16.16).
1717+ pub font_revision: i32,
1818+ /// Units per em (typically 1000 or 2048).
1919+ pub units_per_em: u16,
2020+ /// Bounding box: minimum x.
2121+ pub x_min: i16,
2222+ /// Bounding box: minimum y.
2323+ pub y_min: i16,
2424+ /// Bounding box: maximum x.
2525+ pub x_max: i16,
2626+ /// Bounding box: maximum y.
2727+ pub y_max: i16,
2828+ /// Mac style flags (bit 0 = bold, bit 1 = italic).
2929+ pub mac_style: u16,
3030+ /// Smallest readable size in pixels.
3131+ pub lowest_rec_ppem: u16,
3232+ /// 0 = short offsets in loca, 1 = long offsets.
3333+ pub index_to_loc_format: i16,
3434+}
3535+3636+impl HeadTable {
3737+ /// Parse the `head` table from raw bytes.
3838+ pub fn parse(data: &[u8]) -> Result<HeadTable, FontError> {
3939+ let r = Reader::new(data);
4040+ // Minimum head table size is 54 bytes.
4141+ if r.len() < 54 {
4242+ return Err(FontError::MalformedTable("head"));
4343+ }
4444+4545+ let major_version = r.u16(0)?;
4646+ let minor_version = r.u16(2)?;
4747+ let font_revision = r.i32(4)?;
4848+ // skip checksumAdjustment(4) + magicNumber(4) + flags(2)
4949+ let units_per_em = r.u16(18)?;
5050+ // skip created(8) + modified(8)
5151+ let x_min = r.i16(36)?;
5252+ let y_min = r.i16(38)?;
5353+ let x_max = r.i16(40)?;
5454+ let y_max = r.i16(42)?;
5555+ let mac_style = r.u16(44)?;
5656+ let lowest_rec_ppem = r.u16(46)?;
5757+ // skip fontDirectionHint(2)
5858+ let index_to_loc_format = r.i16(50)?;
5959+6060+ Ok(HeadTable {
6161+ major_version,
6262+ minor_version,
6363+ font_revision,
6464+ units_per_em,
6565+ x_min,
6666+ y_min,
6767+ x_max,
6868+ y_max,
6969+ mac_style,
7070+ lowest_rec_ppem,
7171+ index_to_loc_format,
7272+ })
7373+ }
7474+}
+60
crates/text/src/font/tables/hhea.rs
···11+//! `hhea` — Horizontal Header table.
22+//!
33+//! Contains global horizontal layout metrics.
44+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/hhea>
55+66+use crate::font::parse::Reader;
77+use crate::font::FontError;
88+99+/// Parsed `hhea` table.
1010+#[derive(Debug)]
1111+pub struct HheaTable {
1212+ /// Typographic ascent (in font units).
1313+ pub ascent: i16,
1414+ /// Typographic descent (typically negative, in font units).
1515+ pub descent: i16,
1616+ /// Typographic line gap (in font units).
1717+ pub line_gap: i16,
1818+ /// Maximum advance width across all glyphs.
1919+ pub advance_width_max: u16,
2020+ /// Minimum left side bearing across all glyphs.
2121+ pub min_left_side_bearing: i16,
2222+ /// Minimum right side bearing across all glyphs.
2323+ pub min_right_side_bearing: i16,
2424+ /// Maximum x extent (max(lsb + (xMax - xMin))).
2525+ pub x_max_extent: i16,
2626+ /// Number of entries in the hmtx table's longHorMetric array.
2727+ pub num_long_hor_metrics: u16,
2828+}
2929+3030+impl HheaTable {
3131+ /// Parse the `hhea` table from raw bytes.
3232+ pub fn parse(data: &[u8]) -> Result<HheaTable, FontError> {
3333+ let r = Reader::new(data);
3434+ if r.len() < 36 {
3535+ return Err(FontError::MalformedTable("hhea"));
3636+ }
3737+3838+ // skip version(4)
3939+ let ascent = r.i16(4)?;
4040+ let descent = r.i16(6)?;
4141+ let line_gap = r.i16(8)?;
4242+ let advance_width_max = r.u16(10)?;
4343+ let min_left_side_bearing = r.i16(12)?;
4444+ let min_right_side_bearing = r.i16(14)?;
4545+ let x_max_extent = r.i16(16)?;
4646+ // skip caretSlopeRise(2), caretSlopeRun(2), caretOffset(2), reserved(8), metricDataFormat(2)
4747+ let num_long_hor_metrics = r.u16(34)?;
4848+4949+ Ok(HheaTable {
5050+ ascent,
5151+ descent,
5252+ line_gap,
5353+ advance_width_max,
5454+ min_left_side_bearing,
5555+ min_right_side_bearing,
5656+ x_max_extent,
5757+ num_long_hor_metrics,
5858+ })
5959+ }
6060+}
+55
crates/text/src/font/tables/hmtx.rs
···11+//! `hmtx` — Horizontal Metrics table.
22+//!
33+//! Contains per-glyph horizontal metrics (advance width + left side bearing).
44+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/hmtx>
55+66+use crate::font::parse::Reader;
77+use crate::font::FontError;
88+99+/// Parsed `hmtx` table.
1010+///
1111+/// Both `advances` and `lsbs` are indexed by glyph ID and have exactly
1212+/// `num_glyphs` entries.
1313+#[derive(Debug)]
1414+pub struct HmtxTable {
1515+ /// Advance widths for each glyph (in font units).
1616+ pub advances: Vec<u16>,
1717+ /// Left side bearings for each glyph (in font units).
1818+ pub lsbs: Vec<i16>,
1919+}
2020+2121+impl HmtxTable {
2222+ /// Parse the `hmtx` table from raw bytes.
2323+ ///
2424+ /// `num_long_hor_metrics` comes from `hhea`, `num_glyphs` from `maxp`.
2525+ pub fn parse(
2626+ data: &[u8],
2727+ num_long_hor_metrics: u16,
2828+ num_glyphs: u16,
2929+ ) -> Result<HmtxTable, FontError> {
3030+ let r = Reader::new(data);
3131+ let n_long = num_long_hor_metrics as usize;
3232+ let n_glyphs = num_glyphs as usize;
3333+3434+ let mut advances = Vec::with_capacity(n_glyphs);
3535+ let mut lsbs = Vec::with_capacity(n_glyphs);
3636+3737+ // First n_long entries are (advance_width: u16, lsb: i16) pairs.
3838+ for i in 0..n_long {
3939+ let offset = i * 4;
4040+ advances.push(r.u16(offset)?);
4141+ lsbs.push(r.i16(offset + 2)?);
4242+ }
4343+4444+ // Remaining glyphs share the last advance width, but have individual lsbs.
4545+ let last_advance = advances.last().copied().unwrap_or(0);
4646+ let remaining = n_glyphs.saturating_sub(n_long);
4747+ let lsb_offset = n_long * 4;
4848+ for i in 0..remaining {
4949+ advances.push(last_advance);
5050+ lsbs.push(r.i16(lsb_offset + i * 2)?);
5151+ }
5252+5353+ Ok(HmtxTable { advances, lsbs })
5454+ }
5555+}
+79
crates/text/src/font/tables/loca.rs
···11+//! `loca` — Index to Location table.
22+//!
33+//! Maps glyph IDs to byte offsets within the `glyf` table.
44+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/loca>
55+66+use crate::font::parse::Reader;
77+use crate::font::FontError;
88+99+/// Parsed `loca` table.
1010+///
1111+/// Contains `num_glyphs + 1` offsets. The glyph data for glyph `i` starts at
1212+/// `offsets[i]` and ends at `offsets[i + 1]`. If they are equal, the glyph
1313+/// has no outline (e.g., a space character).
1414+#[derive(Debug)]
1515+pub struct LocaTable {
1616+ /// Byte offsets into the `glyf` table, one per glyph plus a sentinel.
1717+ pub offsets: Vec<u32>,
1818+}
1919+2020+impl LocaTable {
2121+ /// Parse the `loca` table from raw bytes.
2222+ ///
2323+ /// `index_to_loc_format` comes from the `head` table (0 = short, 1 = long).
2424+ /// `num_glyphs` comes from the `maxp` table.
2525+ pub fn parse(
2626+ data: &[u8],
2727+ index_to_loc_format: i16,
2828+ num_glyphs: u16,
2929+ ) -> Result<LocaTable, FontError> {
3030+ let r = Reader::new(data);
3131+ let count = num_glyphs as usize + 1;
3232+ let mut offsets = Vec::with_capacity(count);
3333+3434+ match index_to_loc_format {
3535+ 0 => {
3636+ // Short format: offsets are u16 values divided by 2.
3737+ for i in 0..count {
3838+ let raw = r.u16(i * 2)? as u32;
3939+ offsets.push(raw * 2);
4040+ }
4141+ }
4242+ 1 => {
4343+ // Long format: offsets are u32 values.
4444+ for i in 0..count {
4545+ offsets.push(r.u32(i * 4)?);
4646+ }
4747+ }
4848+ _ => return Err(FontError::MalformedTable("loca: invalid index format")),
4949+ }
5050+5151+ Ok(LocaTable { offsets })
5252+ }
5353+5454+ /// Returns true if the glyph has outline data (non-empty in glyf).
5555+ pub fn has_outline(&self, glyph_id: u16) -> bool {
5656+ let i = glyph_id as usize;
5757+ if i + 1 < self.offsets.len() {
5858+ self.offsets[i] != self.offsets[i + 1]
5959+ } else {
6060+ false
6161+ }
6262+ }
6363+6464+ /// Get the byte range for a glyph within the `glyf` table.
6565+ pub fn glyph_range(&self, glyph_id: u16) -> Option<(u32, u32)> {
6666+ let i = glyph_id as usize;
6767+ if i + 1 < self.offsets.len() {
6868+ let start = self.offsets[i];
6969+ let end = self.offsets[i + 1];
7070+ if start < end {
7171+ Some((start, end))
7272+ } else {
7373+ None
7474+ }
7575+ } else {
7676+ None
7777+ }
7878+ }
7979+}
+35
crates/text/src/font/tables/maxp.rs
···11+//! `maxp` — Maximum Profile table.
22+//!
33+//! Contains the number of glyphs in the font plus (for TrueType) various
44+//! maximum values used for memory allocation.
55+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/maxp>
66+77+use crate::font::parse::Reader;
88+use crate::font::FontError;
99+1010+/// Parsed `maxp` table.
1111+#[derive(Debug)]
1212+pub struct MaxpTable {
1313+ /// Version (0x00005000 for CFF, 0x00010000 for TrueType).
1414+ pub version: u32,
1515+ /// Total number of glyphs in the font.
1616+ pub num_glyphs: u16,
1717+}
1818+1919+impl MaxpTable {
2020+ /// Parse the `maxp` table from raw bytes.
2121+ pub fn parse(data: &[u8]) -> Result<MaxpTable, FontError> {
2222+ let r = Reader::new(data);
2323+ if r.len() < 6 {
2424+ return Err(FontError::MalformedTable("maxp"));
2525+ }
2626+2727+ let version = r.u32(0)?;
2828+ let num_glyphs = r.u16(4)?;
2929+3030+ Ok(MaxpTable {
3131+ version,
3232+ num_glyphs,
3333+ })
3434+ }
3535+}
+9
crates/text/src/font/tables/mod.rs
···11+//! Individual font table parsers.
22+33+pub mod cmap;
44+pub mod head;
55+pub mod hhea;
66+pub mod hmtx;
77+pub mod loca;
88+pub mod maxp;
99+pub mod name;
+221
crates/text/src/font/tables/name.rs
···11+//! `name` — Naming table.
22+//!
33+//! Contains human-readable strings like family name, style name, copyright, etc.
44+//! Reference: <https://learn.microsoft.com/en-us/typography/opentype/spec/name>
55+66+use crate::font::parse::Reader;
77+use crate::font::FontError;
88+99+/// Parsed `name` table.
1010+#[derive(Debug)]
1111+pub struct NameTable {
1212+ /// All name records extracted from the table.
1313+ pub records: Vec<NameRecord>,
1414+}
/// A single name record: the identifying IDs plus the decoded string.
#[derive(Debug)]
pub struct NameRecord {
    /// Platform ID (0 = Unicode, 1 = Macintosh, 3 = Windows).
    pub platform_id: u16,
    /// Encoding ID (platform-specific).
    pub encoding_id: u16,
    /// Language ID.
    pub language_id: u16,
    /// Name ID (1 = family, 2 = subfamily, 4 = full name, 6 = PostScript name, etc.).
    pub name_id: u16,
    /// The decoded string value.
    pub value: String,
}
3030+3131+impl NameTable {
3232+ /// Parse the `name` table from raw bytes.
3333+ pub fn parse(data: &[u8]) -> Result<NameTable, FontError> {
3434+ let r = Reader::new(data);
3535+ if r.len() < 6 {
3636+ return Err(FontError::MalformedTable("name"));
3737+ }
3838+3939+ // format(2) + count(2) + stringOffset(2)
4040+ let count = r.u16(2)? as usize;
4141+ let string_offset = r.u16(4)? as usize;
4242+4343+ let mut records = Vec::with_capacity(count);
4444+4545+ for i in 0..count {
4646+ let base = 6 + i * 12;
4747+ if base + 12 > data.len() {
4848+ break;
4949+ }
5050+5151+ let platform_id = r.u16(base)?;
5252+ let encoding_id = r.u16(base + 2)?;
5353+ let language_id = r.u16(base + 4)?;
5454+ let name_id = r.u16(base + 6)?;
5555+ let length = r.u16(base + 8)? as usize;
5656+ let offset = r.u16(base + 10)? as usize;
5757+5858+ let str_start = string_offset + offset;
5959+ if str_start + length > data.len() {
6060+ continue;
6161+ }
6262+6363+ let raw = r.slice(str_start, length)?;
6464+ let value = decode_name_string(platform_id, encoding_id, raw);
6565+6666+ records.push(NameRecord {
6767+ platform_id,
6868+ encoding_id,
6969+ language_id,
7070+ name_id,
7171+ value,
7272+ });
7373+ }
7474+7575+ Ok(NameTable { records })
7676+ }
7777+7878+ /// Get the font family name (name ID 1).
7979+ ///
8080+ /// Prefers Windows/Unicode platform, falls back to any platform.
8181+ pub fn family_name(&self) -> Option<&str> {
8282+ self.get_name(1)
8383+ }
8484+8585+ /// Get the font subfamily/style name (name ID 2, e.g. "Regular", "Bold").
8686+ pub fn subfamily_name(&self) -> Option<&str> {
8787+ self.get_name(2)
8888+ }
8989+9090+ /// Get the full font name (name ID 4).
9191+ pub fn full_name(&self) -> Option<&str> {
9292+ self.get_name(4)
9393+ }
9494+9595+ /// Get a name string by name ID.
9696+ ///
9797+ /// Prefers Windows platform (3) with English, then any platform.
9898+ fn get_name(&self, name_id: u16) -> Option<&str> {
9999+ // Prefer Windows platform (3), English (language_id 0x0409).
100100+ let win_en = self
101101+ .records
102102+ .iter()
103103+ .find(|r| r.name_id == name_id && r.platform_id == 3 && r.language_id == 0x0409);
104104+ if let Some(rec) = win_en {
105105+ if !rec.value.is_empty() {
106106+ return Some(&rec.value);
107107+ }
108108+ }
109109+110110+ // Fall back to any Windows platform record.
111111+ let win = self
112112+ .records
113113+ .iter()
114114+ .find(|r| r.name_id == name_id && r.platform_id == 3);
115115+ if let Some(rec) = win {
116116+ if !rec.value.is_empty() {
117117+ return Some(&rec.value);
118118+ }
119119+ }
120120+121121+ // Fall back to any record.
122122+ self.records
123123+ .iter()
124124+ .find(|r| r.name_id == name_id && !r.value.is_empty())
125125+ .map(|r| r.value.as_str())
126126+ }
127127+}
128128+129129+/// Decode a name string based on platform/encoding.
130130+fn decode_name_string(platform_id: u16, encoding_id: u16, data: &[u8]) -> String {
131131+ match platform_id {
132132+ 0 => {
133133+ // Unicode platform — always UTF-16BE.
134134+ decode_utf16be(data)
135135+ }
136136+ 1 => {
137137+ // Macintosh platform.
138138+ if encoding_id == 0 {
139139+ // Mac Roman.
140140+ decode_mac_roman(data)
141141+ } else {
142142+ // Other Mac encodings — treat as ASCII fallback.
143143+ String::from_utf8_lossy(data).into_owned()
144144+ }
145145+ }
146146+ 3 => {
147147+ // Windows platform — encoding 1 = UTF-16BE, encoding 10 = UTF-16BE.
148148+ match encoding_id {
149149+ 1 | 10 => decode_utf16be(data),
150150+ 0 => {
151151+ // Symbol encoding — treat as UTF-16BE.
152152+ decode_utf16be(data)
153153+ }
154154+ _ => String::from_utf8_lossy(data).into_owned(),
155155+ }
156156+ }
157157+ _ => String::from_utf8_lossy(data).into_owned(),
158158+ }
159159+}
/// Decode big-endian UTF-16 bytes into a `String`.
///
/// Decoding is lenient: unpaired surrogates are dropped rather than replaced,
/// and a trailing odd byte (an incomplete code unit) is ignored.
fn decode_utf16be(data: &[u8]) -> String {
    // `chunks_exact` yields only complete 2-byte code units; a dangling final
    // byte ends up in the (unused) remainder.
    let units = data
        .chunks_exact(2)
        .map(|pair| u16::from_be_bytes([pair[0], pair[1]]));

    // The standard decoder pairs surrogates and reports each unpaired one as
    // an `Err`; keeping only the `Ok` items drops exactly those units.
    char::decode_utf16(units).filter_map(Result::ok).collect()
}
/// Decode Mac Roman encoded bytes into a `String`.
///
/// Bytes below 0x80 are taken as ASCII; bytes 0x80..=0xFF are translated
/// through a fixed lookup table of Unicode code points.
fn decode_mac_roman(data: &[u8]) -> String {
    // Code points for the upper half of Mac Roman, indexed by `byte - 0x80`.
    static MAC_ROMAN_HIGH: [u16; 128] = [
        0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1, 0x00E0, 0x00E2, 0x00E4,
        0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8, 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF,
        0x00F1, 0x00F3, 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC, 0x2020,
        0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF, 0x00AE, 0x00A9, 0x2122, 0x00B4,
        0x00A8, 0x2260, 0x00C6, 0x00D8, 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202,
        0x2211, 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8, 0x00BF, 0x00A1,
        0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB, 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3,
        0x00D5, 0x0152, 0x0153, 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
        0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02, 0x2021, 0x00B7, 0x201A,
        0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1, 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC,
        0x00D3, 0x00D4, 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC, 0x00AF,
        0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7,
    ];

    let mut decoded = String::with_capacity(data.len());
    decoded.extend(data.iter().filter_map(|&byte| match byte.checked_sub(0x80) {
        // Lower half: identical to ASCII.
        None => Some(char::from(byte)),
        // Upper half: table lookup; a value that is not a valid scalar is dropped.
        Some(index) => char::from_u32(u32::from(MAC_ROMAN_HIGH[usize::from(index)])),
    }));
    decoded
}
+2
crates/text/src/lib.rs
···11//! Font parsing (OTF/TTF), shaping, rasterization, and line breaking — pure Rust.
22+33+pub mod font;