//! `cmap` — Character to Glyph Index Mapping table. //! //! Maps Unicode code points to glyph indices. Supports format 4 (BMP) and //! format 12 (full Unicode). //! Reference: use crate::font::parse::Reader; use crate::font::FontError; /// Parsed `cmap` table. #[derive(Debug)] pub struct CmapTable { /// The best subtable we found (preferring format 12 over format 4). subtable: CmapSubtable, } #[derive(Debug)] enum CmapSubtable { Format4(Format4), Format12(Format12), } /// cmap format 4: Segment mapping to delta values (BMP only). #[derive(Debug)] struct Format4 { /// Parallel arrays defining segments. end_codes: Vec, start_codes: Vec, id_deltas: Vec, id_range_offsets: Vec, /// The raw glyph index array following the segments. glyph_indices: Vec, } /// cmap format 12: Segmented coverage for the full Unicode range. #[derive(Debug)] struct Format12 { groups: Vec, } #[derive(Debug)] struct SequentialMapGroup { start_char: u32, end_char: u32, start_glyph: u32, } impl CmapTable { /// Parse the `cmap` table from raw bytes. /// /// Selects the best available subtable: /// 1. Platform 3 (Windows), Encoding 10 (Unicode full) — format 12 /// 2. Platform 0 (Unicode), Encoding 4 (Unicode full) — format 12 /// 3. Platform 3 (Windows), Encoding 1 (Unicode BMP) — format 4 /// 4. Platform 0 (Unicode), Encoding 3 (Unicode BMP) — format 4 /// 5. First platform 0 or 3 subtable that parses successfully pub fn parse(data: &[u8]) -> Result { let r = Reader::new(data); if r.len() < 4 { return Err(FontError::MalformedTable("cmap")); } let num_tables = r.u16(2)? as usize; // Collect encoding records. struct EncodingRecord { platform_id: u16, encoding_id: u16, offset: u32, } let mut records = Vec::with_capacity(num_tables); for i in 0..num_tables { let base = 4 + i * 8; records.push(EncodingRecord { platform_id: r.u16(base)?, encoding_id: r.u16(base + 2)?, offset: r.u32(base + 4)?, }); } // Try subtables in preference order. // Priority: (3,10) > (0,4) > (0,6) > (3,1) > (0,3) > (0,*) > (3,*) let priority = |pid: u16, eid: u16| -> u8 { match (pid, eid) { (3, 10) => 0, (0, 4) => 1, (0, 6) => 2, (3, 1) => 3, (0, 3) => 4, (0, _) => 5, (3, _) => 6, _ => 255, } }; let mut best: Option<(u8, CmapSubtable)> = None; for rec in &records { let p = priority(rec.platform_id, rec.encoding_id); if p == 255 { continue; } if let Some((bp, _)) = &best { if p >= *bp { continue; } } let offset = rec.offset as usize; if offset + 2 > data.len() { continue; } let format = r.u16(offset)?; match format { 4 => { if let Ok(st) = parse_format4(data, offset) { best = Some((p, CmapSubtable::Format4(st))); } } 12 => { if let Ok(st) = parse_format12(data, offset) { best = Some((p, CmapSubtable::Format12(st))); } } _ => {} } } match best { Some((_, subtable)) => Ok(CmapTable { subtable }), None => Err(FontError::MalformedTable("cmap: no usable subtable")), } } /// Look up a Unicode code point and return the corresponding glyph index. /// /// Returns `None` if the code point is not mapped (maps to glyph 0). pub fn glyph_index(&self, codepoint: u32) -> Option { let gid = match &self.subtable { CmapSubtable::Format4(f4) => lookup_format4(f4, codepoint), CmapSubtable::Format12(f12) => lookup_format12(f12, codepoint), }; if gid == 0 { None } else { Some(gid) } } } fn parse_format4(data: &[u8], offset: usize) -> Result { let r = Reader::new(data); // format(2) + length(2) + language(2) + segCountX2(2) if offset + 14 > data.len() { return Err(FontError::MalformedTable("cmap format 4")); } let seg_count_x2 = r.u16(offset + 6)? as usize; let seg_count = seg_count_x2 / 2; // skip searchRange(2) + entrySelector(2) + rangeShift(2) let end_codes_offset = offset + 14; // After endCodes there is a reservedPad(2), then startCodes. let start_codes_offset = end_codes_offset + seg_count_x2 + 2; let id_delta_offset = start_codes_offset + seg_count_x2; let id_range_offset = id_delta_offset + seg_count_x2; let mut end_codes = Vec::with_capacity(seg_count); let mut start_codes = Vec::with_capacity(seg_count); let mut id_deltas = Vec::with_capacity(seg_count); let mut id_range_offsets = Vec::with_capacity(seg_count); for i in 0..seg_count { end_codes.push(r.u16(end_codes_offset + i * 2)?); start_codes.push(r.u16(start_codes_offset + i * 2)?); id_deltas.push(r.i16(id_delta_offset + i * 2)?); id_range_offsets.push(r.u16(id_range_offset + i * 2)?); } // Everything after idRangeOffset is the glyphIdArray. let glyph_array_offset = id_range_offset + seg_count_x2; let remaining_bytes = data.len().saturating_sub(glyph_array_offset); let num_glyph_indices = remaining_bytes / 2; let mut glyph_indices = Vec::with_capacity(num_glyph_indices); for i in 0..num_glyph_indices { glyph_indices.push(r.u16(glyph_array_offset + i * 2)?); } Ok(Format4 { end_codes, start_codes, id_deltas, id_range_offsets, glyph_indices, }) } fn lookup_format4(f4: &Format4, codepoint: u32) -> u16 { if codepoint > 0xFFFF { return 0; } let cp = codepoint as u16; for i in 0..f4.end_codes.len() { if cp > f4.end_codes[i] { continue; } if cp < f4.start_codes[i] { return 0; } if f4.id_range_offsets[i] == 0 { // Use delta. return (cp as i32 + f4.id_deltas[i] as i32) as u16; } // Use range offset into glyphIdArray. // The offset is relative to the position of idRangeOffset[i] in the data. // index = idRangeOffset[i]/2 + (cp - startCode[i]) - segCount + i let range_offset = f4.id_range_offsets[i] as usize; let seg_count = f4.end_codes.len(); let idx = range_offset / 2 + (cp - f4.start_codes[i]) as usize; // idx is relative to position of idRangeOffset[i], which is at // range_offset_base + i*2 in the original data. We need to convert // to an index into our glyph_indices array. // The glyph_indices array starts at range_offset_base + seg_count*2. // So the array index = idx - seg_count + i let array_idx = idx.wrapping_sub(seg_count).wrapping_add(i); if array_idx < f4.glyph_indices.len() { let gid = f4.glyph_indices[array_idx]; if gid == 0 { return 0; } return (gid as i32 + f4.id_deltas[i] as i32) as u16; } return 0; } 0 } fn parse_format12(data: &[u8], offset: usize) -> Result { let r = Reader::new(data); // format(2) + reserved(2) + length(4) + language(4) + numGroups(4) if offset + 16 > data.len() { return Err(FontError::MalformedTable("cmap format 12")); } let num_groups = r.u32(offset + 12)? as usize; let groups_offset = offset + 16; let mut groups = Vec::with_capacity(num_groups); for i in 0..num_groups { let base = groups_offset + i * 12; groups.push(SequentialMapGroup { start_char: r.u32(base)?, end_char: r.u32(base + 4)?, start_glyph: r.u32(base + 8)?, }); } Ok(Format12 { groups }) } fn lookup_format12(f12: &Format12, codepoint: u32) -> u16 { // Binary search for the group containing codepoint. let mut lo = 0usize; let mut hi = f12.groups.len(); while lo < hi { let mid = lo + (hi - lo) / 2; let group = &f12.groups[mid]; if codepoint < group.start_char { hi = mid; } else if codepoint > group.end_char { lo = mid + 1; } else { // Found it. let gid = group.start_glyph + (codepoint - group.start_char); return gid as u16; } } 0 }