// src/lib/richtext.js - Bluesky-compatible richtext parsing and rendering
/**
 * Parse text for Bluesky facets: links, mentions, and hashtags.
 *
 * Detection rules:
 * - Links: `http://` / `https://` URLs. Trailing sentence punctuation
 *   (`.,;:!?`) is not part of the link.
 * - Mentions: `@handle` or `@handle.domain.tld`. A mention facet is only
 *   emitted when `resolveHandle` returns a truthy DID for the handle; a
 *   rejected/throwing resolver skips that mention. An `@` glued to a
 *   preceding word character (e.g. an e-mail address) is not a mention.
 * - Hashtags: `#tag` (ASCII letters, digits, underscore; must start with a
 *   letter). A `#` glued to a preceding word character is not a hashtag.
 *
 * Earlier categories win: text already covered by a link is never reported
 * again as a mention or hashtag.
 *
 * @param {string} text - Plain text to parse
 * @param {function} resolveHandle - Optional async function to resolve a handle (without the leading @) to a DID
 * @returns {Promise<{ text: string, facets: Array }>} The input text and its
 *   facets, sorted by `index.byteStart`. Indices are UTF-8 byte offsets.
 */
export async function parseTextToFacets(text, resolveHandle = null) {
  if (!text) return { text: '', facets: [] };

  const encoder = new TextEncoder();
  // Regex matches give UTF-16 indices; facets need UTF-8 byte offsets.
  const byteOffsetAt = (charIndex) =>
    encoder.encode(text.slice(0, charIndex)).length;

  // Half-open [start, end) character ranges already covered by a facet.
  const claimed = [];
  const isClaimed = (start, end) =>
    claimed.some(([from, to]) => start < to && from < end);

  // True when the character right before `index` is [A-Za-z0-9_], i.e. the
  // sigil at `index` sits in the middle of a word (e-mail, URL fragment...).
  const followsWordChar = (index) => index > 0 && /\w/.test(text[index - 1]);

  const facets = [];
  const addFacet = (start, end, feature) => {
    claimed.push([start, end]);
    facets.push({
      index: { byteStart: byteOffsetAt(start), byteEnd: byteOffsetAt(end) },
      features: [feature],
    });
  };

  // URLs first (highest priority)
  for (const match of text.matchAll(/https?:\/\/[^\s<>\[\]()]+/g)) {
    // "see https://example.com." -> the final "." belongs to the sentence.
    const uri = match[0].replace(/[.,;:!?]+$/, '');
    // Nothing left after the scheme (e.g. "https://.") is not a link.
    if (uri.length <= uri.indexOf('://') + 3) continue;
    const start = match.index;
    const end = start + uri.length;
    if (isClaimed(start, end)) continue;
    addFacet(start, end, { $type: 'app.bsky.richtext.facet#link', uri });
  }

  // Mentions: @handle or @handle.domain.tld
  // Without a resolver no DID can be obtained, so no mention facet is emitted.
  if (typeof resolveHandle === 'function') {
    const mentionRegex = /@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)*[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/g;
    for (const match of text.matchAll(mentionRegex)) {
      const start = match.index;
      const end = start + match[0].length;
      if (followsWordChar(start) || isClaimed(start, end)) continue;

      let did = null;
      try {
        did = await resolveHandle(match[0].slice(1)); // strip the leading @
      } catch {
        // Handle could not be resolved - deliberately leave it as plain text.
        continue;
      }
      if (did) {
        addFacet(start, end, { $type: 'app.bsky.richtext.facet#mention', did });
      }
    }
  }

  // Hashtags: #tag (alphanumeric, no leading numbers)
  for (const match of text.matchAll(/#([a-zA-Z][a-zA-Z0-9_]*)/g)) {
    const start = match.index;
    const end = start + match[0].length;
    if (followsWordChar(start) || isClaimed(start, end)) continue;
    addFacet(start, end, { $type: 'app.bsky.richtext.facet#tag', tag: match[1] });
  }

  // Sort by byte position
  facets.sort((a, b) => a.index.byteStart - b.index.byteStart);
  return { text, facets };
}
/**
 * Synchronous parsing for client-side render (no DID resolution).
 *
 * Detects links, mentions and hashtags and returns them as facets with
 * UTF-8 byte offsets, sorted by `index.byteStart`. Mention facets carry
 * `did: null`, so mentions display as-is without profile links.
 *
 * Detection rules:
 * - Links: `http://` / `https://` URLs; trailing sentence punctuation
 *   (`.,;:!?`) is not part of the link.
 * - Mentions (`@handle`, `@handle.domain.tld`) and hashtags (`#tag`, must
 *   start with a letter) are ignored when the sigil is glued to a preceding
 *   word character, e.g. inside an e-mail address or a URL-like fragment.
 * - Earlier categories win: text inside a link is never reported again as a
 *   mention or hashtag.
 *
 * @param {string} text - Plain text to parse
 * @returns {{ text: string, facets: Array }}
 */
export function parseTextToFacetsSync(text) {
  if (!text) return { text: '', facets: [] };

  const encoder = new TextEncoder();
  // Regex matches give UTF-16 indices; facets need UTF-8 byte offsets.
  const byteOffsetAt = (charIndex) =>
    encoder.encode(text.slice(0, charIndex)).length;

  // Half-open [start, end) character ranges already covered by a facet.
  const claimed = [];
  const isClaimed = (start, end) =>
    claimed.some(([from, to]) => start < to && from < end);

  // True when the character right before `index` is [A-Za-z0-9_].
  const followsWordChar = (index) => index > 0 && /\w/.test(text[index - 1]);

  const facets = [];
  const addFacet = (start, end, feature) => {
    claimed.push([start, end]);
    facets.push({
      index: { byteStart: byteOffsetAt(start), byteEnd: byteOffsetAt(end) },
      features: [feature],
    });
  };

  // URLs
  for (const match of text.matchAll(/https?:\/\/[^\s<>\[\]()]+/g)) {
    // "see https://example.com." -> the final "." belongs to the sentence.
    const uri = match[0].replace(/[.,;:!?]+$/, '');
    // Nothing left after the scheme (e.g. "https://.") is not a link.
    if (uri.length <= uri.indexOf('://') + 3) continue;
    const start = match.index;
    const end = start + uri.length;
    if (isClaimed(start, end)) continue;
    addFacet(start, end, { $type: 'app.bsky.richtext.facet#link', uri });
  }

  // Mentions: @handle or @handle.domain.tld (no DID resolution in sync mode)
  const mentionRegex = /@([a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)*[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?/g;
  for (const match of text.matchAll(mentionRegex)) {
    const start = match.index;
    const end = start + match[0].length;
    if (followsWordChar(start) || isClaimed(start, end)) continue;
    addFacet(start, end, {
      $type: 'app.bsky.richtext.facet#mention',
      did: null, // No DID in sync mode
    });
  }

  // Hashtags
  for (const match of text.matchAll(/#([a-zA-Z][a-zA-Z0-9_]*)/g)) {
    const start = match.index;
    const end = start + match[0].length;
    if (followsWordChar(start) || isClaimed(start, end)) continue;
    addFacet(start, end, { $type: 'app.bsky.richtext.facet#tag', tag: match[1] });
  }

  facets.sort((a, b) => a.index.byteStart - b.index.byteStart);
  return { text, facets };
}
/**
 * Render text with facets as HTML.
 *
 * Text outside facets is HTML-escaped. Each facet is rendered according to
 * the `$type` (or `__typename`) of its first feature:
 * - link:    `<a class="facet-link">` pointing at `feature.uri`. Only
 *            `http:` / `https:` URIs become links; anything else (e.g.
 *            `javascript:`) is rendered as plain escaped text.
 * - mention: `<a class="facet-mention">` pointing at
 *            `profileBaseUrl + handle`, where the handle is the facet text
 *            without its leading `@`.
 * - tag:     `<span class="facet-tag">` - styled but not clickable for now.
 * - other / no feature: plain escaped text.
 *
 * Facets that are malformed (missing or non-integer indices, empty or
 * reversed range, out of bounds) or that overlap an earlier facet are
 * ignored, so no part of the text is ever emitted twice.
 *
 * @param {string} text - The text content
 * @param {Array} facets - Array of facet objects with UTF-8 byte indices
 * @param {Object} options - Rendering options
 * @param {string} [options.profileBaseUrl='https://bsky.app/profile/'] -
 *   Prefix for mention links; the URL-encoded handle is appended to it
 * @returns {string} HTML string
 */
export function renderFacetedText(text, facets, options = {}) {
  if (!text) return '';
  // If no facets, just escape and return
  if (!facets || facets.length === 0) {
    return escapeHtml(text);
  }

  const profileBaseUrl = options?.profileBaseUrl ?? 'https://bsky.app/profile/';

  const encoder = new TextEncoder();
  const decoder = new TextDecoder();
  const bytes = encoder.encode(text);
  const decodeRange = (from, to) => decoder.decode(bytes.subarray(from, to));

  const hasValidRange = (facet) => {
    const start = facet?.index?.byteStart;
    const end = facet?.index?.byteEnd;
    return (
      Number.isInteger(start) &&
      Number.isInteger(end) &&
      start >= 0 &&
      start < end &&
      end <= bytes.length
    );
  };

  // Render a single facet's text according to its first feature.
  const renderFeature = (feature, facetText) => {
    const label = escapeHtml(facetText);
    if (!feature) return label;

    const type = String(feature.$type || feature.__typename || '');
    if (type.includes('link')) {
      const uri = typeof feature.uri === 'string' ? feature.uri : '';
      // Facets are untrusted input: never put a non-http(s) URI in an href.
      if (!/^https?:\/\//i.test(uri)) return label;
      return `<a href="${escapeHtml(uri)}" class="facet-link" target="_blank" rel="noopener noreferrer nofollow">${label}</a>`;
    }
    if (type.includes('mention')) {
      // Extract handle from text (remove @)
      const handle = facetText.startsWith('@') ? facetText.slice(1) : facetText;
      const href = `${profileBaseUrl}${encodeURIComponent(handle)}`;
      return `<a href="${escapeHtml(href)}" class="facet-mention">${label}</a>`;
    }
    if (type.includes('tag')) {
      // Hashtag - styled but not clickable for now
      return `<span class="facet-tag">${label}</span>`;
    }
    return label;
  };

  // Copy before sorting so the caller's array is left untouched.
  const sortedFacets = [...facets]
    .filter(hasValidRange)
    .sort((a, b) => a.index.byteStart - b.index.byteStart);

  let html = '';
  let cursor = 0; // byte offset up to which the text has been emitted
  for (const facet of sortedFacets) {
    const { byteStart, byteEnd } = facet.index;
    // Overlaps a facet that was already rendered - skip to avoid
    // duplicating the shared bytes in the output.
    if (byteStart < cursor) continue;

    // Add text before this facet
    html += escapeHtml(decodeRange(cursor, byteStart));
    html += renderFeature(facet.features?.[0], decodeRange(byteStart, byteEnd));
    cursor = byteEnd;
  }

  // Add remaining text
  html += escapeHtml(decodeRange(cursor, bytes.length));
  return html;
}
/**
 * Escape a value for safe inclusion in HTML text or a quoted attribute.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with their HTML entities in a single
 * pass, so already-produced entities are never escaped twice within a call.
 *
 * @param {string} text - Raw text; `null`/`undefined` are treated as ''
 * @returns {string} HTML-escaped text
 */
function escapeHtml(text) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(text ?? '').replace(/[&<>"']/g, (ch) => entities[ch]);
}