···11-/*
22- Extracted from https://github.com/bluesky-social/atproto/
33-44- Copyright (c) 2022-2024 Bluesky PBC, and Contributors
55- MIT License
66-77- Permission is hereby granted, free of charge, to any person obtaining a copy
88- of this software and associated documentation files (the "Software"), to deal
99- in the Software without restriction, including without limitation the rights
1010- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1111- copies of the Software, and to permit persons to whom the Software is
1212- furnished to do so, subject to the following conditions:
1313-1414- The above copyright notice and this permission notice shall be included in all
1515- copies or substantial portions of the Software.
1616-1717- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1818- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1919- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2020- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2121- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2222- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2323- SOFTWARE.
2424-*/
2525-2626-// packages/api/src/rich-text/rich-text.ts
/**
 * One contiguous run of rich text, optionally paired with the facet that
 * covers it. A segment constructed without a facet is plain text.
 */
class RichTextSegment {
  /**
   * @param {string} text - The text of this segment.
   * @param {object} [facet] - The facet covering this text, if any.
   */
  constructor(text, facet) {
    this.text = text;
    this.facet = facet;
  }

  /**
   * The first `app.bsky.richtext.facet#link` feature of the facet, if present.
   * @returns {object | undefined}
   */
  get link() {
    return this.#findFeature('app.bsky.richtext.facet#link');
  }

  /**
   * The first `app.bsky.richtext.facet#mention` feature of the facet, if present.
   * @returns {object | undefined}
   */
  get mention() {
    return this.#findFeature('app.bsky.richtext.facet#mention');
  }

  /**
   * The first `app.bsky.richtext.facet#tag` feature of the facet, if present.
   * @returns {object | undefined}
   */
  get tag() {
    return this.#findFeature('app.bsky.richtext.facet#tag');
  }

  /**
   * Shared lookup behind the three getters above.
   * Tolerates a missing facet and a facet without a `features` list.
   * @param {string} type - The `$type` value to look for.
   * @returns {object | undefined}
   */
  #findFeature(type) {
    return this.facet?.features?.find(feature => feature.$type === type);
  }
}
/**
 * Text plus the facets that annotate byte ranges of its UTF-8 encoding.
 */
export class RichText {
  /**
   * @param {{ text: string, facets?: object[] }} props - The text and its
   *   optional facets. Each facet is read through `index.byteStart` and
   *   `index.byteEnd`.
   */
  constructor(props) {
    this.unicodeText = new UnicodeString(props.text);

    // Work on a filtered copy: sorting `props.facets` in place would reorder
    // the caller's array. Facets whose end precedes their start are dropped,
    // because they would move the text cursor backwards in segments() and
    // make it emit the same text twice. Zero-length facets are kept.
    this.facets = props.facets
      ? props.facets
          .filter(facet => facet.index.byteStart <= facet.index.byteEnd)
          .sort((a, b) => a.index.byteStart - b.index.byteStart)
      : props.facets;
  }

  /** @returns {string} The original (UTF-16) text. */
  get text() {
    return this.unicodeText.toString();
  }

  /** @returns {number} Length of the text as reported by the wrapped UnicodeString. */
  get length() {
    return this.unicodeText.length;
  }

  /** @returns {number} Grapheme count as reported by the wrapped UnicodeString. */
  get graphemeLength() {
    return this.unicodeText.graphemeLength;
  }

  /**
   * Walks the text in order and yields it as RichTextSegment pieces.
   * Text outside any facet is yielded without a facet. A facet that starts
   * before the current cursor (it overlaps an earlier facet) is skipped.
   * Facet text that is only whitespace is yielded without its facet.
   * @returns {Generator<RichTextSegment, void, void>}
   */
  *segments() {
    const facets = this.facets || [];

    if (facets.length === 0) {
      yield new RichTextSegment(this.unicodeText.utf16);
      return;
    }

    let textCursor = 0;

    for (const facet of facets) {
      const { byteStart, byteEnd } = facet.index;

      if (textCursor > byteStart) {
        // Starts inside text that was already emitted: ignore this facet.
        continue;
      }

      if (textCursor < byteStart) {
        // Plain text between the previous facet and this one.
        yield new RichTextSegment(this.unicodeText.slice(textCursor, byteStart));
      }

      if (byteStart < byteEnd) {
        const subtext = this.unicodeText.slice(byteStart, byteEnd);

        if (subtext.trim().length === 0) {
          yield new RichTextSegment(subtext);
        } else {
          yield new RichTextSegment(subtext, facet);
        }
      }

      textCursor = byteEnd;
    }

    // Whatever text remains after the last facet.
    if (textCursor < this.unicodeText.length) {
      yield new RichTextSegment(this.unicodeText.slice(textCursor, this.unicodeText.length));
    }
  }
}
118118-119119-120120-// packages/api/src/rich-text/unicode.ts
121121-122122-/**
123123- * Javascript uses utf16-encoded strings while most environments and specs
124124- * have standardized around utf8 (including JSON).
125125- *
126126- * After some lengthy debate we decided that richtext facets need to use
127127- * utf8 indices. This means we need tools to convert indices between utf8
128128- * and utf16, and that's precisely what this library handles.
129129- */
/**
 * A string held in both encodings: the original UTF-16 JavaScript string and
 * its UTF-8 bytes, so that byte offsets can be applied to the text.
 */
class UnicodeString {
  static encoder = new TextEncoder();
  static decoder = new TextDecoder();

  // Feature-detect through `typeof` rather than `window.Intl`: touching
  // `window` throws a ReferenceError wherever no `window` global exists
  // (Node, workers). Left undefined when Intl.Segmenter is unavailable.
  static segmenter =
    typeof Intl !== 'undefined' && typeof Intl.Segmenter === 'function'
      ? new Intl.Segmenter()
      : undefined;

  /** @param {string} utf16 - The text as a regular JavaScript string. */
  constructor(utf16) {
    this.utf16 = utf16;
    this.utf8 = UnicodeString.encoder.encode(utf16);
  }

  /** @returns {number} Number of bytes in the UTF-8 encoding. */
  get length() {
    return this.utf8.byteLength;
  }

  /**
   * Number of grapheme clusters in the text. When Intl.Segmenter is not
   * available this falls back to counting code points, which over-counts
   * multi-code-point clusters but does not throw.
   * @returns {number}
   */
  get graphemeLength() {
    const { segmenter } = UnicodeString;
    if (!segmenter) {
      return Array.from(this.utf16).length;
    }
    return Array.from(segmenter.segment(this.utf16)).length;
  }

  /**
   * Decodes the UTF-8 bytes from `start` up to (not including) `end`.
   * @param {number} start - First byte offset to include.
   * @param {number} end - Byte offset to stop before.
   * @returns {string}
   */
  slice(start, end) {
    // subarray() gives a view over the same bytes; decoding needs no copy.
    return UnicodeString.decoder.decode(this.utf8.subarray(start, end));
  }

  /** @returns {string} The original UTF-16 string. */
  toString() {
    return this.utf16;
  }
}
+209
lib/rich_text_lite.ts
···11+/*
22+ Extracted from https://github.com/bluesky-social/atproto/
33+44+ Copyright (c) 2022-2024 Bluesky PBC, and Contributors
55+ MIT License
66+77+ Permission is hereby granted, free of charge, to any person obtaining a copy
88+ of this software and associated documentation files (the "Software"), to deal
99+ in the Software without restriction, including without limitation the rights
1010+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1111+ copies of the Software, and to permit persons to whom the Software is
1212+ furnished to do so, subject to the following conditions:
1313+1414+ The above copyright notice and this permission notice shall be included in all
1515+ copies or substantial portions of the Software.
1616+1717+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1818+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1919+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
2020+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2121+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
2222+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2323+ SOFTWARE.
2424+*/
2525+2626+// packages/api/src/rich-text/rich-text.ts
/**
 * Byte range of a facet. The range is applied as `slice(byteStart, byteEnd)`
 * on the UTF-8 bytes of the text, so `byteEnd` is exclusive.
 */
export interface ByteSlice {
  $type?: 'app.bsky.richtext.facet#byteSlice';
  byteStart: number;
  byteEnd: number;
}

/** Facet feature tagged `app.bsky.richtext.facet#mention`. */
export interface FacetMention {
  $type?: 'app.bsky.richtext.facet#mention';
  did: string;
}

/** Facet feature tagged `app.bsky.richtext.facet#link`. */
export interface FacetLink {
  $type?: 'app.bsky.richtext.facet#link';
  uri: string;
}

/** Facet feature tagged `app.bsky.richtext.facet#tag`. */
export interface FacetTag {
  $type?: 'app.bsky.richtext.facet#tag';
  tag: string;
}

/**
 * An annotation on a byte range of the text. `features` may also hold
 * feature objects of other types; those only need to carry a `$type`.
 */
export interface Facet {
  $type?: 'app.bsky.richtext.facet';
  index: ByteSlice;
  features: Array<FacetMention | FacetLink | FacetTag | { $type: string }>;
}

/** Constructor input for RichText: the text and, optionally, its facets. */
export interface RichTextProps {
  text: string;
  facets?: Facet[] | undefined;
}
/**
 * One contiguous run of rich text, optionally paired with the facet that
 * covers it. A segment constructed without a facet is plain text.
 */
export class RichTextSegment {
  /**
   * @param text - The text of this segment.
   * @param facet - The facet covering this text, if any.
   */
  constructor(public text: string, public facet?: Facet) {}

  // The getters narrow with type predicates instead of `as` assertions, so
  // the compiler checks the result type. The `?.` after `features` is kept
  // on purpose: facets reach this class from untyped JSON cast to `Facet[]`,
  // so `features` can be absent at runtime despite the declared type.

  /** The first link feature of the facet, or `undefined` when there is none. */
  get link(): FacetLink | undefined {
    return this.facet?.features?.find(
      (v): v is FacetLink => v.$type === 'app.bsky.richtext.facet#link',
    )
  }

  /** Whether the facet carries a link feature. */
  isLink(): boolean {
    return !!this.link
  }

  /** The first mention feature of the facet, or `undefined` when there is none. */
  get mention(): FacetMention | undefined {
    return this.facet?.features?.find(
      (v): v is FacetMention => v.$type === 'app.bsky.richtext.facet#mention',
    )
  }

  /** Whether the facet carries a mention feature. */
  isMention(): boolean {
    return !!this.mention
  }

  /** The first tag feature of the facet, or `undefined` when there is none. */
  get tag(): FacetTag | undefined {
    return this.facet?.features?.find(
      (v): v is FacetTag => v.$type === 'app.bsky.richtext.facet#tag',
    )
  }

  /** Whether the facet carries a tag feature. */
  isTag(): boolean {
    return !!this.tag
  }
}
/**
 * Text plus the facets that annotate byte ranges of its UTF-8 encoding.
 */
export class RichText {
  unicodeText: UnicodeString
  facets?: Facet[] | undefined

  /**
   * Wraps the text and keeps a filtered, sorted copy of the facets
   * (see `facetFilter` and `facetSort`).
   */
  constructor(props: RichTextProps) {
    const { text, facets } = props;
    this.unicodeText = new UnicodeString(text);
    this.facets = facets ? facets.filter(facetFilter).sort(facetSort) : facets;
  }

  /** The original (UTF-16) text. */
  get text() {
    return this.unicodeText.toString();
  }

  /** Length of the text as reported by the wrapped UnicodeString. */
  get length() {
    return this.unicodeText.length;
  }

  /** Grapheme count as reported by the wrapped UnicodeString. */
  get graphemeLength() {
    return this.unicodeText.graphemeLength;
  }

  /**
   * Walks the text in order and yields it as RichTextSegment pieces.
   * Text outside any facet is yielded without a facet. A facet that starts
   * before the current cursor (it overlaps an earlier facet) is skipped.
   */
  *segments(): Generator<RichTextSegment, void, void> {
    const source = this.unicodeText;
    const ordered = this.facets ?? [];

    if (ordered.length === 0) {
      yield new RichTextSegment(source.utf16);
      return;
    }

    let cursor = 0;

    for (const facet of ordered) {
      const { byteStart, byteEnd } = facet.index;

      if (cursor > byteStart) {
        // Starts inside text that was already emitted: ignore this facet.
        continue;
      }

      if (cursor < byteStart) {
        // Plain text between the previous facet and this one.
        yield new RichTextSegment(source.slice(cursor, byteStart));
      }

      if (byteStart < byteEnd) {
        const covered = source.slice(byteStart, byteEnd);
        // Whitespace-only text is emitted as plain text, without its facet.
        const isBlank = covered.trim().length === 0;
        yield isBlank
          ? new RichTextSegment(covered)
          : new RichTextSegment(covered, facet);
      }

      cursor = byteEnd;
    }

    // Whatever text remains after the last facet.
    if (cursor < source.length) {
      yield new RichTextSegment(source.slice(cursor, source.length));
    }
  }
}
/** Comparator that orders facets by ascending `index.byteStart`. */
function facetSort(a: Facet, b: Facet): number {
  return a.index.byteStart - b.index.byteStart
}

/**
 * Keeps a facet unless its end precedes its start (a negative-length
 * range). Zero-length facets are valid and pass.
 */
function facetFilter(facet: Facet): boolean {
  const { byteStart, byteEnd } = facet.index
  return byteStart <= byteEnd
}
160160+161161+162162+// packages/api/src/rich-text/unicode.ts
163163+164164+/**
165165+ * Javascript uses utf16-encoded strings while most environments and specs
166166+ * have standardized around utf8 (including JSON).
167167+ *
168168+ * After some lengthy debate we decided that richtext facets need to use
169169+ * utf8 indices. This means we need tools to convert indices between utf8
170170+ * and utf16, and that's precisely what this library handles.
171171+ */
const encoder = new TextEncoder()
const decoder = new TextDecoder()

// Intl.Segmenter is not available in every JavaScript engine. Constructing it
// unconditionally would throw while this module is being imported and take
// every export down with it, so detect it first and leave this undefined
// when it is missing.
const segmenter =
  typeof Intl !== 'undefined' && typeof Intl.Segmenter === 'function'
    ? new Intl.Segmenter()
    : undefined

/**
 * Counts the grapheme clusters (user-perceived characters) in `str`.
 *
 * When Intl.Segmenter is unavailable this falls back to counting code
 * points, which over-counts multi-code-point clusters but does not throw.
 *
 * @param str - The string to measure.
 * @returns The number of graphemes, or of code points in the fallback case.
 */
export const graphemeLen = (str: string): number => {
  if (!segmenter) {
    return Array.from(str).length
  }
  return Array.from(segmenter.segment(str)).length
}
/**
 * A string held in both encodings: the original UTF-16 JavaScript string and
 * its UTF-8 bytes, so that byte offsets can be applied to the text.
 */
export class UnicodeString {
  utf16: string
  utf8: Uint8Array
  // Cached result of graphemeLen(); undefined until first requested.
  private _graphemeLen?: number | undefined

  /** @param utf16 - The text as a regular JavaScript string. */
  constructor(utf16: string) {
    this.utf16 = utf16
    this.utf8 = encoder.encode(utf16)
  }

  /** Number of bytes in the UTF-8 encoding. */
  get length(): number {
    return this.utf8.byteLength
  }

  /** Grapheme count of the text, computed once and then cached. */
  get graphemeLength(): number {
    // Compare against undefined, not truthiness: a cached count of 0 (the
    // empty string) is a valid result and must not be recomputed.
    if (this._graphemeLen === undefined) {
      this._graphemeLen = graphemeLen(this.utf16)
    }
    return this._graphemeLen
  }

  /**
   * Decodes the UTF-8 bytes from `start` up to (not including) `end`.
   * Omitted bounds default to the beginning and end of the text.
   */
  slice(start?: number, end?: number): string {
    // subarray() gives a view over the same bytes; decoding needs no copy.
    return decoder.decode(this.utf8.subarray(start, end))
  }

  /** The original UTF-16 string. */
  toString(): string {
    return this.utf16
  }
}
+2-2
src/components/RichTextFromFacets.svelte
···11<script lang="ts">
22- import { RichText } from '../../lib/rich_text_lite.js';
22+ import { RichText, type Facet } from '../../lib/rich_text_lite.js';
33 import { linkToHashtagPage } from '../router.js';
4455- let { text, facets }: { text: string, facets: json[] } = $props();
55+ let { text, facets }: { text: string, facets: Facet[] } = $props();
6677 let richText = $derived(new RichText({ text, facets }));
88 let segments = $derived(richText.segments());
+2-1
src/components/posts/PostBody.svelte
···11<script lang="ts">
22 import { getPostContext } from './PostComponent.svelte';
33 import { sanitizeHTML } from '../../utils.js';
44+ import { type Facet } from '../../../lib/rich_text_lite.js';
45 import RichTextFromFacets from '../RichTextFromFacets.svelte';
5667 const highlightID = 'search-results';
···5455 </div>
5556{:else}
5657 <p class="body" bind:this={bodyElement}>
5757- <RichTextFromFacets text={post.text} facets={post.facets} />
5858+ <RichTextFromFacets text={post.text} facets={post.facets as Facet[]} />
5859 </p>
5960{/if}
6061