tangled
alpha
login
or
join now
indexx.dev
/
tweets2bsky
forked from
j4ck.xyz/tweets2bsky
0
fork
atom
A simple tool that lets you scrape Twitter accounts and crosspost them to Bluesky accounts! Comes with a CLI and a web app for managing profiles! Works with images, videos, link embeds, and threads.
0
fork
atom
overview
issues
pulls
pipelines
feat: link non-bsky handles to twitter
Jack G
1 month ago
86015a55
60a3c3e3
+53
1 changed file
expand all
collapse all
unified
split
src
index.ts
+53
src/index.ts
···
1075
1075
return chunks;
1076
1076
}
1077
1077
1078
1078
+
/**
 * Converts a UTF-16 code-unit index into `text` (the unit JavaScript strings
 * and `RegExp` match indices use) into the equivalent UTF-8 byte offset.
 *
 * @param text - The full string the index refers to.
 * @param index - UTF-16 code-unit offset into `text`.
 * @returns The number of UTF-8 bytes occupied by `text.slice(0, index)`.
 */
function utf16IndexToUtf8Index(text: string, index: number): number {
  // Measure the encoded size of the prefix; multi-byte characters before
  // `index` are what make the two offsets diverge.
  return Buffer.byteLength(text.slice(0, index), 'utf8');
}
1081
1081
+
1082
1082
+
/**
 * Reports whether two ranges share at least one position.
 *
 * The comparison is strict on both sides, so ranges that merely touch
 * (one ends exactly where the other starts) do not count as overlapping,
 * i.e. the ranges behave as half-open `[start, end)` intervals.
 *
 * @param startA - Start of the first range.
 * @param endA - End of the first range.
 * @param startB - Start of the second range.
 * @param endB - End of the second range.
 * @returns `true` when the ranges intersect, otherwise `false`.
 */
function rangesOverlap(startA: number, endA: number, startB: number, endB: number): boolean {
  return startA < endB && startB < endA;
}
1085
1085
+
1086
1086
+
/**
 * Adds link facets that point `@handle` mentions in `text` at the matching
 * `https://twitter.com/<handle>` profile URL.
 *
 * A candidate is `@` followed by 1–15 characters from `[A-Za-z0-9_]`. It is
 * skipped when:
 *  - the character before `@` is a word character (e.g. inside an e-mail
 *    address);
 *  - the handle is immediately followed by `.` plus a domain-like label
 *    (e.g. `@name.example`) — presumably so domain-style handles are left
 *    alone;
 *  - the handle is immediately followed by another word character (the run
 *    is longer than 15 characters);
 *  - its byte range overlaps a facet already present in `facets`.
 *
 * @param text - The post text the facets index into.
 * @param facets - Facets already detected for `text`, if any. Each is
 *   expected to carry `index.byteStart` / `index.byteEnd`.
 * @returns The original `facets` value unchanged when nothing was added;
 *   otherwise a new array with existing and new facets, sorted by
 *   `index.byteStart`.
 */
function addTwitterHandleLinkFacets(text: string, facets?: any[]): any[] | undefined {
  const existingFacets = facets ?? [];
  const newFacets: any[] = [];
  const regex = /@([A-Za-z0-9_]{1,15})/g;
  let match: RegExpExecArray | null;

  while ((match = regex.exec(text))) {
    const handle = match[1];
    if (!handle) continue;

    // Reject "@" glued to a preceding word character (e.g. "user@host").
    const atIndex = match.index;
    const prevChar = atIndex > 0 ? text[atIndex - 1] : '';
    if (prevChar && /[A-Za-z0-9_]/.test(prevChar)) continue;

    // Position just past the handle: "@" plus the handle characters.
    const endIndex = atIndex + handle.length + 1;

    // Reject handles that continue as a dotted, domain-like name. The
    // anchored pattern already requires the leading ".".
    if (/^\.[A-Za-z0-9-]+/.test(text.slice(endIndex))) continue;

    // Reject matches truncated by the 15-character cap.
    const nextChar = endIndex < text.length ? text[endIndex] : '';
    if (nextChar && /[A-Za-z0-9_]/.test(nextChar)) continue;

    // Facet indices are stored as UTF-8 byte offsets, not UTF-16 indices.
    const byteStart = utf16IndexToUtf8Index(text, atIndex);
    const byteEnd = utf16IndexToUtf8Index(text, endIndex);

    // Never stack a link on top of a facet that was already detected.
    const overlaps = existingFacets.some((facet) =>
      rangesOverlap(byteStart, byteEnd, facet.index.byteStart, facet.index.byteEnd),
    );
    if (overlaps) continue;

    newFacets.push({
      index: { byteStart, byteEnd },
      features: [
        {
          $type: 'app.bsky.richtext.facet#link',
          uri: `https://twitter.com/${handle}`,
        },
      ],
    });
  }

  // Hand back the caller's value untouched (including `undefined`) when
  // there is nothing to add.
  if (newFacets.length === 0) return facets;
  return [...existingFacets, ...newFacets].sort((a, b) => a.index.byteStart - b.index.byteStart);
}
1129
1129
+
1078
1130
// Simple p-limit implementation for concurrency control
1079
1131
const pLimit = (concurrency: number) => {
1080
1132
const queue: (() => Promise<void>)[] = [];
···
1670
1722
1671
1723
const rt = new RichText({ text: chunk });
1672
1724
await rt.detectFacets(agent);
1725
1725
+
rt.facets = addTwitterHandleLinkFacets(rt.text, rt.facets);
1673
1726
const detectedLangs = detectLanguage(chunk);
1674
1727
1675
1728
// Add offset for split chunks to ensure correct ordering/threading