···11import {extractHtmlMeta} from '../../src/lib/extractHtmlMeta'
22import {exampleComHtml} from './__mocks__/exampleComHtml'
33import {youtubeHTML} from './__mocks__/youtubeHtml'
44+import {tiktokHtml} from './__mocks__/tiktokHtml'
4556describe('extractHtmlMeta', () => {
67 const cases = [
···5657 expect(output).toEqual(expectedOutput)
5758 })
it('extracts title and description from a Tiktok HTML page', () => {
  // Run the captured TikTok fixture through the extractor and compare the
  // whole result object, so any extra or missing key fails the test.
  const parsed = extractHtmlMeta({hostname: 'tiktok.com', html: tiktokHtml})

  expect(parsed).toEqual({
    title:
      'Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #bal... | TikTok',
    description:
      '5.5M Likes, 20.8K Comments. TikTok video from Power Vision Tests (@_powervision_): "Coca-Cola and Mentos! Super Reaction! #cocacola #mentos #reaction #balloon #sciencemoment #scienceexperiment #experiment #test #amazing #pvexp". оригинальный звук - Power Vision Tests.',
  })
})
7171+5972 it('extracts title and description from a generic youtube page', () => {
6073 const input = youtubeHTML
6174 const expectedOutput = {
···6578 image: 'https://i.ytimg.com/vi/x6UITRjhijI/sddefault.jpg',
6679 }
6780 const output = extractHtmlMeta({html: input, hostname: 'youtube.com'})
8181+ expect(output).toEqual(expectedOutput)
8282+ })
it('extracts username from the url a twitter profile page', () => {
  // For twitter hosts the result comes from the URL path only, so the page
  // markup is irrelevant here and an empty document is supplied.
  const expectedOutput = {
    title: '@bluesky on Twitter',
  }
  const output = extractHtmlMeta({
    // `html` is a required field of the extractor input; omitting it does not
    // type-check.
    html: '',
    hostname: 'twitter.com',
    pathname: '/bluesky',
  })
  expect(output).toEqual(expectedOutput)
})
it('extracts username from the url a tweet', () => {
  // A `/<user>/status/<id>` path is expected to be labelled as a tweet by
  // that user. The markup plays no part, so an empty document is supplied.
  const expectedOutput = {
    title: 'Tweet by @bluesky',
  }
  const output = extractHtmlMeta({
    // `html` is a required field of the extractor input; omitting it does not
    // type-check.
    html: '',
    hostname: 'twitter.com',
    pathname: '/bluesky/status/1582437529969917953',
  })
  expect(output).toEqual(expectedOutput)
})
it("does not extract username from the url when it's not a tweet or profile page", () => {
  // Paths whose first segment is a single character (here `/i/...`) must fall
  // back to the generic site title instead of being read as a handle.
  const expectedOutput = {
    title: 'Twitter',
  }
  const output = extractHtmlMeta({
    // `html` is a required field of the extractor input; omitting it does not
    // type-check.
    html: '',
    hostname: 'twitter.com',
    pathname: '/i/articles/follows/-1675653703?time_window=24',
  })
  expect(output).toEqual(expectedOutput)
})
70116})
+8-2
src/lib/extractHtmlMeta.ts
···11+import {extractTwitterMeta} from './extractTwitterMeta'
12import {extractYoutubeMeta} from './extractYoutubeMeta'
2334interface ExtractHtmlMetaInput {
45 html: string
56 hostname?: string
77+ pathname?: string
68}
79810export const extractHtmlMeta = ({
911 html,
1012 hostname,
1313+ pathname,
1114}: ExtractHtmlMetaInput): Record<string, string> => {
1212- const htmlTitleRegex = /<title>([^<]+)<\/title>/i
1515+ const htmlTitleRegex = /<title.*>([^<]+)<\/title>/i
13161417 let res: Record<string, string> = {}
1518···56595760 const isYoutubeUrl =
5861 hostname?.includes('youtube.') || hostname?.includes('youtu.be')
6262+ const isTwitterUrl = hostname?.includes('twitter.')
6363+ // Workaround for some websites not having a title or description in the meta tags in the initial serve
5964 if (isYoutubeUrl) {
6060- // Workaround for Youtube not having a title in the meta tags
6165 res = {...res, ...extractYoutubeMeta(html)}
6666+ } else if (isTwitterUrl) {
6767+ res = {...extractTwitterMeta({pathname})}
6268 }
63696470 return res
+20
src/lib/extractTwitterMeta.ts
/**
 * Builds a link-preview title for a Twitter URL from its path alone.
 *
 * - `/<user>`               -> `@<user> on Twitter`
 * - `/<user>/status/<id>…`  -> `Tweet by @<user>`
 * - anything else           -> `Twitter`
 *
 * Empty path segments (leading, trailing or doubled slashes) are ignored, and
 * a query string or fragment that is still attached to the path is dropped
 * before parsing.
 *
 * @param pathname Path portion of the URL. May be omitted or empty, in which
 *   case the generic title is returned.
 * @returns An object with a single `title` key.
 */
export const extractTwitterMeta = ({
  pathname,
}: {
  pathname?: string
}): Record<string, string> => {
  const res = {title: 'Twitter'}
  if (!pathname) {
    return res
  }

  // Callers may pass a path that still carries `?query` or `#hash`; neither
  // belongs to the username or the route.
  const [pathOnly = ''] = pathname.split(/[?#]/)
  // Dropping empty segments makes `/bluesky/` equivalent to `/bluesky`.
  const segments = pathOnly.split('/').filter(Boolean)
  const [username, section] = segments

  if (username === undefined || username.length <= 1) {
    // One-letter first segments are treated as reserved routes rather than
    // usernames, e.g. twitter.com/i/articles/follows/-1675653703
    return res
  }

  if (segments.length === 1) {
    res.title = `@${username} on Twitter`
  } else if (section === 'status' && segments.length >= 3) {
    res.title = `Tweet by @${username}`
  }
  // Any other sub-page (followers, likes, ...) is neither a profile root nor
  // a tweet, so the generic title is kept.

  return res
}