// Isolated from Semble repo to keep Citoid mapping auditable and easy to update.

/** A single creator entry as it appears in a Citoid response. */
export interface CitoidCreator {
  firstName?: string;
  lastName?: string;
  creatorType?: string;
  /** Single-field name; when present it is used instead of first/last name. */
  name?: string;
}

/** The subset of Citoid response fields that this module reads. */
export interface CitoidResponse {
  itemType?: string;
  creators?: CitoidCreator[];
  title?: string;
  date?: string;
  abstractNote?: string;
  publicationTitle?: string;
  websiteTitle?: string;
  blogTitle?: string;
  forumTitle?: string;
  repository?: string;
  libraryCatalog?: string;
  institution?: string;
  university?: string;
  publisher?: string;
  proceedingsTitle?: string;
  conferenceName?: string;
  series?: string;
  seriesTitle?: string;
  network?: string;
  programTitle?: string;
  distributor?: string;
  studio?: string;
  label?: string;
  DOI?: string;
  ISBN?: string;
  dateDecided?: string;
  dateEnacted?: string;
  issueDate?: string;
  filingDate?: string;
}

/** Coarse content categories used on the Semble side. */
export enum SembleUrlType {
  LINK = "link",
  ARTICLE = "article",
  BOOK = "book",
  RESEARCH = "research",
  AUDIO = "audio",
  VIDEO = "video",
  SOCIAL = "social",
  SOFTWARE = "software",
}

/** Metadata produced by {@link buildSembleMetadataFromCitoid}. */
export interface SembleUrlMetadata {
  title?: string;
  description?: string;
  author?: string;
  publishedDate?: Date;
  siteName?: string;
  type?: SembleUrlType;
  doi?: string;
  isbn?: string;
}

/** `itemType` values recognised by this mapping. */
export enum CitoidUrlTypes {
  ARTWORK = "artwork",
  AUDIO_RECORDING = "audioRecording",
  BILL = "bill",
  BLOG_POST = "blogPost",
  BOOK = "book",
  BOOK_SECTION = "bookSection",
  CASE = "case",
  CONFERENCE_PAPER = "conferencePaper",
  DATASET = "dataset",
  DICTIONARY_ENTRY = "dictionaryEntry",
  DOCUMENT = "document",
  EMAIL = "email",
  ENCYCLOPEDIA_ARTICLE = "encyclopediaArticle",
  FILM = "film",
  FORUM_POST = "forumPost",
  HEARING = "hearing",
  INSTANT_MESSAGE = "instantMessage",
  INTERVIEW = "interview",
  JOURNAL_ARTICLE = "journalArticle",
  LETTER = "letter",
  MAGAZINE_ARTICLE = "magazineArticle",
  MANUSCRIPT = "manuscript",
  MAP = "map",
  NEWSPAPER_ARTICLE = "newspaperArticle",
  NOTE = "note",
  PATENT = "patent",
  PODCAST = "podcast",
  PREPRINT = "preprint",
  PRESENTATION = "presentation",
  RADIO_BROADCAST = "radioBroadcast",
  REPORT = "report",
  COMPUTER_PROGRAM = "computerProgram",
  STANDARD = "standard",
  STATUTE = "statute",
  THESIS = "thesis",
  TV_BROADCAST = "tvBroadcast",
  VIDEO_RECORDING = "videoRecording",
  WEBPAGE = "webpage",
}

// Typed as a full Record so the compiler reports any CitoidUrlTypes member
// that is added to the enum without a corresponding mapping here.
const citoidToSembleType: Record<CitoidUrlTypes, SembleUrlType> = {
  [CitoidUrlTypes.ARTWORK]: SembleUrlType.LINK,
  [CitoidUrlTypes.AUDIO_RECORDING]: SembleUrlType.AUDIO,
  [CitoidUrlTypes.BILL]: SembleUrlType.LINK,
  [CitoidUrlTypes.BLOG_POST]: SembleUrlType.ARTICLE,
  [CitoidUrlTypes.BOOK]: SembleUrlType.BOOK,
  [CitoidUrlTypes.BOOK_SECTION]: SembleUrlType.BOOK,
  [CitoidUrlTypes.CASE]: SembleUrlType.LINK,
  [CitoidUrlTypes.CONFERENCE_PAPER]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.DATASET]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.DICTIONARY_ENTRY]: SembleUrlType.LINK,
  [CitoidUrlTypes.DOCUMENT]: SembleUrlType.LINK,
  [CitoidUrlTypes.EMAIL]: SembleUrlType.LINK,
  [CitoidUrlTypes.ENCYCLOPEDIA_ARTICLE]: SembleUrlType.ARTICLE,
  [CitoidUrlTypes.FILM]: SembleUrlType.VIDEO,
  [CitoidUrlTypes.FORUM_POST]: SembleUrlType.SOCIAL,
  [CitoidUrlTypes.HEARING]: SembleUrlType.LINK,
  [CitoidUrlTypes.INSTANT_MESSAGE]: SembleUrlType.LINK,
  [CitoidUrlTypes.INTERVIEW]: SembleUrlType.LINK,
  [CitoidUrlTypes.JOURNAL_ARTICLE]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.LETTER]: SembleUrlType.LINK,
  [CitoidUrlTypes.MAGAZINE_ARTICLE]: SembleUrlType.ARTICLE,
  [CitoidUrlTypes.MANUSCRIPT]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.MAP]: SembleUrlType.LINK,
  [CitoidUrlTypes.NEWSPAPER_ARTICLE]: SembleUrlType.ARTICLE,
  [CitoidUrlTypes.NOTE]: SembleUrlType.LINK,
  [CitoidUrlTypes.PATENT]: SembleUrlType.LINK,
  [CitoidUrlTypes.PODCAST]: SembleUrlType.AUDIO,
  [CitoidUrlTypes.PREPRINT]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.PRESENTATION]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.RADIO_BROADCAST]: SembleUrlType.AUDIO,
  [CitoidUrlTypes.REPORT]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.COMPUTER_PROGRAM]: SembleUrlType.SOFTWARE,
  [CitoidUrlTypes.STANDARD]: SembleUrlType.LINK,
  [CitoidUrlTypes.STATUTE]: SembleUrlType.LINK,
  [CitoidUrlTypes.THESIS]: SembleUrlType.RESEARCH,
  [CitoidUrlTypes.TV_BROADCAST]: SembleUrlType.VIDEO,
  [CitoidUrlTypes.VIDEO_RECORDING]: SembleUrlType.VIDEO,
  [CitoidUrlTypes.WEBPAGE]: SembleUrlType.LINK,
};

// Creator types consulted, in this priority order, when an item has no
// creator whose type is "author".
const fallbackCreatorTypes: readonly string[] = [
  "artist",
  "performer",
  "director",
  "podcaster",
  "cartographer",
  "programmer",
  "presenter",
  "sponsor",
  "inventor",
  "interviewee",
  "bookAuthor",
  "editor",
  "contributor",
];

/**
 * Maps a Citoid `itemType` string to a Semble URL type.
 *
 * @param citoidType - Raw `itemType` value; may be missing or unrecognised.
 * @returns The mapped type, or `SembleUrlType.LINK` when the value is empty
 *   or is not one of the known {@link CitoidUrlTypes}.
 */
export function mapCitoidUrlType(citoidType?: string): SembleUrlType {
  if (!citoidType) return SembleUrlType.LINK;

  // The table is a plain object, so a bare index lookup would also resolve
  // inherited keys such as "constructor" or "__proto__" to truthy
  // Object.prototype members. Only own keys are real mappings.
  if (!Object.prototype.hasOwnProperty.call(citoidToSembleType, citoidType)) {
    return SembleUrlType.LINK;
  }
  return citoidToSembleType[citoidType as CitoidUrlTypes];
}

/**
 * Builds Semble URL metadata from a Citoid response.
 *
 * @param data - Citoid response; every field is optional.
 * @returns Metadata whose fields are `undefined` wherever the response has no
 *   usable value. `type` is always set and defaults to `SembleUrlType.LINK`.
 */
export function buildSembleMetadataFromCitoid(data: CitoidResponse): SembleUrlMetadata {
  return {
    title: data.title,
    description: data.abstractNote,
    author: extractAuthors(data.creators),
    publishedDate: extractPublishedDate(data),
    siteName: determineSiteName(data),
    type: mapCitoidUrlType(data.itemType),
    doi: data.DOI,
    isbn: data.ISBN,
  };
}

/**
 * Picks the creators to credit and joins their names.
 *
 * Creators typed "author" win. Otherwise the first type in
 * `fallbackCreatorTypes` that has at least one creator is used. If none of
 * those types match, every creator is included.
 */
function extractAuthors(creators?: CitoidCreator[]): string | undefined {
  if (!creators || creators.length === 0) return undefined;

  for (const wantedType of ["author", ...fallbackCreatorTypes]) {
    const matching = creators.filter((creator) => creator.creatorType === wantedType);
    if (matching.length > 0) return joinAuthors(matching);
  }
  return joinAuthors(creators);
}

/** Joins formatted creator names with ", "; `undefined` when none has a name. */
function joinAuthors(creators: CitoidCreator[]): string | undefined {
  const names = creators.map(formatAuthor).filter((name) => name !== "");
  return names.length > 0 ? names.join(", ") : undefined;
}

/**
 * Formats one creator as a display name.
 *
 * Prefers `name`, then "first last", then whichever of last/first is set.
 * Returns an empty string when no name field is usable.
 */
function formatAuthor(creator: CitoidCreator): string {
  if (creator.name) return creator.name;
  if (creator.firstName && creator.lastName) {
    return `${creator.firstName} ${creator.lastName}`;
  }
  return creator.lastName || creator.firstName || "";
}

/**
 * Chooses the field that best names the hosting site or publication for the
 * item's type, using a generic fallback chain for types without a dedicated
 * rule. `||` is intentional: empty strings fall through to the next candidate.
 */
function determineSiteName(data: CitoidResponse): string | undefined {
  switch (data.itemType) {
    case CitoidUrlTypes.JOURNAL_ARTICLE:
    case CitoidUrlTypes.MAGAZINE_ARTICLE:
    case CitoidUrlTypes.NEWSPAPER_ARTICLE:
      return data.publicationTitle || data.publisher;
    case CitoidUrlTypes.BLOG_POST:
      return data.blogTitle || data.websiteTitle;
    case CitoidUrlTypes.FORUM_POST:
      return data.forumTitle;
    case CitoidUrlTypes.WEBPAGE:
      return data.websiteTitle;
    case CitoidUrlTypes.BOOK:
    case CitoidUrlTypes.BOOK_SECTION:
      return data.publisher || data.series;
    case CitoidUrlTypes.CONFERENCE_PAPER:
      return data.proceedingsTitle || data.conferenceName || data.publisher;
    case CitoidUrlTypes.THESIS:
      return data.university || data.institution;
    case CitoidUrlTypes.REPORT:
      return data.institution || data.publisher;
    case CitoidUrlTypes.DATASET:
    case CitoidUrlTypes.PREPRINT:
      return data.repository || data.institution;
    case CitoidUrlTypes.PODCAST:
      return data.seriesTitle || data.network;
    case CitoidUrlTypes.TV_BROADCAST:
    case CitoidUrlTypes.RADIO_BROADCAST:
      return data.network || data.programTitle;
    case CitoidUrlTypes.FILM:
    case CitoidUrlTypes.VIDEO_RECORDING:
      return data.distributor || data.studio;
    case CitoidUrlTypes.AUDIO_RECORDING:
      return data.label || data.publisher;
    default:
      return (
        data.publicationTitle ||
        data.websiteTitle ||
        data.blogTitle ||
        data.forumTitle ||
        data.repository ||
        data.libraryCatalog ||
        data.institution ||
        data.university ||
        data.publisher
      );
  }
}

/**
 * Selects the date field appropriate to the item type and parses it.
 *
 * Cases, statutes and patents prefer their type-specific date fields and fall
 * back to `date`. Every other type uses `date` directly.
 */
function extractPublishedDate(data: CitoidResponse): Date | undefined {
  let dateString: string | undefined;
  switch (data.itemType) {
    case CitoidUrlTypes.CASE:
      dateString = data.dateDecided || data.date;
      break;
    case CitoidUrlTypes.STATUTE:
      dateString = data.dateEnacted || data.date;
      break;
    case CitoidUrlTypes.PATENT:
      dateString = data.issueDate || data.filingDate || data.date;
      break;
    default:
      dateString = data.date;
      break;
  }
  return dateString ? parseDate(dateString) : undefined;
}

/**
 * Parses a date string into a `Date`.
 *
 * `YYYY`, `YYYY-MM` and `YYYY-MM-DD` are completed to midnight UTC on the
 * first missing day/month before parsing. Any other string is handed to the
 * `Date` constructor unchanged.
 * NOTE(review): for non-ISO strings the result is engine-defined and may be
 * interpreted in local time — confirm that is acceptable for callers.
 *
 * @returns The parsed date, or `undefined` when the result is an invalid date.
 */
function parseDate(dateString: string): Date | undefined {
  let candidate = dateString;
  if (/^\d{4}$/.test(dateString)) {
    candidate = `${dateString}-01-01T00:00:00.000Z`;
  } else if (/^\d{4}-\d{2}$/.test(dateString)) {
    candidate = `${dateString}-01T00:00:00.000Z`;
  } else if (/^\d{4}-\d{2}-\d{2}$/.test(dateString)) {
    candidate = `${dateString}T00:00:00.000Z`;
  }

  const parsed = new Date(candidate);
  return Number.isNaN(parsed.getTime()) ? undefined : parsed;
}