use futures::future::join_all; use headless_chrome::Browser; use headless_chrome::Tab; use headless_chrome::protocol::cdp::Target::CreateTarget; use jacquard::api::app_bsky::embed::record_with_media::ViewMedia; use jacquard::api::app_bsky::embed::record::ViewUnionRecord; use jacquard::api::app_bsky::feed::get_feed::{GetFeed, GetFeedResponse}; use jacquard::api::app_bsky::unspecced::get_trending_topics::GetTrendingTopics; use jacquard::api::app_bsky::feed::post::Post; use jacquard::api::app_bsky::feed::FeedViewPost; use jacquard::api::app_bsky::feed::PostView; use jacquard::api::app_bsky::feed::PostViewEmbed; use jacquard::api::app_bsky::graph::ListPurpose; use jacquard::api::app_bsky::graph::starterpack::Starterpack; use jacquard::types::string::AtUri; use jacquard::types::string::Handle; use jacquard::types::string::Language; use jacquard::identity::PublicResolver; use jacquard::types::value::from_data; use jacquard::xrpc::RespOutput; use jacquard::xrpc::XrpcExt; use jacquard::xrpc::XrpcResponse; use jacquard::DataDeserializerError; use lazy_static::lazy_static; use miette::IntoDiagnostic; mod oembed; pub use oembed::{EmbedResponse, EmbedType}; use regex::Regex; use rss::CategoryBuilder; use rss::ChannelBuilder; use rss::ItemBuilder; use rss::GuidBuilder; use rss::extension::dublincore::DublinCoreExtensionBuilder; enum EmbedViewGeneric<'a, 'b> { ImagesView(&'b Box>), VideoView(&'b Box>), ExternalView(&'b Box>), Unknown(&'b jacquard::types::value::Data<'a>) } impl<'a, 'b> From<&'a ViewMedia<'a>> for EmbedViewGeneric<'a, 'b> { fn from(item: &'a ViewMedia<'a>) -> EmbedViewGeneric<'a, 'b> { match item { ViewMedia::ImagesView(images) => EmbedViewGeneric::ImagesView(images), ViewMedia::VideoView(video) => EmbedViewGeneric::VideoView(video), ViewMedia::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext), ViewMedia::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown) } } } impl<'a, 'b> From<&'a PostViewEmbed<'a>> for EmbedViewGeneric<'a, 'b> { fn from(item: &'a PostViewEmbed<'a>) -> Self { match item { PostViewEmbed::ImagesView(images) => EmbedViewGeneric::ImagesView(images), PostViewEmbed::VideoView(video) => EmbedViewGeneric::VideoView(video), PostViewEmbed::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext), PostViewEmbed::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown), _ => panic!("Defined on restricted subset of enum variants, got {:?}", item) } } } async fn get_posts(maybe_browser : Option ) -> miette::Result<()> { lazy_static! { static ref RESOLVER: PublicResolver = PublicResolver::default(); static ref PROFILE_RE: Regex = Regex::new(r"^(?Phttps://bsky.app)?/profile/(?P[^/]+)/feed/(?P[^/?]+)").unwrap_or_else(|e| panic!("Invalid regex: {}", e)); static ref HTTP: reqwest::Client = reqwest::Client::new(); static ref API_DOMAIN: url::Url = url::Url::parse("https://public.api.bsky.app").unwrap_or_else(|e| panic!("Invalid URL: {}", e)); static ref O_EMBED_ENDPOINT: url::Url = url::Url::parse("https://embed.bsky.app/oembed").unwrap_or_else(|e| panic!("Invalid URL: {}", e)); } // Stateless XRPC - no auth required for public feeds let request = GetTrendingTopics::new().limit(14).build(); let response = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?; let output = response.into_output()?; let maybe_browser = &maybe_browser; eprintln!("Current trending topics from Bluesky:"); let future_feed_items = output.topics.iter().enumerate().flat_map(|(i, topic)| { PROFILE_RE.captures(&topic.link).map(|groups| { let user_handle: String = groups["user"].into(); let rkey: String = groups["rkey"].into(); let feed_category: rss::Category = CategoryBuilder::default() .name(topic.topic.clone()) .domain(format!("https://bsky.app/profile/{user}/feed/{rkey}", user=user_handle, rkey=rkey)) .build(); async move { let (did, response, warnings) = RESOLVER.resolve_handle_and_doc(&Handle::new(&user_handle).unwrap()).await?; if warnings.len() > 0 { for warning in warnings { eprintln!("\tWarning! {:?} for response {:?} when validating {}", warning, response.buffer, user_handle); } } let new_url = format!("at://{user}/app.bsky.feed.generator/{rkey}", user=did, rkey=rkey); eprintln!("{:2} Latest Posts From: {} {} ( aka {} )", i + 1, topic.topic, topic.link, new_url); let feed_uri = AtUri::new_cow(new_url.into()).unwrap(); let request = GetFeed::new().feed(feed_uri).limit(10).build(); let response: XrpcResponse = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?; let output: RespOutput<'static, GetFeedResponse> = response.into_output()?; let future_posts = output.feed.iter().enumerate().into_iter().map(|(j, item) : (usize, &FeedViewPost)| /*-> impl Future> /* why rust is lame?? */ */ { let feed_category = feed_category.clone(); let maybe_browser = &maybe_browser; async move { let post_view: &PostView = &item.post; // Deserialize the post record from the Data type let de_post: Result = from_data(&post_view.record); let post: Post = de_post.into_diagnostic()?; eprintln!("\t{:02}.(@{})\n\t{} ", j + 1, post_view.author.handle, post.text); let post_handle = &post_view.author.handle; let post_rkey = post_view.uri.path().as_ref().map(|path| path.rkey.as_ref()).flatten().unwrap().as_ref(); let post_web_url = format!("https://bsky.app/profile/{handle}/post/{rkey}", handle=post_handle, rkey=post_rkey); let get_o_embed_html = async || { let mut query_url = O_EMBED_ENDPOINT.clone(); query_url.query_pairs_mut().append_pair("url", &post_web_url); let o_embed_response: reqwest::Response = HTTP.get(query_url).send().await.into_diagnostic()?; Ok::(match o_embed_response.error_for_status() { Err(e) => { match e.status() { Some(code) => { match code { reqwest::StatusCode::FORBIDDEN => String::from("

You must be logged in to view this content

"), reqwest::StatusCode::NOT_FOUND => String::from("

Post not found??!

"), _ => format!("Unexpected HTTP status {} on error {:?}", code, e) } } _ => format!("Unknown error: {:?}", e) } } Ok(response) => { let o_embed_bytes: jacquard::bytes::Bytes = response.bytes().await.into_diagnostic()?; let o_embed_json: EmbedResponse = serde_json::from_slice::(o_embed_bytes.as_ref()).into_diagnostic()?; match o_embed_json.oembed_type { EmbedType::Rich(rich) => rich.html, embed => { eprintln!("Bluesky embed server sends rich embeds, but we got: {:?}", embed); String::from("

Unexpected oEmbed Response??

") } } } }) }; let o_embed_html: String = match maybe_browser { Some(browser) => match (|| -> anyhow::Result { let embed_url = format!("https://embed.bsky.app/embed/{authority}/app.bsky.feed.post/{rkey}", authority=post_view.uri.authority(), rkey=post_rkey); eprintln!("opening tab on {}", &embed_url); let tab: std::sync::Arc = browser.new_tab_with_options(CreateTarget { url: embed_url, width: None, height: None, browser_context_id: None, enable_begin_frame_control: None, new_window: None, background: None, for_tab: None })?; let tab = tab.wait_until_navigated()?; eprintln!("!!PSA!! page finished loading"); let embed_div = tab.wait_for_element("div#app div")?; eprintln!("!!PSA!! embed div loaded"); let _ = embed_div.wait_for_element("p > span")?; /* has the page *REALLY* finished rendering? */ let result: String = embed_div.get_content()?; tab.close_target()?; Ok(result) })() { Ok(html) => html, Err(e) => { eprintln!("Browser failed with error {:?}, falling back to oEmbed", e); get_o_embed_html().await? } } _ => get_o_embed_html().await? }; Ok(ItemBuilder::default() .title(match &post_view.author.display_name { Some(display_name) => format!("Bluesky Post by {} (@{})", display_name, post_view.author.handle), _ => format!("Bluesky Post by {}", post_view.author.handle) }) .dublin_core_ext( DublinCoreExtensionBuilder::default() .creators(vec![format!("https://bsky.app/profile/{handle}", handle=post_view.author.handle)]) .languages(post.langs.unwrap_or(vec![]).into_iter().map( |lang : Language| String::from(lang.as_str()) ).collect::>()) .build() ) .guid(GuidBuilder::default().value(post_view.uri.as_str()).permalink(true).build()) .link(post_web_url) .pub_date(post.created_at.as_ref().to_rfc2822()) .description(Some((|| -> String { let describe_record = |record : &ViewUnionRecord| -> String { match record { ViewUnionRecord::ViewRecord(record) => { let de_post: Result = from_data(&record.value); match de_post.into_diagnostic() { Err(e) => { eprintln!("Unable to decode embedded post: {:?}", e); String::from("<>") } Ok(post) => format!("Quoted Bluesky Post by {}:\n{}", match &record.author.display_name { Some(display_name) => format!("{} (@{})", display_name, record.author.handle), _ => String::from(record.author.handle.as_ref()) }, post.text) } }, ViewUnionRecord::ViewNotFound(not_found) => format!("Missing record at {}", not_found.uri), ViewUnionRecord::ViewBlocked(blocked) => format!("Blocked post at {}", blocked.uri), ViewUnionRecord::ViewDetached(detached) => format!("Detached quote at {}", detached.uri), ViewUnionRecord::GeneratorView(feed) => format!("Algorithmic Feed {} ({})", feed.display_name, feed.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), ViewUnionRecord::ListView(list) => format!("{} List {} ({})", (match &list.purpose { ListPurpose::AppBskyGraphDefsModlist => String::from("Moderation"), ListPurpose::AppBskyGraphDefsCuratelist => String::from("Curated"), ListPurpose::AppBskyGraphDefsReferencelist => String::from("Reference"), ListPurpose::Other(purpose) => String::from(purpose.as_ref()) }), list.name, list.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), ViewUnionRecord::LabelerView(labeler) => format!("Moderation Service By {} ({})", labeler.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(labeler.creator.handle.as_ref())), labeler.creator.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), ViewUnionRecord::StarterPackViewBasic(starter_pack) => { let de_starter_pack: Result = from_data(&starter_pack.record); format!("Starter Pack {} By {}", (match de_starter_pack.into_diagnostic() { Err(e) => { eprintln!("Unable to decode embedded starter pack: {:?}", e); String::from("<>") } Ok(pack) => { match &pack.description { Some(desc) => format!("{} (@{})", pack.name, desc), _ => String::from(pack.name) } } }), starter_pack.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(starter_pack.creator.handle.as_ref()))) } ViewUnionRecord::Unknown(unknown) => format!("Unknown embed {:?}", unknown) } }; let describe_media = |media : EmbedViewGeneric| -> String { match media { EmbedViewGeneric::ImagesView(images) => { images.images.iter().enumerate().map(|(i, img)| match img.alt.as_ref() { "" => format!("Image #{} has no alt-text.\n", i + 1), alt => format!("Embedded Image #{} has alt text: {}\n", i + 1, alt) } ).collect::>().join("\n") } EmbedViewGeneric::VideoView(video) => { match video.alt.as_ref() { Some(alt) if alt.len() > 0 => format!("Video has alt text: {}\n", alt), _ => format!("Embedded Video has no alt-text.\n"), } } EmbedViewGeneric::ExternalView(ext) => { format!("External embed {} ({}):\n{}\n", ext.external.title, ext.external.uri.as_str(), ext.external.description) } EmbedViewGeneric::Unknown(wat) => { eprintln!("What is going on: {:?}", wat); String::from("Unknown embedded media was elided") } } }; format!("{}\n\n{}", post.text, match &post_view.embed { Some(embed) => { match embed { /* it's a bit tacky to have to rematch these three... */ media@(PostViewEmbed::ImagesView(_) | PostViewEmbed::VideoView(_) | PostViewEmbed::ExternalView(_)) => describe_media(media.into()), PostViewEmbed::RecordView(rec) => describe_record(&rec.record), PostViewEmbed::RecordWithMediaView(rwm) => format!("Post embeds record ({}) with media ({})", describe_record(&rwm.record.record), describe_media((&rwm.media).into())), PostViewEmbed::Unknown(wat) => { eprintln!("What is going on: {:?}", wat); String::from("Unknown embed") } } } _ => { String::from("") } }) // unfortunately the escaping logic in rss crate is "not smart" so .... // escaping this with quick_xml::escape::escape all doesn't help get rid // of CDATA spam })())) .category(feed_category.clone()) .content(o_embed_html) .build()) } }); let posts = join_all(future_posts).await; Ok::<_, Box>(posts.into_iter().filter_map(|result : miette::Result | match result { Ok(item) => Some(item), Err(e) => { eprintln!("Error processing record: {}", e); None } } ).into_iter().collect::>()) } }) }); let feed_items_with_errs: Vec, Box>> = join_all(future_feed_items).await; let feed_items: Vec = feed_items_with_errs.into_iter().filter_map(|result| match result { Ok(items) => Some(items), Err(e) => { eprintln!("Error processing feed: {}", e); None } } ).flatten().collect::>(); let rss_channel = ChannelBuilder::default() .title("Bluesky Trending Topics") .link("https://public.api.bsky.app/xrpc/app.bsky.unspecced.getTrendingTopics?limit=14") .description("All Currently Trending Topics on Bluesky") .language(Some("en-US".into())) .last_build_date(chrono::Utc::now().to_rfc2822()) .items(feed_items) .build(); rss_channel.write_to(std::io::stdout()).into_diagnostic()?; Ok(()) } fn main() { let _ = tokio::runtime::Builder::new_multi_thread() .enable_all() .build() .unwrap() .block_on(get_posts(match Browser::default() { Ok(browser) => Some(browser), Err(e) => { eprintln!("Failed to construct browser due to {:?}, will proceed in fallback mode.", e); None } })); }