Rust app that exports an RSS file from currently trending Bluesky topics
at main 364 lines 23 kB view raw
1use futures::future::join_all; 2use headless_chrome::Browser; 3use headless_chrome::Tab; 4use headless_chrome::protocol::cdp::Target::CreateTarget; 5use jacquard::api::app_bsky::embed::record_with_media::ViewMedia; 6use jacquard::api::app_bsky::embed::record::ViewUnionRecord; 7use jacquard::api::app_bsky::feed::get_feed::{GetFeed, GetFeedResponse}; 8use jacquard::api::app_bsky::unspecced::get_trending_topics::GetTrendingTopics; 9use jacquard::api::app_bsky::feed::post::Post; 10use jacquard::api::app_bsky::feed::FeedViewPost; 11use jacquard::api::app_bsky::feed::PostView; 12use jacquard::api::app_bsky::feed::PostViewEmbed; 13use jacquard::api::app_bsky::graph::ListPurpose; 14use jacquard::api::app_bsky::graph::starterpack::Starterpack; 15use jacquard::types::string::AtUri; 16use jacquard::types::string::Handle; 17use jacquard::types::string::Language; 18use jacquard::identity::PublicResolver; 19use jacquard::types::value::from_data; 20use jacquard::xrpc::RespOutput; 21use jacquard::xrpc::XrpcExt; 22use jacquard::xrpc::XrpcResponse; 23use jacquard::DataDeserializerError; 24use lazy_static::lazy_static; 25use miette::IntoDiagnostic; 26 27mod oembed; 28pub use oembed::{EmbedResponse, EmbedType}; 29 30use regex::Regex; 31use rss::CategoryBuilder; 32use rss::ChannelBuilder; 33use rss::ItemBuilder; 34use rss::GuidBuilder; 35use rss::extension::dublincore::DublinCoreExtensionBuilder; 36 37enum EmbedViewGeneric<'a, 'b> { 38 ImagesView(&'b Box<jacquard::api::app_bsky::embed::images::View<'a>>), 39 VideoView(&'b Box<jacquard::api::app_bsky::embed::video::View<'a>>), 40 ExternalView(&'b Box<jacquard::api::app_bsky::embed::external::View<'a>>), 41 Unknown(&'b jacquard::types::value::Data<'a>) 42} 43 44impl<'a, 'b> From<&'a ViewMedia<'a>> for EmbedViewGeneric<'a, 'b> { 45 fn from(item: &'a ViewMedia<'a>) -> EmbedViewGeneric<'a, 'b> { 46 match item { 47 ViewMedia::ImagesView(images) => EmbedViewGeneric::ImagesView(images), 48 ViewMedia::VideoView(video) => EmbedViewGeneric::VideoView(video), 49 ViewMedia::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext), 50 ViewMedia::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown) 51 } 52 } 53} 54 55impl<'a, 'b> From<&'a PostViewEmbed<'a>> for EmbedViewGeneric<'a, 'b> { 56 fn from(item: &'a PostViewEmbed<'a>) -> Self { 57 match item { 58 PostViewEmbed::ImagesView(images) => EmbedViewGeneric::ImagesView(images), 59 PostViewEmbed::VideoView(video) => EmbedViewGeneric::VideoView(video), 60 PostViewEmbed::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext), 61 PostViewEmbed::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown), 62 _ => panic!("Defined on restricted subset of enum variants, got {:?}", item) 63 } 64 } 65} 66 67async fn get_posts(maybe_browser : Option<Browser> ) -> miette::Result<()> { 68 lazy_static! { 69 static ref RESOLVER: PublicResolver = PublicResolver::default(); 70 static ref PROFILE_RE: Regex = Regex::new(r"^(?P<app>https://bsky.app)?/profile/(?P<user>[^/]+)/feed/(?P<rkey>[^/?]+)").unwrap_or_else(|e| panic!("Invalid regex: {}", e)); 71 static ref HTTP: reqwest::Client = reqwest::Client::new(); 72 static ref API_DOMAIN: url::Url = url::Url::parse("https://public.api.bsky.app").unwrap_or_else(|e| panic!("Invalid URL: {}", e)); 73 static ref O_EMBED_ENDPOINT: url::Url = url::Url::parse("https://embed.bsky.app/oembed").unwrap_or_else(|e| panic!("Invalid URL: {}", e)); 74 } 75 76 // Stateless XRPC - no auth required for public feeds 77 let request = GetTrendingTopics::new().limit(14).build(); 78 let response = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?; 79 let output = response.into_output()?; 80 81 let maybe_browser = &maybe_browser; 82 eprintln!("Current trending topics from Bluesky:"); 83 let future_feed_items = output.topics.iter().enumerate().flat_map(|(i, topic)| { 84 PROFILE_RE.captures(&topic.link).map(|groups| { 85 let user_handle: String = groups["user"].into(); 86 let rkey: String = groups["rkey"].into(); 87 88 let feed_category: rss::Category = CategoryBuilder::default() 89 .name(topic.topic.clone()) 90 .domain(format!("https://bsky.app/profile/{user}/feed/{rkey}", user=user_handle, rkey=rkey)) 91 .build(); 92 93 async move { 94 let (did, response, warnings) = RESOLVER.resolve_handle_and_doc(&Handle::new(&user_handle).unwrap()).await?; 95 if warnings.len() > 0 { 96 for warning in warnings { 97 eprintln!("\tWarning! {:?} for response {:?} when validating {}", warning, response.buffer, user_handle); 98 } 99 } 100 let new_url = format!("at://{user}/app.bsky.feed.generator/{rkey}", user=did, rkey=rkey); 101 eprintln!("{:2} Latest Posts From: {} {} ( aka {} )", i + 1, topic.topic, topic.link, new_url); 102 let feed_uri = AtUri::new_cow(new_url.into()).unwrap(); 103 104 let request = GetFeed::new().feed(feed_uri).limit(10).build(); 105 106 let response: XrpcResponse<GetFeed> = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?; 107 let output: RespOutput<'static, GetFeedResponse> = response.into_output()?; 108 let future_posts = output.feed.iter().enumerate().into_iter().map(|(j, item) : (usize, &FeedViewPost)| /*-> impl Future<Output = miette::Result<rss:Item>> /* why rust is lame?? */ */ { 109 let feed_category = feed_category.clone(); 110 let maybe_browser = &maybe_browser; 111 async move { 112 let post_view: &PostView = &item.post; 113 // Deserialize the post record from the Data type 114 let de_post: Result<Post, DataDeserializerError> = from_data(&post_view.record); 115 let post: Post = de_post.into_diagnostic()?; 116 eprintln!("\t{:02}.(@{})\n\t{} ", j + 1, post_view.author.handle, post.text); 117 118 let post_handle = &post_view.author.handle; 119 let post_rkey = post_view.uri.path().as_ref().map(|path| path.rkey.as_ref()).flatten().unwrap().as_ref(); 120 121 let post_web_url = format!("https://bsky.app/profile/{handle}/post/{rkey}", handle=post_handle, rkey=post_rkey); 122 let get_o_embed_html = async || { 123 let mut query_url = O_EMBED_ENDPOINT.clone(); 124 query_url.query_pairs_mut().append_pair("url", &post_web_url); 125 let o_embed_response: reqwest::Response = HTTP.get(query_url).send().await.into_diagnostic()?; 126 Ok::<String, miette::Error>(match o_embed_response.error_for_status() { 127 Err(e) => { 128 match e.status() { 129 Some(code) => { 130 match code { 131 reqwest::StatusCode::FORBIDDEN => String::from("<h1>You must be logged in to view this content</h1>"), 132 reqwest::StatusCode::NOT_FOUND => String::from("<h1>Post not found??!</h1>"), 133 _ => format!("Unexpected HTTP status {} on error {:?}", code, e) 134 } 135 } 136 _ => format!("Unknown error: {:?}", e) 137 } 138 } 139 Ok(response) => { 140 let o_embed_bytes: jacquard::bytes::Bytes = response.bytes().await.into_diagnostic()?; 141 let o_embed_json: EmbedResponse = serde_json::from_slice::<EmbedResponse>(o_embed_bytes.as_ref()).into_diagnostic()?; 142 143 match o_embed_json.oembed_type { 144 EmbedType::Rich(rich) => rich.html, 145 embed => { 146 eprintln!("Bluesky embed server sends rich embeds, but we got: {:?}", embed); 147 String::from("<h1>Unexpected oEmbed Response??</h1>") 148 } 149 } 150 } 151 }) 152 }; 153 154 155 156 let o_embed_html: String = match maybe_browser { 157 Some(browser) => match (|| -> anyhow::Result<String> { 158 let embed_url = format!("https://embed.bsky.app/embed/{authority}/app.bsky.feed.post/{rkey}", authority=post_view.uri.authority(), rkey=post_rkey); 159 eprintln!("opening tab on {}", &embed_url); 160 let tab: std::sync::Arc<Tab> = browser.new_tab_with_options(CreateTarget { 161 url: embed_url, 162 width: None, 163 height: None, 164 browser_context_id: None, 165 enable_begin_frame_control: None, 166 new_window: None, 167 background: None, 168 for_tab: None 169 })?; 170 let tab = tab.wait_until_navigated()?; 171 eprintln!("!!PSA!! page finished loading"); 172 let embed_div = tab.wait_for_element("div#app div")?; 173 eprintln!("!!PSA!! embed div loaded"); 174 let _ = embed_div.wait_for_element("p > span")?; /* has the page *REALLY* finished rendering? */ 175 let result: String = embed_div.get_content()?; 176 tab.close_target()?; 177 Ok(result) 178 })() { 179 Ok(html) => html, 180 Err(e) => { 181 eprintln!("Browser failed with error {:?}, falling back to oEmbed", e); 182 get_o_embed_html().await? 183 } 184 } 185 _ => get_o_embed_html().await? 186 }; 187 188 Ok(ItemBuilder::default() 189 .title(match &post_view.author.display_name { 190 Some(display_name) => format!("Bluesky Post by {} (@{})", display_name, post_view.author.handle), 191 _ => format!("Bluesky Post by {}", post_view.author.handle) 192 }) 193 .dublin_core_ext( 194 DublinCoreExtensionBuilder::default() 195 .creators(vec![format!("https://bsky.app/profile/{handle}", handle=post_view.author.handle)]) 196 .languages(post.langs.unwrap_or(vec![]).into_iter().map( 197 |lang : Language| 198 String::from(lang.as_str()) 199 ).collect::<Vec<String>>()) 200 .build() 201 ) 202 .guid(GuidBuilder::default().value(post_view.uri.as_str()).permalink(true).build()) 203 .link(post_web_url) 204 .pub_date(post.created_at.as_ref().to_rfc2822()) 205 .description(Some((|| -> String { 206 let describe_record = |record : &ViewUnionRecord| -> String { 207 match record { 208 ViewUnionRecord::ViewRecord(record) => { 209 let de_post: Result<Post, DataDeserializerError> = from_data(&record.value); 210 match de_post.into_diagnostic() { 211 Err(e) => { 212 eprintln!("Unable to decode embedded post: {:?}", e); 213 String::from("<<ERROR DECODING QUOTED RECORD??>>") 214 } 215 Ok(post) => 216 format!("Quoted Bluesky Post by {}:\n{}", 217 match &record.author.display_name { 218 Some(display_name) => format!("{} (@{})", display_name, record.author.handle), 219 _ => String::from(record.author.handle.as_ref()) 220 }, 221 post.text) 222 } 223 }, 224 ViewUnionRecord::ViewNotFound(not_found) => format!("Missing record at {}", not_found.uri), 225 ViewUnionRecord::ViewBlocked(blocked) => format!("Blocked post at {}", blocked.uri), 226 ViewUnionRecord::ViewDetached(detached) => format!("Detached quote at {}", detached.uri), 227 ViewUnionRecord::GeneratorView(feed) => 228 format!("Algorithmic Feed {} ({})", 229 feed.display_name, 230 feed.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), 231 ViewUnionRecord::ListView(list) => format!("{} List {} ({})", (match &list.purpose { 232 ListPurpose::AppBskyGraphDefsModlist => String::from("Moderation"), 233 ListPurpose::AppBskyGraphDefsCuratelist => String::from("Curated"), 234 ListPurpose::AppBskyGraphDefsReferencelist => String::from("Reference"), 235 ListPurpose::Other(purpose) => String::from(purpose.as_ref()) 236 }), list.name, list.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), 237 ViewUnionRecord::LabelerView(labeler) => 238 format!("Moderation Service By {} ({})", 239 labeler.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(labeler.creator.handle.as_ref())), 240 labeler.creator.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())), 241 ViewUnionRecord::StarterPackViewBasic(starter_pack) => { 242 let de_starter_pack: Result<Starterpack, DataDeserializerError> = from_data(&starter_pack.record); 243 format!("Starter Pack {} By {}", 244 (match de_starter_pack.into_diagnostic() { 245 Err(e) => { 246 eprintln!("Unable to decode embedded starter pack: {:?}", e); 247 String::from("<<ERROR DECODING EMBEDDED RECORD??>>") 248 } 249 Ok(pack) => { 250 match &pack.description { 251 Some(desc) => format!("{} (@{})", pack.name, desc), 252 _ => String::from(pack.name) 253 } 254 } 255 }), 256 starter_pack.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(starter_pack.creator.handle.as_ref()))) 257 } 258 ViewUnionRecord::Unknown(unknown) => format!("Unknown embed {:?}", unknown) 259 } 260 }; 261 262 let describe_media = |media : EmbedViewGeneric| -> String { 263 match media { 264 EmbedViewGeneric::ImagesView(images) => { 265 images.images.iter().enumerate().map(|(i, img)| 266 match img.alt.as_ref() { 267 "" => format!("Image #{} has no alt-text.\n", i + 1), 268 alt => format!("Embedded Image #{} has alt text: {}\n", i + 1, alt) 269 } 270 ).collect::<Vec<String>>().join("\n") 271 } 272 EmbedViewGeneric::VideoView(video) => { 273 match video.alt.as_ref() { 274 Some(alt) if alt.len() > 0 => format!("Video has alt text: {}\n", alt), 275 _ => format!("Embedded Video has no alt-text.\n"), 276 } 277 } 278 EmbedViewGeneric::ExternalView(ext) => { 279 format!("External embed {} ({}):\n{}\n", ext.external.title, ext.external.uri.as_str(), ext.external.description) 280 } 281 EmbedViewGeneric::Unknown(wat) => { 282 eprintln!("What is going on: {:?}", wat); 283 String::from("Unknown embedded media was elided") 284 } 285 } 286 }; 287 288 format!("{}\n\n{}", post.text, match &post_view.embed { 289 Some(embed) => { 290 match embed { 291 /* it's a bit tacky to have to rematch these three... */ 292 media@(PostViewEmbed::ImagesView(_) | PostViewEmbed::VideoView(_) | PostViewEmbed::ExternalView(_)) => describe_media(media.into()), 293 294 PostViewEmbed::RecordView(rec) => describe_record(&rec.record), 295 PostViewEmbed::RecordWithMediaView(rwm) => 296 format!("Post embeds record ({}) with media ({})", 297 describe_record(&rwm.record.record), 298 describe_media((&rwm.media).into())), 299 PostViewEmbed::Unknown(wat) => { 300 eprintln!("What is going on: {:?}", wat); 301 String::from("Unknown embed") 302 } 303 } 304 } 305 _ => { 306 String::from("") 307 } 308 }) 309 // unfortunately the escaping logic in rss crate is "not smart" so .... 310 // escaping this with quick_xml::escape::escape all doesn't help get rid 311 // of CDATA spam 312 })())) 313 .category(feed_category.clone()) 314 .content(o_embed_html) 315 .build()) 316 } 317 }); 318 let posts = join_all(future_posts).await; 319 Ok::<_, Box<dyn std::error::Error>>(posts.into_iter().filter_map(|result : miette::Result<rss::Item> | 320 match result { 321 Ok(item) => Some(item), 322 Err(e) => { 323 eprintln!("Error processing record: {}", e); 324 None 325 } 326 } 327 ).into_iter().collect::<Vec<rss::Item>>()) 328 } 329 }) 330 }); 331 332 let feed_items_with_errs: Vec<Result<Vec<rss::Item>, Box<dyn std::error::Error>>> = join_all(future_feed_items).await; 333 let feed_items: Vec<rss::Item> = feed_items_with_errs.into_iter().filter_map(|result| 334 match result { 335 Ok(items) => Some(items), 336 Err(e) => { 337 eprintln!("Error processing feed: {}", e); 338 None 339 } 340 } 341 ).flatten().collect::<Vec<rss::Item>>(); 342 343 let rss_channel = ChannelBuilder::default() 344 .title("Bluesky Trending Topics") 345 .link("https://public.api.bsky.app/xrpc/app.bsky.unspecced.getTrendingTopics?limit=14") 346 .description("All Currently Trending Topics on Bluesky") 347 .language(Some("en-US".into())) 348 .last_build_date(chrono::Utc::now().to_rfc2822()) 349 .items(feed_items) 350 .build(); 351 rss_channel.write_to(std::io::stdout()).into_diagnostic()?; 352 Ok(()) 353} 354 355fn main() { 356 let _ = tokio::runtime::Builder::new_multi_thread() 357 .enable_all() 358 .build() 359 .unwrap() 360 .block_on(get_posts(match Browser::default() { 361 Ok(browser) => Some(browser), 362 Err(e) => { eprintln!("Failed to construct browser due to {:?}, will proceed in fallback mode.", e); None } 363 })); 364 }