Rust app that exports an RSS file from currently trending Bluesky topics
1use futures::future::join_all;
2use headless_chrome::Browser;
3use headless_chrome::Tab;
4use headless_chrome::protocol::cdp::Target::CreateTarget;
5use jacquard::api::app_bsky::embed::record_with_media::ViewMedia;
6use jacquard::api::app_bsky::embed::record::ViewUnionRecord;
7use jacquard::api::app_bsky::feed::get_feed::{GetFeed, GetFeedResponse};
8use jacquard::api::app_bsky::unspecced::get_trending_topics::GetTrendingTopics;
9use jacquard::api::app_bsky::feed::post::Post;
10use jacquard::api::app_bsky::feed::FeedViewPost;
11use jacquard::api::app_bsky::feed::PostView;
12use jacquard::api::app_bsky::feed::PostViewEmbed;
13use jacquard::api::app_bsky::graph::ListPurpose;
14use jacquard::api::app_bsky::graph::starterpack::Starterpack;
15use jacquard::types::string::AtUri;
16use jacquard::types::string::Handle;
17use jacquard::types::string::Language;
18use jacquard::identity::PublicResolver;
19use jacquard::types::value::from_data;
20use jacquard::xrpc::RespOutput;
21use jacquard::xrpc::XrpcExt;
22use jacquard::xrpc::XrpcResponse;
23use jacquard::DataDeserializerError;
24use lazy_static::lazy_static;
25use miette::IntoDiagnostic;
26
27mod oembed;
28pub use oembed::{EmbedResponse, EmbedType};
29
30use regex::Regex;
31use rss::CategoryBuilder;
32use rss::ChannelBuilder;
33use rss::ItemBuilder;
34use rss::GuidBuilder;
35use rss::extension::dublincore::DublinCoreExtensionBuilder;
36
37enum EmbedViewGeneric<'a, 'b> {
38 ImagesView(&'b Box<jacquard::api::app_bsky::embed::images::View<'a>>),
39 VideoView(&'b Box<jacquard::api::app_bsky::embed::video::View<'a>>),
40 ExternalView(&'b Box<jacquard::api::app_bsky::embed::external::View<'a>>),
41 Unknown(&'b jacquard::types::value::Data<'a>)
42}
43
44impl<'a, 'b> From<&'a ViewMedia<'a>> for EmbedViewGeneric<'a, 'b> {
45 fn from(item: &'a ViewMedia<'a>) -> EmbedViewGeneric<'a, 'b> {
46 match item {
47 ViewMedia::ImagesView(images) => EmbedViewGeneric::ImagesView(images),
48 ViewMedia::VideoView(video) => EmbedViewGeneric::VideoView(video),
49 ViewMedia::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext),
50 ViewMedia::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown)
51 }
52 }
53}
54
55impl<'a, 'b> From<&'a PostViewEmbed<'a>> for EmbedViewGeneric<'a, 'b> {
56 fn from(item: &'a PostViewEmbed<'a>) -> Self {
57 match item {
58 PostViewEmbed::ImagesView(images) => EmbedViewGeneric::ImagesView(images),
59 PostViewEmbed::VideoView(video) => EmbedViewGeneric::VideoView(video),
60 PostViewEmbed::ExternalView(ext) => EmbedViewGeneric::ExternalView(ext),
61 PostViewEmbed::Unknown(unknown) => EmbedViewGeneric::Unknown(unknown),
62 _ => panic!("Defined on restricted subset of enum variants, got {:?}", item)
63 }
64 }
65}
66
67async fn get_posts(maybe_browser : Option<Browser> ) -> miette::Result<()> {
68 lazy_static! {
69 static ref RESOLVER: PublicResolver = PublicResolver::default();
70 static ref PROFILE_RE: Regex = Regex::new(r"^(?P<app>https://bsky.app)?/profile/(?P<user>[^/]+)/feed/(?P<rkey>[^/?]+)").unwrap_or_else(|e| panic!("Invalid regex: {}", e));
71 static ref HTTP: reqwest::Client = reqwest::Client::new();
72 static ref API_DOMAIN: url::Url = url::Url::parse("https://public.api.bsky.app").unwrap_or_else(|e| panic!("Invalid URL: {}", e));
73 static ref O_EMBED_ENDPOINT: url::Url = url::Url::parse("https://embed.bsky.app/oembed").unwrap_or_else(|e| panic!("Invalid URL: {}", e));
74 }
75
76 // Stateless XRPC - no auth required for public feeds
77 let request = GetTrendingTopics::new().limit(14).build();
78 let response = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?;
79 let output = response.into_output()?;
80
81 let maybe_browser = &maybe_browser;
82 eprintln!("Current trending topics from Bluesky:");
83 let future_feed_items = output.topics.iter().enumerate().flat_map(|(i, topic)| {
84 PROFILE_RE.captures(&topic.link).map(|groups| {
85 let user_handle: String = groups["user"].into();
86 let rkey: String = groups["rkey"].into();
87
88 let feed_category: rss::Category = CategoryBuilder::default()
89 .name(topic.topic.clone())
90 .domain(format!("https://bsky.app/profile/{user}/feed/{rkey}", user=user_handle, rkey=rkey))
91 .build();
92
93 async move {
94 let (did, response, warnings) = RESOLVER.resolve_handle_and_doc(&Handle::new(&user_handle).unwrap()).await?;
95 if warnings.len() > 0 {
96 for warning in warnings {
97 eprintln!("\tWarning! {:?} for response {:?} when validating {}", warning, response.buffer, user_handle);
98 }
99 }
100 let new_url = format!("at://{user}/app.bsky.feed.generator/{rkey}", user=did, rkey=rkey);
101 eprintln!("{:2} Latest Posts From: {} {} ( aka {} )", i + 1, topic.topic, topic.link, new_url);
102 let feed_uri = AtUri::new_cow(new_url.into()).unwrap();
103
104 let request = GetFeed::new().feed(feed_uri).limit(10).build();
105
106 let response: XrpcResponse<GetFeed> = HTTP.xrpc(API_DOMAIN.clone()).send(&request).await?;
107 let output: RespOutput<'static, GetFeedResponse> = response.into_output()?;
108 let future_posts = output.feed.iter().enumerate().into_iter().map(|(j, item) : (usize, &FeedViewPost)| /*-> impl Future<Output = miette::Result<rss:Item>> /* why rust is lame?? */ */ {
109 let feed_category = feed_category.clone();
110 let maybe_browser = &maybe_browser;
111 async move {
112 let post_view: &PostView = &item.post;
113 // Deserialize the post record from the Data type
114 let de_post: Result<Post, DataDeserializerError> = from_data(&post_view.record);
115 let post: Post = de_post.into_diagnostic()?;
116 eprintln!("\t{:02}.(@{})\n\t{} ", j + 1, post_view.author.handle, post.text);
117
118 let post_handle = &post_view.author.handle;
119 let post_rkey = post_view.uri.path().as_ref().map(|path| path.rkey.as_ref()).flatten().unwrap().as_ref();
120
121 let post_web_url = format!("https://bsky.app/profile/{handle}/post/{rkey}", handle=post_handle, rkey=post_rkey);
122 let get_o_embed_html = async || {
123 let mut query_url = O_EMBED_ENDPOINT.clone();
124 query_url.query_pairs_mut().append_pair("url", &post_web_url);
125 let o_embed_response: reqwest::Response = HTTP.get(query_url).send().await.into_diagnostic()?;
126 Ok::<String, miette::Error>(match o_embed_response.error_for_status() {
127 Err(e) => {
128 match e.status() {
129 Some(code) => {
130 match code {
131 reqwest::StatusCode::FORBIDDEN => String::from("<h1>You must be logged in to view this content</h1>"),
132 reqwest::StatusCode::NOT_FOUND => String::from("<h1>Post not found??!</h1>"),
133 _ => format!("Unexpected HTTP status {} on error {:?}", code, e)
134 }
135 }
136 _ => format!("Unknown error: {:?}", e)
137 }
138 }
139 Ok(response) => {
140 let o_embed_bytes: jacquard::bytes::Bytes = response.bytes().await.into_diagnostic()?;
141 let o_embed_json: EmbedResponse = serde_json::from_slice::<EmbedResponse>(o_embed_bytes.as_ref()).into_diagnostic()?;
142
143 match o_embed_json.oembed_type {
144 EmbedType::Rich(rich) => rich.html,
145 embed => {
146 eprintln!("Bluesky embed server sends rich embeds, but we got: {:?}", embed);
147 String::from("<h1>Unexpected oEmbed Response??</h1>")
148 }
149 }
150 }
151 })
152 };
153
154
155
156 let o_embed_html: String = match maybe_browser {
157 Some(browser) => match (|| -> anyhow::Result<String> {
158 let embed_url = format!("https://embed.bsky.app/embed/{authority}/app.bsky.feed.post/{rkey}", authority=post_view.uri.authority(), rkey=post_rkey);
159 eprintln!("opening tab on {}", &embed_url);
160 let tab: std::sync::Arc<Tab> = browser.new_tab_with_options(CreateTarget {
161 url: embed_url,
162 width: None,
163 height: None,
164 browser_context_id: None,
165 enable_begin_frame_control: None,
166 new_window: None,
167 background: None,
168 for_tab: None
169 })?;
170 let tab = tab.wait_until_navigated()?;
171 eprintln!("!!PSA!! page finished loading");
172 let embed_div = tab.wait_for_element("div#app div")?;
173 eprintln!("!!PSA!! embed div loaded");
174 let _ = embed_div.wait_for_element("p > span")?; /* has the page *REALLY* finished rendering? */
175 let result: String = embed_div.get_content()?;
176 tab.close_target()?;
177 Ok(result)
178 })() {
179 Ok(html) => html,
180 Err(e) => {
181 eprintln!("Browser failed with error {:?}, falling back to oEmbed", e);
182 get_o_embed_html().await?
183 }
184 }
185 _ => get_o_embed_html().await?
186 };
187
188 Ok(ItemBuilder::default()
189 .title(match &post_view.author.display_name {
190 Some(display_name) => format!("Bluesky Post by {} (@{})", display_name, post_view.author.handle),
191 _ => format!("Bluesky Post by {}", post_view.author.handle)
192 })
193 .dublin_core_ext(
194 DublinCoreExtensionBuilder::default()
195 .creators(vec![format!("https://bsky.app/profile/{handle}", handle=post_view.author.handle)])
196 .languages(post.langs.unwrap_or(vec![]).into_iter().map(
197 |lang : Language|
198 String::from(lang.as_str())
199 ).collect::<Vec<String>>())
200 .build()
201 )
202 .guid(GuidBuilder::default().value(post_view.uri.as_str()).permalink(true).build())
203 .link(post_web_url)
204 .pub_date(post.created_at.as_ref().to_rfc2822())
205 .description(Some((|| -> String {
206 let describe_record = |record : &ViewUnionRecord| -> String {
207 match record {
208 ViewUnionRecord::ViewRecord(record) => {
209 let de_post: Result<Post, DataDeserializerError> = from_data(&record.value);
210 match de_post.into_diagnostic() {
211 Err(e) => {
212 eprintln!("Unable to decode embedded post: {:?}", e);
213 String::from("<<ERROR DECODING QUOTED RECORD??>>")
214 }
215 Ok(post) =>
216 format!("Quoted Bluesky Post by {}:\n{}",
217 match &record.author.display_name {
218 Some(display_name) => format!("{} (@{})", display_name, record.author.handle),
219 _ => String::from(record.author.handle.as_ref())
220 },
221 post.text)
222 }
223 },
224 ViewUnionRecord::ViewNotFound(not_found) => format!("Missing record at {}", not_found.uri),
225 ViewUnionRecord::ViewBlocked(blocked) => format!("Blocked post at {}", blocked.uri),
226 ViewUnionRecord::ViewDetached(detached) => format!("Detached quote at {}", detached.uri),
227 ViewUnionRecord::GeneratorView(feed) =>
228 format!("Algorithmic Feed {} ({})",
229 feed.display_name,
230 feed.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())),
231 ViewUnionRecord::ListView(list) => format!("{} List {} ({})", (match &list.purpose {
232 ListPurpose::AppBskyGraphDefsModlist => String::from("Moderation"),
233 ListPurpose::AppBskyGraphDefsCuratelist => String::from("Curated"),
234 ListPurpose::AppBskyGraphDefsReferencelist => String::from("Reference"),
235 ListPurpose::Other(purpose) => String::from(purpose.as_ref())
236 }), list.name, list.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())),
237 ViewUnionRecord::LabelerView(labeler) =>
238 format!("Moderation Service By {} ({})",
239 labeler.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(labeler.creator.handle.as_ref())),
240 labeler.creator.description.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::new())),
241 ViewUnionRecord::StarterPackViewBasic(starter_pack) => {
242 let de_starter_pack: Result<Starterpack, DataDeserializerError> = from_data(&starter_pack.record);
243 format!("Starter Pack {} By {}",
244 (match de_starter_pack.into_diagnostic() {
245 Err(e) => {
246 eprintln!("Unable to decode embedded starter pack: {:?}", e);
247 String::from("<<ERROR DECODING EMBEDDED RECORD??>>")
248 }
249 Ok(pack) => {
250 match &pack.description {
251 Some(desc) => format!("{} (@{})", pack.name, desc),
252 _ => String::from(pack.name)
253 }
254 }
255 }),
256 starter_pack.creator.display_name.as_ref().map(|d| String::from(d.as_ref())).unwrap_or(String::from(starter_pack.creator.handle.as_ref())))
257 }
258 ViewUnionRecord::Unknown(unknown) => format!("Unknown embed {:?}", unknown)
259 }
260 };
261
262 let describe_media = |media : EmbedViewGeneric| -> String {
263 match media {
264 EmbedViewGeneric::ImagesView(images) => {
265 images.images.iter().enumerate().map(|(i, img)|
266 match img.alt.as_ref() {
267 "" => format!("Image #{} has no alt-text.\n", i + 1),
268 alt => format!("Embedded Image #{} has alt text: {}\n", i + 1, alt)
269 }
270 ).collect::<Vec<String>>().join("\n")
271 }
272 EmbedViewGeneric::VideoView(video) => {
273 match video.alt.as_ref() {
274 Some(alt) if alt.len() > 0 => format!("Video has alt text: {}\n", alt),
275 _ => format!("Embedded Video has no alt-text.\n"),
276 }
277 }
278 EmbedViewGeneric::ExternalView(ext) => {
279 format!("External embed {} ({}):\n{}\n", ext.external.title, ext.external.uri.as_str(), ext.external.description)
280 }
281 EmbedViewGeneric::Unknown(wat) => {
282 eprintln!("What is going on: {:?}", wat);
283 String::from("Unknown embedded media was elided")
284 }
285 }
286 };
287
288 format!("{}\n\n{}", post.text, match &post_view.embed {
289 Some(embed) => {
290 match embed {
291 /* it's a bit tacky to have to rematch these three... */
292 media@(PostViewEmbed::ImagesView(_) | PostViewEmbed::VideoView(_) | PostViewEmbed::ExternalView(_)) => describe_media(media.into()),
293
294 PostViewEmbed::RecordView(rec) => describe_record(&rec.record),
295 PostViewEmbed::RecordWithMediaView(rwm) =>
296 format!("Post embeds record ({}) with media ({})",
297 describe_record(&rwm.record.record),
298 describe_media((&rwm.media).into())),
299 PostViewEmbed::Unknown(wat) => {
300 eprintln!("What is going on: {:?}", wat);
301 String::from("Unknown embed")
302 }
303 }
304 }
305 _ => {
306 String::from("")
307 }
308 })
309 // unfortunately the escaping logic in rss crate is "not smart" so ....
310 // escaping this with quick_xml::escape::escape all doesn't help get rid
311 // of CDATA spam
312 })()))
313 .category(feed_category.clone())
314 .content(o_embed_html)
315 .build())
316 }
317 });
318 let posts = join_all(future_posts).await;
319 Ok::<_, Box<dyn std::error::Error>>(posts.into_iter().filter_map(|result : miette::Result<rss::Item> |
320 match result {
321 Ok(item) => Some(item),
322 Err(e) => {
323 eprintln!("Error processing record: {}", e);
324 None
325 }
326 }
327 ).into_iter().collect::<Vec<rss::Item>>())
328 }
329 })
330 });
331
332 let feed_items_with_errs: Vec<Result<Vec<rss::Item>, Box<dyn std::error::Error>>> = join_all(future_feed_items).await;
333 let feed_items: Vec<rss::Item> = feed_items_with_errs.into_iter().filter_map(|result|
334 match result {
335 Ok(items) => Some(items),
336 Err(e) => {
337 eprintln!("Error processing feed: {}", e);
338 None
339 }
340 }
341 ).flatten().collect::<Vec<rss::Item>>();
342
343 let rss_channel = ChannelBuilder::default()
344 .title("Bluesky Trending Topics")
345 .link("https://public.api.bsky.app/xrpc/app.bsky.unspecced.getTrendingTopics?limit=14")
346 .description("All Currently Trending Topics on Bluesky")
347 .language(Some("en-US".into()))
348 .last_build_date(chrono::Utc::now().to_rfc2822())
349 .items(feed_items)
350 .build();
351 rss_channel.write_to(std::io::stdout()).into_diagnostic()?;
352 Ok(())
353}
354
355fn main() {
356 let _ = tokio::runtime::Builder::new_multi_thread()
357 .enable_all()
358 .build()
359 .unwrap()
360 .block_on(get_posts(match Browser::default() {
361 Ok(browser) => Some(browser),
362 Err(e) => { eprintln!("Failed to construct browser due to {:?}, will proceed in fallback mode.", e); None }
363 }));
364 }