tangled
alpha
login
or
join now
nonbinary.computer
/
jacquard
80
fork
atom
A better Rust ATProto crate
80
fork
atom
overview
issues
9
pulls
pipelines
resolution, incl opengraph stuff
Orual
5 months ago
e706c991
0cbf3d8e
+341
-17
4 changed files
expand all
collapse all
unified
split
Cargo.lock
Cargo.toml
crates
jacquard
Cargo.toml
src
richtext.rs
+166
Cargo.lock
···
1432
1432
]
1433
1433
1434
1434
[[package]]
1435
1435
+
name = "futf"
1436
1436
+
version = "0.1.5"
1437
1437
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1438
1438
+
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
1439
1439
+
dependencies = [
1440
1440
+
"mac",
1441
1441
+
"new_debug_unreachable",
1442
1442
+
]
1443
1443
+
1444
1444
+
[[package]]
1435
1445
name = "futures"
1436
1446
version = "0.3.31"
1437
1447
source = "registry+https://github.com/rust-lang/crates.io-index"
···
1813
1823
]
1814
1824
1815
1825
[[package]]
1826
1826
+
name = "html5ever"
1827
1827
+
version = "0.27.0"
1828
1828
+
source = "registry+https://github.com/rust-lang/crates.io-index"
1829
1829
+
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
1830
1830
+
dependencies = [
1831
1831
+
"log",
1832
1832
+
"mac",
1833
1833
+
"markup5ever",
1834
1834
+
"proc-macro2",
1835
1835
+
"quote",
1836
1836
+
"syn 2.0.106",
1837
1837
+
]
1838
1838
+
1839
1839
+
[[package]]
1816
1840
name = "http"
1817
1841
version = "1.3.1"
1818
1842
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2252
2276
"trait-variant",
2253
2277
"url",
2254
2278
"viuer",
2279
2279
+
"webpage",
2255
2280
]
2256
2281
2257
2282
[[package]]
···
2775
2800
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
2776
2801
2777
2802
[[package]]
2803
2803
+
name = "mac"
2804
2804
+
version = "0.1.1"
2805
2805
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2806
2806
+
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
2807
2807
+
2808
2808
+
[[package]]
2778
2809
name = "malloc_buf"
2779
2810
version = "0.0.6"
2780
2811
source = "registry+https://github.com/rust-lang/crates.io-index"
···
2784
2815
]
2785
2816
2786
2817
[[package]]
2818
2818
+
name = "markup5ever"
2819
2819
+
version = "0.12.1"
2820
2820
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2821
2821
+
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
2822
2822
+
dependencies = [
2823
2823
+
"log",
2824
2824
+
"phf",
2825
2825
+
"phf_codegen",
2826
2826
+
"string_cache",
2827
2827
+
"string_cache_codegen",
2828
2828
+
"tendril",
2829
2829
+
]
2830
2830
+
2831
2831
+
[[package]]
2832
2832
+
name = "markup5ever_rcdom"
2833
2833
+
version = "0.3.0"
2834
2834
+
source = "registry+https://github.com/rust-lang/crates.io-index"
2835
2835
+
checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
2836
2836
+
dependencies = [
2837
2837
+
"html5ever",
2838
2838
+
"markup5ever",
2839
2839
+
"tendril",
2840
2840
+
"xml5ever",
2841
2841
+
]
2842
2842
+
2843
2843
+
[[package]]
2787
2844
name = "match-lookup"
2788
2845
version = "0.1.1"
2789
2846
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3283
3340
checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
3284
3341
3285
3342
[[package]]
3343
3343
+
name = "phf"
3344
3344
+
version = "0.11.3"
3345
3345
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3346
3346
+
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
3347
3347
+
dependencies = [
3348
3348
+
"phf_shared",
3349
3349
+
]
3350
3350
+
3351
3351
+
[[package]]
3352
3352
+
name = "phf_codegen"
3353
3353
+
version = "0.11.3"
3354
3354
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3355
3355
+
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
3356
3356
+
dependencies = [
3357
3357
+
"phf_generator",
3358
3358
+
"phf_shared",
3359
3359
+
]
3360
3360
+
3361
3361
+
[[package]]
3362
3362
+
name = "phf_generator"
3363
3363
+
version = "0.11.3"
3364
3364
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3365
3365
+
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
3366
3366
+
dependencies = [
3367
3367
+
"phf_shared",
3368
3368
+
"rand 0.8.5",
3369
3369
+
]
3370
3370
+
3371
3371
+
[[package]]
3372
3372
+
name = "phf_shared"
3373
3373
+
version = "0.11.3"
3374
3374
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3375
3375
+
checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5"
3376
3376
+
dependencies = [
3377
3377
+
"siphasher",
3378
3378
+
]
3379
3379
+
3380
3380
+
[[package]]
3286
3381
name = "pin-project"
3287
3382
version = "1.1.10"
3288
3383
source = "registry+https://github.com/rust-lang/crates.io-index"
···
3377
3472
dependencies = [
3378
3473
"zerocopy",
3379
3474
]
3475
3475
+
3476
3476
+
[[package]]
3477
3477
+
name = "precomputed-hash"
3478
3478
+
version = "0.1.1"
3479
3479
+
source = "registry+https://github.com/rust-lang/crates.io-index"
3480
3480
+
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
3380
3481
3381
3482
[[package]]
3382
3483
name = "pretty_assertions"
···
4406
4507
]
4407
4508
4408
4509
[[package]]
4510
4510
+
name = "siphasher"
4511
4511
+
version = "1.0.1"
4512
4512
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4513
4513
+
checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d"
4514
4514
+
4515
4515
+
[[package]]
4409
4516
name = "slab"
4410
4517
version = "0.4.11"
4411
4518
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4502
4609
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
4503
4610
4504
4611
[[package]]
4612
4612
+
name = "string_cache"
4613
4613
+
version = "0.8.9"
4614
4614
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4615
4615
+
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
4616
4616
+
dependencies = [
4617
4617
+
"new_debug_unreachable",
4618
4618
+
"parking_lot",
4619
4619
+
"phf_shared",
4620
4620
+
"precomputed-hash",
4621
4621
+
"serde",
4622
4622
+
]
4623
4623
+
4624
4624
+
[[package]]
4625
4625
+
name = "string_cache_codegen"
4626
4626
+
version = "0.5.4"
4627
4627
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4628
4628
+
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
4629
4629
+
dependencies = [
4630
4630
+
"phf_generator",
4631
4631
+
"phf_shared",
4632
4632
+
"proc-macro2",
4633
4633
+
"quote",
4634
4634
+
]
4635
4635
+
4636
4636
+
[[package]]
4505
4637
name = "strsim"
4506
4638
version = "0.11.1"
4507
4639
source = "registry+https://github.com/rust-lang/crates.io-index"
···
4638
4770
"once_cell",
4639
4771
"rustix 1.1.2",
4640
4772
"windows-sys 0.61.2",
4773
4773
+
]
4774
4774
+
4775
4775
+
[[package]]
4776
4776
+
name = "tendril"
4777
4777
+
version = "0.4.3"
4778
4778
+
source = "registry+https://github.com/rust-lang/crates.io-index"
4779
4779
+
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
4780
4780
+
dependencies = [
4781
4781
+
"futf",
4782
4782
+
"mac",
4783
4783
+
"utf-8",
4641
4784
]
4642
4785
4643
4786
[[package]]
···
5431
5574
]
5432
5575
5433
5576
[[package]]
5577
5577
+
name = "webpage"
5578
5578
+
version = "2.0.1"
5579
5579
+
source = "registry+https://github.com/rust-lang/crates.io-index"
5580
5580
+
checksum = "70862efc041d46e6bbaa82bb9c34ae0596d090e86cbd14bd9e93b36ee6802eac"
5581
5581
+
dependencies = [
5582
5582
+
"html5ever",
5583
5583
+
"markup5ever_rcdom",
5584
5584
+
"serde_json",
5585
5585
+
"url",
5586
5586
+
]
5587
5587
+
5588
5588
+
[[package]]
5434
5589
name = "webpki-roots"
5435
5590
version = "1.0.3"
5436
5591
source = "registry+https://github.com/rust-lang/crates.io-index"
···
5977
6132
version = "0.6.1"
5978
6133
source = "registry+https://github.com/rust-lang/crates.io-index"
5979
6134
checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
6135
6135
+
6136
6136
+
[[package]]
6137
6137
+
name = "xml5ever"
6138
6138
+
version = "0.18.1"
6139
6139
+
source = "registry+https://github.com/rust-lang/crates.io-index"
6140
6140
+
checksum = "9bbb26405d8e919bc1547a5aa9abc95cbfa438f04844f5fdd9dc7596b748bf69"
6141
6141
+
dependencies = [
6142
6142
+
"log",
6143
6143
+
"mac",
6144
6144
+
"markup5ever",
6145
6145
+
]
5980
6146
5981
6147
[[package]]
5982
6148
name = "yansi"
+1
Cargo.toml
···
83
83
84
84
# Text processing
85
85
regex = "1.11"
86
86
+
webpage = { version = "2.0", default-features = false }
+1
crates/jacquard/Cargo.toml
···
147
147
smol_str.workspace = true
148
148
percent-encoding.workspace = true
149
149
regex.workspace = true
150
150
+
webpage.workspace = true
150
151
jose-jwk = { workspace = true, features = ["p256"] }
151
152
p256 = { workspace = true, features = ["ecdsa"] }
152
153
rand_core.workspace = true
+173
-17
crates/jacquard/src/richtext.rs
···
46
46
/// Marker type indicating some facets may need resolution (handles → DIDs)
47
47
pub struct Unresolved;
48
48
49
49
+
/// Text paired with its optional rich-text facets (mentions, links, tags).
///
/// Only compiled when the `api_bluesky` feature is enabled.
#[cfg(feature = "api_bluesky")]
#[derive(Debug, Clone)]
pub struct RichText<'a> {
    /// The plain text the facets annotate.
    pub text: CowStr<'a>,
    /// Facets (mentions, links, tags) attached to `text`; `None` when there are none.
    pub facets: Option<Vec<Facet<'a>>>,
}
58
58
+
59
59
+
#[cfg(feature = "api_bluesky")]
impl RichText<'static> {
    /// Start building rich text from raw text, detecting facets automatically.
    ///
    /// Delegates to the module-level `parse` function, so it uses the default
    /// embed domains (bsky.app, deer.social) for at-URI extraction. The
    /// returned builder is in the [`Unresolved`] state.
    pub fn parse(text: impl Into<String>) -> RichTextBuilder<Unresolved> {
        parse(text)
    }

    /// Start building rich text by hand, adding facets explicitly.
    ///
    /// Delegates to `RichTextBuilder::builder()`; the returned builder is in
    /// the `Resolved` state.
    pub fn builder() -> RichTextBuilder<Resolved> {
        RichTextBuilder::builder()
    }
}
73
73
+
49
74
/// Detected embed candidate from URL or at-URI
50
75
#[derive(Debug, Clone)]
51
76
#[cfg(feature = "api_bluesky")]
···
84
109
text: String,
85
110
facet_candidates: Vec<FacetCandidate>,
86
111
#[cfg(feature = "api_bluesky")]
87
87
-
embed_candidates: Vec<EmbedCandidate<'static>>,
112
112
+
embed_candidates: Option<Vec<EmbedCandidate<'static>>>,
88
113
_state: PhantomData<State>,
89
114
}
90
115
···
193
218
RichTextBuilder {
194
219
text: text_processed,
195
220
facet_candidates,
196
196
-
embed_candidates,
221
221
+
embed_candidates: if embed_candidates.is_empty() {
222
222
+
None
223
223
+
} else {
224
224
+
Some(embed_candidates)
225
225
+
},
197
226
_state: PhantomData,
198
227
}
199
228
}
···
237
266
text: String::new(),
238
267
facet_candidates: Vec::new(),
239
268
#[cfg(feature = "api_bluesky")]
240
240
-
embed_candidates: Vec::new(),
269
269
+
embed_candidates: None,
241
270
_state: PhantomData,
242
271
}
243
272
}
244
273
245
274
/// Add a mention by handle (transitions to Unresolved state)
246
275
pub fn mention_handle(
247
247
-
self,
276
276
+
mut self,
248
277
handle: impl AsRef<str>,
249
278
range: Option<Range<usize>>,
250
279
) -> RichTextBuilder<Unresolved> {
···
255
284
self.find_substring(&search).unwrap_or(0..0)
256
285
});
257
286
258
258
-
let mut facet_candidates = self.facet_candidates;
259
259
-
facet_candidates.push(FacetCandidate::Mention { range, did: None });
287
287
+
self.facet_candidates
288
288
+
.push(FacetCandidate::Mention { range, did: None });
260
289
261
290
RichTextBuilder {
262
291
text: self.text,
263
263
-
facet_candidates,
292
292
+
facet_candidates: self.facet_candidates,
264
293
#[cfg(feature = "api_bluesky")]
265
294
embed_candidates: self.embed_candidates,
266
295
_state: PhantomData,
···
326
355
strong_ref: Option<crate::api::com_atproto::repo::strong_ref::StrongRef<'static>>,
327
356
) -> Self {
328
357
self.embed_candidates
358
358
+
.get_or_insert_with(Vec::new)
329
359
.push(EmbedCandidate::Record { at_uri, strong_ref });
330
360
self
331
361
}
···
337
367
url: impl Into<CowStr<'static>>,
338
368
metadata: Option<ExternalMetadata<'static>>,
339
369
) -> Self {
340
340
-
self.embed_candidates.push(EmbedCandidate::External {
341
341
-
url: url.into(),
342
342
-
metadata,
343
343
-
});
370
370
+
self.embed_candidates
371
371
+
.get_or_insert_with(Vec::new)
372
372
+
.push(EmbedCandidate::External {
373
373
+
url: url.into(),
374
374
+
metadata,
375
375
+
});
344
376
self
345
377
}
346
378
···
620
652
#[cfg(feature = "api_bluesky")]
621
653
impl RichTextBuilder<Resolved> {
622
654
/// Build the richtext (sync - all facets must be resolved)
623
623
-
pub fn build(self) -> Result<(String, Option<Vec<Facet<'static>>>), RichTextError> {
655
655
+
pub fn build(self) -> Result<RichText<'static>, RichTextError> {
624
656
use std::collections::BTreeMap;
625
657
if self.facet_candidates.is_empty() {
626
626
-
return Ok((self.text, None));
658
658
+
return Ok(RichText {
659
659
+
text: CowStr::from(self.text),
660
660
+
facets: None,
661
661
+
});
627
662
}
628
663
629
664
// Sort facets by start position
···
755
790
last_end = range.end;
756
791
}
757
792
758
758
-
Ok((self.text, Some(facets.into_static())))
793
793
+
Ok(RichText {
794
794
+
text: CowStr::from(self.text),
795
795
+
facets: Some(facets.into_static()),
796
796
+
})
759
797
}
760
798
}
761
799
···
765
803
pub async fn build_async<R>(
766
804
self,
767
805
resolver: &R,
768
768
-
) -> Result<(String, Option<Vec<Facet<'static>>>), RichTextError>
806
806
+
) -> Result<RichText<'static>, RichTextError>
769
807
where
770
808
R: jacquard_identity::resolver::IdentityResolver + Sync,
771
809
{
···
775
813
use std::collections::BTreeMap;
776
814
777
815
if self.facet_candidates.is_empty() {
778
778
-
return Ok((self.text, None));
816
816
+
return Ok(RichText {
817
817
+
text: CowStr::from(self.text),
818
818
+
facets: None,
819
819
+
});
779
820
}
780
821
781
822
// Sort facets by start position
···
906
947
last_end = range.end;
907
948
}
908
949
909
909
-
Ok((self.text, Some(facets.into_static())))
950
950
+
Ok(RichText {
951
951
+
text: CowStr::from(self.text),
952
952
+
facets: Some(facets.into_static()),
953
953
+
})
910
954
}
955
955
+
956
956
+
/// Build richtext with embed resolution using HttpClient
957
957
+
///
958
958
+
/// This resolves handles to DIDs and fetches OpenGraph metadata for external links.
959
959
+
pub async fn build_with_embeds_async<C>(
960
960
+
mut self,
961
961
+
client: &C,
962
962
+
) -> Result<(RichText<'static>, Option<Vec<EmbedCandidate<'static>>>), RichTextError>
963
963
+
where
964
964
+
C: jacquard_common::http_client::HttpClient
965
965
+
+ jacquard_identity::resolver::IdentityResolver
966
966
+
+ Sync,
967
967
+
{
968
968
+
// Extract embed candidates
969
969
+
let embed_candidates = self.embed_candidates.take().unwrap_or_default();
970
970
+
971
971
+
// Build facets (resolves handles)
972
972
+
let richtext = self.build_async(client).await?;
973
973
+
974
974
+
// Now resolve embed candidates
975
975
+
let mut resolved_embeds = Vec::new();
976
976
+
977
977
+
for candidate in embed_candidates {
978
978
+
match candidate {
979
979
+
EmbedCandidate::Record { at_uri, strong_ref } => {
980
980
+
// TODO: could fetch the record to get CID for strong_ref
981
981
+
// For now, just pass through
982
982
+
resolved_embeds.push(EmbedCandidate::Record { at_uri, strong_ref });
983
983
+
}
984
984
+
EmbedCandidate::External {
985
985
+
url,
986
986
+
metadata: None,
987
987
+
} => {
988
988
+
// Fetch OpenGraph metadata
989
989
+
match fetch_opengraph_metadata(client, &url).await {
990
990
+
Ok(Some(metadata)) => {
991
991
+
resolved_embeds.push(EmbedCandidate::External {
992
992
+
url,
993
993
+
metadata: Some(metadata),
994
994
+
});
995
995
+
}
996
996
+
Ok(None) | Err(_) => {
997
997
+
// If we fail to fetch metadata, include embed without metadata
998
998
+
resolved_embeds.push(EmbedCandidate::External {
999
999
+
url,
1000
1000
+
metadata: None,
1001
1001
+
});
1002
1002
+
}
1003
1003
+
}
1004
1004
+
}
1005
1005
+
other => resolved_embeds.push(other),
1006
1006
+
}
1007
1007
+
}
1008
1008
+
1009
1009
+
Ok((richtext, Some(resolved_embeds).filter(|v| !v.is_empty())))
1010
1010
+
}
1011
1011
+
}
1012
1012
+
1013
1013
+
/// Fetch OpenGraph metadata for `url` using the `webpage` crate's HTML parser.
///
/// Issues a single `GET` through `client` and parses the response body as
/// HTML (lossily decoded as UTF-8).
///
/// Returns `Ok(Some(..))` only when the page supplies a non-blank `title`
/// OpenGraph property. A missing `description` becomes an empty string, and
/// the thumbnail is the URL of the first OpenGraph image, if any.
///
/// Returns `Ok(None)` when:
/// - the response status is not 2xx (error pages must not supply metadata),
/// - the body cannot be parsed as HTML, or
/// - no usable title is present.
///
/// # Errors
///
/// Returns an error if the request cannot be constructed (e.g. `url` is not
/// a valid URI) or if `client` fails to send it.
#[cfg(feature = "api_bluesky")]
async fn fetch_opengraph_metadata<C>(
    client: &C,
    url: &str,
) -> Result<Option<ExternalMetadata<'static>>, Box<dyn std::error::Error + Send + Sync>>
where
    C: jacquard_common::http_client::HttpClient,
{
    use jacquard_common::cowstr::ToCowStr;

    // Build HTTP GET request
    let request = http::Request::builder()
        .method("GET")
        .uri(url)
        .header("User-Agent", "jacquard/0.6")
        .body(Vec::new())
        .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;

    // Fetch the page
    let response = client
        .send_http(request)
        .await
        .map_err(|e| Box::new(e) as Box<dyn std::error::Error + Send + Sync>)?;

    // A 404/500 body is still HTML and may carry OpenGraph tags of its own;
    // never turn an error page into link-card metadata.
    if !response.status().is_success() {
        return Ok(None);
    }

    // Decode the body once; `into_owned` avoids a second copy when the lossy
    // conversion already had to allocate.
    let html = String::from_utf8_lossy(response.body()).into_owned();

    // Use webpage crate to extract OpenGraph metadata
    let og = match webpage::HTML::from_string(html, Some(url.to_string())) {
        Ok(page) => page.opengraph,
        Err(_) => return Ok(None),
    };

    // Only return metadata if we have at least a (non-blank) title
    let title = match og
        .properties
        .get("title")
        .filter(|t| !t.trim().is_empty())
    {
        Some(title) => title.to_cowstr(),
        None => return Ok(None),
    };
    let description = og.properties.get("description").map(|s| s.to_cowstr());
    let thumbnail = og.images.first().map(|img| CowStr::from(img.url.clone()));

    Ok(Some(ExternalMetadata {
        title: title.into_static(),
        description: description
            .unwrap_or_else(|| CowStr::new_static(""))
            .into_static(),
        thumbnail: thumbnail.into_static(),
    }))
}