a digital entity named phi that roams bsky
1"""Tests for rich text URL parsing, including bare domain URLs."""
2
3from bot.core.rich_text import parse_urls
4
5
def test_full_url():
    """A scheme-qualified URL inside text yields one facet carrying that URI unchanged."""
    result = parse_urls("check out https://example.com/path")
    assert len(result) == 1
    first_feature = result[0]["features"][0]
    assert first_feature["uri"] == "https://example.com/path"
10
11
def test_bare_domain_url():
    """A scheme-less domain with a path yields one facet whose URI gains an https:// prefix."""
    result = parse_urls("check out cnbc.com/2025/markets")
    assert len(result) == 1
    first_feature = result[0]["features"][0]
    assert first_feature["uri"] == "https://cnbc.com/2025/markets"
16
17
def test_bare_domain_no_path():
    """A scheme-less domain without any path still yields one https:// facet."""
    result = parse_urls("visit example.com")
    assert len(result) == 1
    first_feature = result[0]["features"][0]
    assert first_feature["uri"] == "https://example.com"
22
23
def test_full_url_not_duplicated():
    """An https:// URL must yield a single facet, with no second facet for its bare-domain part."""
    result = parse_urls("see https://cnbc.com/path for details")
    # Exactly one facet: the domain inside the full URL must not match again.
    assert len(result) == 1
    first_feature = result[0]["features"][0]
    assert first_feature["uri"] == "https://cnbc.com/path"
29
30
def test_mixed_full_and_bare():
    """Text holding one full URL and one bare domain yields a facet for each."""
    result = parse_urls("https://a.com and also b.org/page")
    assert len(result) == 2
    # Compare as a set so the check does not depend on facet ordering.
    found = set()
    for facet in result:
        found.add(facet["features"][0]["uri"])
    assert found == {"https://a.com", "https://b.org/page"}
36
37
def test_byte_positions_bare_url():
    """The facet's byteStart/byteEnd must slice the encoded text to exactly the bare URL."""
    text = "see cnbc.com/path ok"
    result = parse_urls(text)
    assert len(result) == 1
    span = result[0]["index"]
    lo = span["byteStart"]
    hi = span["byteEnd"]
    # Offsets are checked against the encoded bytes, not the str.
    encoded = text.encode("UTF-8")
    assert encoded[lo:hi] == b"cnbc.com/path"