tangled
alpha
login
or
join now
vielle.dev
/
meview
0
fork
atom
this repo has no description
0
fork
atom
overview
issues
pulls
pipelines
ingest new events to database
vielle.dev
2 months ago
29d5fa5e
548d6489
verified
This commit was signed with the committer's known signature.
vielle.dev
SSH Key Fingerprint:
SHA256:/4bvxqoEh9iMdjAPgcgAgXKZZQTROL3ULiPt6nH9RSs=
+313
-3
4 changed files
expand all
collapse all
unified
split
src
ingest
ingest.rs
mod.rs
queue.rs
main.rs
+196
src/ingest/ingest.rs
···
1
1
+
use std::{collections::VecDeque, sync::Arc};
2
2
+
3
3
+
use futures_util::future;
4
4
+
use ipld_core::ipld::Ipld;
5
5
+
use jacquard::{
6
6
+
api::com_atproto::sync::subscribe_repos::{Commit, SubscribeReposMessage, Sync},
7
7
+
types::string::Handle,
8
8
+
};
9
9
+
use jacquard_repo::{BlockStore, MemoryBlockStore};
10
10
+
use sqlx::{Pool, Postgres, query};
11
11
+
use thiserror::Error;
12
12
+
use tokio::{sync::Mutex, task::JoinHandle};
13
13
+
14
14
+
use crate::{backfill::backfill, utils::ipld_json::ipld_to_json_value};
15
15
+
16
16
+
/// A firehose event that can be applied to the local database.
trait Ingest {
    /// Error type produced when applying this event fails.
    type Error;
    /// Apply this event to the database behind `conn`.
    async fn ingest(&self, conn: Arc<Pool<Postgres>>) -> Result<(), Self::Error>;
}
20
20
+
21
21
+
#[derive(Debug, Error)]
22
22
+
enum CommitError {
23
23
+
#[error("Error parsing #commit event: {}", .0)]
24
24
+
ParseCarBytes(#[from] jacquard_repo::RepoError),
25
25
+
}
26
26
+
27
27
+
impl Ingest for Commit<'_> {
28
28
+
type Error = CommitError;
29
29
+
async fn ingest(&self, conn: Arc<Pool<Postgres>>) -> Result<(), Self::Error> {
30
30
+
let car = jacquard_repo::car::parse_car_bytes(&self.blocks).await?;
31
31
+
let storage = Arc::new(MemoryBlockStore::new_from_blocks(car.blocks));
32
32
+
33
33
+
let ops = future::join_all(self.ops.clone().into_iter().map(|op| async {
34
34
+
// get block data by cid, or None if errors/not found
35
35
+
if let Some(cid) = &op.cid {
36
36
+
if let Ok(cid) = cid.0.to_ipld()
37
37
+
&& let Ok(contents) = storage.get(&cid).await
38
38
+
&& let Some(contents) = contents
39
39
+
&& let Ok(val) = serde_ipld_dagcbor::from_slice::<Ipld>(&contents)
40
40
+
{
41
41
+
(op, ipld_to_json_value(&val).ok())
42
42
+
} else {
43
43
+
(op, None)
44
44
+
}
45
45
+
} else {
46
46
+
(op, None)
47
47
+
}
48
48
+
}))
49
49
+
.await;
50
50
+
51
51
+
future::join_all(ops.into_iter().map(|(op, val)| async {
52
52
+
let mut path = op.path.split("/");
53
53
+
let Some(collection) = path.next() else {
54
54
+
eprintln!("Invalid path ({})", op.path.as_str());
55
55
+
return;
56
56
+
};
57
57
+
let Some(rkey) = path.next() else {
58
58
+
eprintln!("Invalid path ({})", op.path.as_str());
59
59
+
return;
60
60
+
};
61
61
+
// assert the path is only collection/rkey
62
62
+
if path.next().is_some() {
63
63
+
eprintln!("Invalid path ({})", op.path.as_str());
64
64
+
return;
65
65
+
};
66
66
+
match op.action.clone().as_str() {
67
67
+
"create" => {
68
68
+
let Some(cid) = op.cid.map(|x| x.0.to_string()) else {
69
69
+
eprintln!("Missing cid for create {}/{}", collection, rkey);
70
70
+
return;
71
71
+
};
72
72
+
let Some(val) = val else {
73
73
+
eprintln!("Missing value for create {}/{}/{}", collection, rkey, cid);
74
74
+
return;
75
75
+
};
76
76
+
if let Err(err) = query!(
77
77
+
"INSERT INTO records (collection, rkey, cid, record)
78
78
+
VALUES ($1, $2, $3, $4)",
79
79
+
collection,
80
80
+
rkey,
81
81
+
cid,
82
82
+
val
83
83
+
)
84
84
+
.execute(&*conn)
85
85
+
.await
86
86
+
{
87
87
+
eprintln!("Error creating {}/{}/{}\n{}", collection, rkey, cid, err);
88
88
+
} else {
89
89
+
println!("wrote {}/{}/{} successfully.", collection, rkey, cid);
90
90
+
};
91
91
+
}
92
92
+
"update" => {
93
93
+
let Some(cid) = op.cid.map(|x| x.0.to_string()) else {
94
94
+
eprintln!("Missing cid for update {}/{}", collection, rkey);
95
95
+
return;
96
96
+
};
97
97
+
let Some(val) = val else {
98
98
+
eprintln!("Missing value for update {}/{}/{}", collection, rkey, cid);
99
99
+
return;
100
100
+
};
101
101
+
if let Err(err) = query!(
102
102
+
"UPDATE records SET
103
103
+
collection = $1,
104
104
+
rkey = $2,
105
105
+
cid = $3,
106
106
+
record = $4
107
107
+
WHERE
108
108
+
collection = $1
109
109
+
and rkey = $2",
110
110
+
collection,
111
111
+
rkey,
112
112
+
cid,
113
113
+
val
114
114
+
)
115
115
+
.execute(&*conn)
116
116
+
.await
117
117
+
{
118
118
+
eprintln!("Error updating {}/{}/{}\n{}", collection, rkey, cid, err);
119
119
+
} else {
120
120
+
println!("updated {}/{}/{} successfully.", collection, rkey, cid);
121
121
+
};
122
122
+
}
123
123
+
"delete" => {
124
124
+
if let Err(err) = query!(
125
125
+
"DELETE FROM records WHERE
126
126
+
collection = $1
127
127
+
and rkey = $2",
128
128
+
collection,
129
129
+
rkey,
130
130
+
)
131
131
+
.execute(&*conn)
132
132
+
.await
133
133
+
{
134
134
+
eprintln!("Error deleting {}/{}\n{}", collection, rkey, err);
135
135
+
} else {
136
136
+
println!("deleted {}/{} successfully.", collection, rkey);
137
137
+
};
138
138
+
}
139
139
+
_ => {
140
140
+
println!("missing #{} {:#?} {:#?}", op.action.as_str(), op, val)
141
141
+
}
142
142
+
}
143
143
+
}))
144
144
+
.await;
145
145
+
146
146
+
Ok(())
147
147
+
}
148
148
+
}
149
149
+
150
150
+
impl Ingest for Sync<'_> {
    type Error = crate::backfill::Error;
    // A #sync event triggers a full backfill (cursor None); the event's own
    // payload is ignored. NOTE(review): presumably re-fetching the whole repo
    // is intentional here since #sync signals the repo state changed wholesale
    // — confirm against the backfill semantics.
    async fn ingest(&self, conn: Arc<Pool<Postgres>>) -> Result<(), Self::Error> {
        backfill(conn, None).await
    }
}
156
156
+
157
157
+
pub fn ingest(
158
158
+
queue: Arc<Mutex<VecDeque<SubscribeReposMessage<'static>>>>,
159
159
+
conn: Arc<Pool<Postgres>>,
160
160
+
) -> JoinHandle<()> {
161
161
+
tokio::spawn(async move {
162
162
+
loop {
163
163
+
let Some(next) = queue.lock().await.pop_front() else {
164
164
+
continue;
165
165
+
};
166
166
+
167
167
+
match next {
168
168
+
SubscribeReposMessage::Commit(commit) => {
169
169
+
commit.ingest(conn.clone()).await.unwrap_or_else(|err| {
170
170
+
eprintln!("error handling #commit({}): {:?}", commit.clone().rev, err)
171
171
+
})
172
172
+
}
173
173
+
SubscribeReposMessage::Sync(sync) => {
174
174
+
sync.ingest(conn.clone()).await.unwrap_or_else(|err| {
175
175
+
eprintln!("error handling #sync({}): {:?}", sync.clone().rev, err)
176
176
+
})
177
177
+
}
178
178
+
SubscribeReposMessage::Identity(identity) => println!(
179
179
+
"ignoring #identity({}) event. has user migrated?",
180
180
+
identity.handle.unwrap_or(Handle::raw("handle.invalid"))
181
181
+
),
182
182
+
SubscribeReposMessage::Account(account) => println!(
183
183
+
"ignoring #account({} {}) event. has user deactivated?",
184
184
+
account.active,
185
185
+
account.status.unwrap_or("unknown".into())
186
186
+
),
187
187
+
SubscribeReposMessage::Info(info) => {
188
188
+
println!("ignoring #info({}) event", info.name)
189
189
+
}
190
190
+
SubscribeReposMessage::Unknown(_) => {
191
191
+
println!("ignoring unknown event. is meview outdated?")
192
192
+
}
193
193
+
};
194
194
+
}
195
195
+
})
196
196
+
}
+4
src/ingest/mod.rs
···
1
1
+
//! Firehose ingestion: [`queue`] subscribes to the event stream and buffers
//! relevant messages; [`ingest`] drains that buffer into the database.
pub mod ingest;
pub mod queue;
pub use self::ingest::ingest;
pub use self::queue::queue;
+94
src/ingest/queue.rs
···
1
1
+
use std::{collections::VecDeque, sync::Arc};
2
2
+
3
3
+
use futures_util::stream::StreamExt;
4
4
+
use jacquard::api::com_atproto::sync::subscribe_repos::{SubscribeRepos, SubscribeReposMessage};
5
5
+
use jacquard::url::Url;
6
6
+
use jacquard::{common::xrpc::TungsteniteSubscriptionClient, xrpc::SubscriptionClient};
7
7
+
use tokio::{
8
8
+
sync::Mutex,
9
9
+
task::{self, JoinHandle},
10
10
+
};
11
11
+
12
12
+
use crate::config;
13
13
+
14
14
+
pub async fn queue() -> (
15
15
+
Arc<Mutex<VecDeque<SubscribeReposMessage<'static>>>>,
16
16
+
JoinHandle<()>,
17
17
+
) {
18
18
+
let queue = Arc::new(Mutex::new(VecDeque::new()));
19
19
+
20
20
+
// USER_SUBSCRIBE_URL is formatted as a domain
21
21
+
let uri = Url::parse(&format!("wss://{}/", config::USER_SUBSCRIBE_URL))
22
22
+
.expect("Env var USER_SUBSCRIBE_URL should be formated as a domain.");
23
23
+
let client = TungsteniteSubscriptionClient::from_base_uri(uri);
24
24
+
let (_sink, mut messages) = client
25
25
+
.subscribe(&SubscribeRepos::new().build())
26
26
+
.await
27
27
+
.expect("Could not subscribe to new events")
28
28
+
.into_stream();
29
29
+
30
30
+
let queue_clone = queue.clone();
31
31
+
let handle = task::spawn(async move {
32
32
+
let queue = queue_clone;
33
33
+
34
34
+
loop {
35
35
+
if let Some(msg) = messages.next().await {
36
36
+
let msg = match msg {
37
37
+
Ok(val) => val,
38
38
+
Err(err) => {
39
39
+
eprintln!("Warning: Websocket error: {} ({:?})", err, err.source());
40
40
+
continue;
41
41
+
}
42
42
+
};
43
43
+
44
44
+
// filter messages by user did
45
45
+
// note that #identity #account #info and #unknown will probably be ignored
46
46
+
let ev = match msg.clone() {
47
47
+
SubscribeReposMessage::Commit(commit) => {
48
48
+
if commit.repo != *config::USER_DID {
49
49
+
continue;
50
50
+
} else {
51
51
+
SubscribeReposMessage::Commit(commit)
52
52
+
}
53
53
+
}
54
54
+
SubscribeReposMessage::Sync(sync) => {
55
55
+
if sync.did != *config::USER_DID {
56
56
+
continue;
57
57
+
} else {
58
58
+
SubscribeReposMessage::Sync(sync)
59
59
+
}
60
60
+
}
61
61
+
62
62
+
SubscribeReposMessage::Identity(identity) => {
63
63
+
if identity.did != *config::USER_DID {
64
64
+
continue;
65
65
+
} else {
66
66
+
eprintln!(
67
67
+
"Warning: Recieved #identity event. Configuration may be out of date"
68
68
+
);
69
69
+
SubscribeReposMessage::Identity(identity)
70
70
+
}
71
71
+
}
72
72
+
SubscribeReposMessage::Account(account) => {
73
73
+
if account.did != *config::USER_DID {
74
74
+
continue;
75
75
+
} else {
76
76
+
eprintln!(
77
77
+
"Warning: Recieved #account event. Account active: `{}`. Account status: `{}`",
78
78
+
account.active,
79
79
+
account.status.clone().unwrap_or("Unknown".into())
80
80
+
);
81
81
+
SubscribeReposMessage::Account(account)
82
82
+
}
83
83
+
}
84
84
+
SubscribeReposMessage::Info(info) => SubscribeReposMessage::Info(info),
85
85
+
SubscribeReposMessage::Unknown(data) => SubscribeReposMessage::Unknown(data),
86
86
+
};
87
87
+
88
88
+
queue.lock().await.push_back(ev);
89
89
+
}
90
90
+
}
91
91
+
});
92
92
+
93
93
+
(queue, handle)
94
94
+
}
+19
-3
src/main.rs
···
1
1
-
use sqlx::{Pool, Postgres};
2
2
-
3
1
use crate::backfill::backfill;
4
2
5
3
mod backfill;
6
4
mod config;
7
5
mod db;
6
6
+
mod ingest;
8
7
mod utils;
9
8
10
9
#[derive(Debug)]
···
33
32
config::USER_SUBSCRIBE_URL.get().await,
34
33
config::DATABASE_URL.get().await
35
34
);
36
36
-
let conn: Pool<Postgres> = db::conn().await;
35
35
+
let conn = db::conn().await;
37
36
println!("Database connected and initialized");
37
37
+
38
38
+
let (queue, queue_handle) = ingest::queue().await;
38
39
39
40
println!("Starting backfill");
40
41
let timer = std::time::Instant::now();
···
45
46
println!("Backfill complete. Took {:?}", timer.elapsed());
46
47
47
48
println!("Completed sucessfully!");
49
49
+
50
50
+
// Set up Ctrl-C handler
51
51
+
let (tx, rx) = tokio::sync::oneshot::channel();
52
52
+
tokio::spawn(async move {
53
53
+
tokio::signal::ctrl_c().await.ok();
54
54
+
let _ = tx.send(());
55
55
+
});
56
56
+
57
57
+
println!("Handling new events. Ctrl+C to quit.");
58
58
+
let ingest_handle = ingest::ingest(queue, conn.clone());
59
59
+
60
60
+
let _ = rx.await;
61
61
+
queue_handle.abort();
62
62
+
ingest_handle.abort();
63
63
+
48
64
Ok(())
49
65
}