···11+# Database configuration
12DATABASE_URL=postgresql://slice:slice@localhost:5432/slice
33+44+# Server configuration
55+PORT=3000
66+77+# Authentication service base URL
28AUTH_BASE_URL=http://localhost:8081
99+1010+# AT Protocol relay endpoint for syncing data
1111+RELAY_ENDPOINT=https://relay1.us-west.bsky.network
1212+1313+# Logging level
314RUST_LOG=debug
···3232chrono = { version = "0.4", features = ["serde"] }
33333434# UUID generation
3535-uuid = { version = "1.0", features = ["v4"] }
3535+uuid = { version = "1.0", features = ["v4", "serde"] }
36363737# Environment variables
3838dotenvy = "0.15"
···5151# Middleware for HTTP requests with retry logic
5252reqwest-middleware = { version = "0.4.2", features = ["json", "multipart"] }
5353reqwest-chain = "1.0.0"
5454+5555+# Job queue
5656+sqlxmq = "0.6"
+289
api/migrations/004_sqlxmq_setup.sql
-- uuid-ossp provides uuid_nil(), used throughout the queue schema as a sentinel ID.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- Composite type describing a single message to enqueue.
-- mq_insert accepts an array of these.
CREATE TYPE mq_new_t AS (
    id              UUID,      -- unique message ID
    delay           INTERVAL,  -- delay before the message is processed
    retries         INT,       -- number of retries if initial processing fails
    retry_backoff   INTERVAL,  -- initial backoff between retries
    channel_name    TEXT,      -- name of channel
    channel_args    TEXT,      -- arguments to channel
    commit_interval INTERVAL,  -- two-phase commit interval; NULL disables two-phase commit
    ordered         BOOLEAN,   -- process in order relative to other ordered messages
    name            TEXT,      -- name of message
    payload_json    TEXT,      -- JSON payload
    payload_bytes   BYTEA      -- binary payload
);
-- Queue bookkeeping: one small row per message, updated frequently.
CREATE TABLE mq_msgs (
    id               UUID        PRIMARY KEY,
    created_at       TIMESTAMPTZ DEFAULT NOW(),
    -- Next time the message may be attempted; mq_poll sets it to NULL once the
    -- last attempt has been handed out.
    attempt_at       TIMESTAMPTZ DEFAULT NOW(),
    attempts         INT         NOT NULL DEFAULT 5,
    retry_backoff    INTERVAL    NOT NULL DEFAULT INTERVAL '1 second',
    channel_name     TEXT        NOT NULL,
    channel_args     TEXT        NOT NULL,
    commit_interval  INTERVAL,
    -- Predecessor in an ordered queue. Defaults to the nil UUID, and falls back
    -- to it again when the predecessor row is deleted.
    after_message_id UUID
        DEFAULT uuid_nil()
        REFERENCES mq_msgs (id) ON DELETE SET DEFAULT
);
-- Seed the sentinel row whose ID is the nil UUID, so that after_message_id
-- values equal to uuid_nil() satisfy the self-referencing foreign key.
INSERT INTO mq_msgs (
    id,
    channel_name,
    channel_args,
    after_message_id
)
VALUES (
    uuid_nil(),
    '',
    '',
    NULL
);
-- Internal helper: TRUE only when the UUID refers to an actual message,
-- i.e. it is neither NULL nor the nil UUID.
CREATE FUNCTION mq_uuid_exists(id UUID)
RETURNS BOOLEAN AS $$
    -- A NULL id makes the comparison NULL, but `NULL AND FALSE` is FALSE,
    -- so the result is never NULL.
    SELECT id <> uuid_nil() AND id IS NOT NULL
$$ LANGUAGE SQL IMMUTABLE;
-- Polling index: real messages (not the nil sentinel row) that are not queued
-- behind another message.
CREATE INDEX ON mq_msgs (channel_name, channel_args, attempt_at)
    WHERE id <> uuid_nil()
      AND NOT mq_uuid_exists(after_message_id);

-- Insertion index: ordered messages, used when looking up the latest message
-- of a queue.
CREATE INDEX ON mq_msgs (channel_name, channel_args, created_at, id)
    WHERE id <> uuid_nil()
      AND after_message_id IS NOT NULL;

-- Strict ordering: within a channel, at most one message may follow any given
-- message. Rows with a NULL after_message_id never collide with each other.
CREATE UNIQUE INDEX mq_msgs_channel_name_channel_args_after_message_id_idx
    ON mq_msgs (channel_name, channel_args, after_message_id);
-- Message bodies: larger rows that change far less often than mq_msgs.
-- Rows share their id with the corresponding mq_msgs row.
CREATE TABLE mq_payloads (
    id            UUID  PRIMARY KEY,
    name          TEXT  NOT NULL,
    payload_json  JSONB,
    payload_bytes BYTEA
);
-- Internal helper function to return the tail of an ordered queue, i.e. the
-- message that a newly inserted ordered message must be chained after.
--
-- Parameters:
--   from_channel_name  channel name of the queue
--   from_channel_args  channel arguments of the queue
--
-- Returns the ID of the tail message, or the nil UUID when the queue has no
-- ordered messages.
--
-- The tail is the ordered message that no other message in the same channel
-- follows. Picking the row with the greatest created_at is not sufficient:
-- created_at is the inserting transaction's start time, so a message appended
-- by an older transaction can sort below the message it follows. The next
-- insert would then chain after a message that already has a successor and
-- violate the unique (channel_name, channel_args, after_message_id) index.
CREATE FUNCTION mq_latest_message(from_channel_name TEXT, from_channel_args TEXT)
RETURNS UUID AS $$
    SELECT COALESCE(
        (
            SELECT tail.id
            FROM mq_msgs AS tail
            WHERE tail.channel_name = from_channel_name
              AND tail.channel_args = from_channel_args
              AND tail.after_message_id IS NOT NULL
              AND tail.id != uuid_nil()
              -- Successors are looked up per channel, which matches the scope
              -- of the unique ordering index and lets it serve this probe.
              AND NOT EXISTS (
                  SELECT 1
                  FROM mq_msgs AS successor
                  WHERE successor.channel_name = tail.channel_name
                    AND successor.channel_args = tail.channel_args
                    AND successor.after_message_id = tail.id
              )
            -- Deterministic tie-break; normally exactly one row qualifies.
            ORDER BY tail.created_at DESC, tail.id DESC
            LIMIT 1
        ),
        uuid_nil()
    )
$$ LANGUAGE SQL STABLE;
-- Internal helper: picks up to batch_size distinct (channel_name, channel_args)
-- pairs, in random order, that currently have at least one "ready" message.
-- A message is ready when it is due (attempt_at <= NOW()) and is not queued
-- behind another message. A NULL channel_names matches every channel.
CREATE FUNCTION mq_active_channels(channel_names TEXT[], batch_size INT)
RETURNS TABLE(name TEXT, args TEXT) AS $$
    WITH ready_channels AS (
        SELECT DISTINCT
            m.channel_name,
            m.channel_args
        FROM mq_msgs AS m
        WHERE m.id <> uuid_nil()
          AND m.attempt_at <= NOW()
          AND (channel_names IS NULL OR m.channel_name = ANY(channel_names))
          AND NOT mq_uuid_exists(m.after_message_id)
    )
    SELECT
        rc.channel_name,
        rc.channel_args
    FROM ready_channels AS rc
    ORDER BY RANDOM()
    LIMIT batch_size
$$ LANGUAGE SQL STABLE;
-- Main entry-point for job runner: pulls a batch of messages from the queue.
--
-- Parameters:
--   channel_names  channels to poll; NULL polls every channel
--   batch_size     maximum number of messages to claim (default 1)
--
-- Returns one row per claimed message, with wait_time = 0. Claiming a message
-- decrements its attempts, doubles its retry_backoff and moves attempt_at
-- forward by the previous backoff (or sets it to NULL when this was the last
-- attempt). The returned retry_backoff is the value before doubling.
--
-- When no message could be claimed, returns a single row whose columns are all
-- NULL except wait_time, which is MIN(attempt_at) - NOW() over the unblocked
-- messages in the requested channels (NULL when there are none).
CREATE FUNCTION mq_poll(channel_names TEXT[], batch_size INT DEFAULT 1)
RETURNS TABLE(
    id UUID,
    is_committed BOOLEAN,
    name TEXT,
    payload_json TEXT,
    payload_bytes BYTEA,
    retry_backoff INTERVAL,
    wait_time INTERVAL
) AS $$
BEGIN
    RETURN QUERY UPDATE mq_msgs
    SET
        attempt_at = CASE WHEN mq_msgs.attempts = 1 THEN NULL ELSE NOW() + mq_msgs.retry_backoff END,
        attempts = mq_msgs.attempts - 1,
        retry_backoff = mq_msgs.retry_backoff * 2
    FROM (
        SELECT
            msgs.id
        FROM mq_active_channels(channel_names, batch_size) AS active_channels
        INNER JOIN LATERAL (
            -- Only the ID is needed to drive the UPDATE.
            SELECT candidate.id
            FROM mq_msgs AS candidate
            WHERE candidate.id != uuid_nil()
              AND candidate.attempt_at <= NOW()
              AND candidate.channel_name = active_channels.name
              AND candidate.channel_args = active_channels.args
              AND NOT mq_uuid_exists(candidate.after_message_id)
            ORDER BY candidate.attempt_at ASC
            LIMIT batch_size
            -- Lock the candidates and skip rows a concurrent poller has
            -- already locked. Without this, two pollers can select the same
            -- row; the second waits for the first UPDATE to commit and then
            -- claims the message again, delivering it twice.
            FOR UPDATE OF candidate SKIP LOCKED
        ) AS msgs ON TRUE
        LIMIT batch_size
    ) AS messages_to_update
    LEFT JOIN mq_payloads ON mq_payloads.id = messages_to_update.id
    WHERE mq_msgs.id = messages_to_update.id
    RETURNING
        mq_msgs.id,
        mq_msgs.commit_interval IS NULL,
        mq_payloads.name,
        mq_payloads.payload_json::TEXT,
        mq_payloads.payload_bytes,
        -- RETURNING sees the doubled backoff; halve it to report the value
        -- that applied to this attempt.
        mq_msgs.retry_backoff / 2,
        interval '0' AS wait_time;

    -- Nothing claimed: tell the caller how long to wait before polling again.
    IF NOT FOUND THEN
        RETURN QUERY SELECT
            NULL::UUID,
            NULL::BOOLEAN,
            NULL::TEXT,
            NULL::TEXT,
            NULL::BYTEA,
            NULL::INTERVAL,
            MIN(mq_msgs.attempt_at) - NOW()
        FROM mq_msgs
        WHERE mq_msgs.id != uuid_nil()
          AND NOT mq_uuid_exists(mq_msgs.after_message_id)
          AND (channel_names IS NULL OR mq_msgs.channel_name = ANY(channel_names));
    END IF;
END;
$$ LANGUAGE plpgsql;
-- Enqueues a batch of new messages.
--
-- For each message a payload row and a bookkeeping row are written. Ordered
-- messages are chained via after_message_id: within the batch each one follows
-- the previous ordered message of the same channel (by ascending id), and the
-- first one follows whatever mq_latest_message reports for that channel.
-- Unordered messages get a NULL after_message_id.
CREATE FUNCTION mq_insert(new_messages mq_new_t[])
RETURNS VOID AS $$
BEGIN
    -- One notification per distinct channel present in the batch.
    PERFORM pg_notify(CONCAT('mq_', incoming.channel_name), '')
    FROM UNNEST(new_messages) AS incoming
    GROUP BY incoming.channel_name;

    -- Plus one on the global 'mq' channel if the batch was not empty.
    IF FOUND THEN
        PERFORM pg_notify('mq', '');
    END IF;

    INSERT INTO mq_payloads (id, name, payload_json, payload_bytes)
    SELECT
        incoming.id,
        incoming.name,
        CAST(incoming.payload_json AS JSONB),
        incoming.payload_bytes
    FROM UNNEST(new_messages) AS incoming;

    INSERT INTO mq_msgs (
        id,
        attempt_at,
        attempts,
        retry_backoff,
        channel_name,
        channel_args,
        commit_interval,
        after_message_id
    )
    SELECT
        incoming.id,
        -- The first attempt is held back by the commit interval, if any;
        -- mq_commit subtracts it again.
        NOW() + incoming.delay + COALESCE(incoming.commit_interval, INTERVAL '0'),
        -- attempts counts the initial try as well as the retries.
        incoming.retries + 1,
        incoming.retry_backoff,
        incoming.channel_name,
        incoming.channel_args,
        incoming.commit_interval,
        CASE
            WHEN incoming.ordered THEN
                LAG(
                    incoming.id,
                    1,
                    mq_latest_message(incoming.channel_name, incoming.channel_args)
                ) OVER same_queue
        END
    FROM UNNEST(new_messages) AS incoming
    WINDOW same_queue AS (
        PARTITION BY incoming.channel_name, incoming.channel_args, incoming.ordered
        ORDER BY incoming.id
    );
END;
$$ LANGUAGE plpgsql;
-- Commits messages that were created with a non-NULL commit interval: the
-- interval is subtracted from attempt_at again and then cleared. Messages
-- whose commit_interval is already NULL are left untouched.
CREATE FUNCTION mq_commit(msg_ids UUID[])
RETURNS VOID AS $$
BEGIN
    UPDATE mq_msgs AS pending
    SET
        attempt_at = pending.attempt_at - pending.commit_interval,
        commit_interval = NULL
    WHERE pending.commit_interval IS NOT NULL
      AND pending.id = ANY(msg_ids);
END;
$$ LANGUAGE plpgsql;
-- Removes messages from the queue, either because they have been processed or
-- because they expired without being processed.
--
-- Before deleting, sends a notification on 'mq_<channel_name>' for every
-- channel that has a listed message whose after_message_id is the nil UUID,
-- and one on 'mq' if any such message exists.
CREATE FUNCTION mq_delete(msg_ids UUID[])
RETURNS VOID AS $$
BEGIN
    PERFORM pg_notify(CONCAT('mq_', doomed.channel_name), '')
    FROM mq_msgs AS doomed
    WHERE doomed.after_message_id = uuid_nil()
      AND doomed.id = ANY(msg_ids)
    GROUP BY doomed.channel_name;

    IF FOUND THEN
        PERFORM pg_notify('mq', '');
    END IF;

    -- Bookkeeping rows first, then the matching payload rows.
    DELETE FROM mq_msgs AS m
    WHERE m.id = ANY(msg_ids);

    DELETE FROM mq_payloads AS p
    WHERE p.id = ANY(msg_ids);
END;
$$ LANGUAGE plpgsql;
-- Signals that the caller is still working on the given messages. May be used
-- during the initial commit interval or while a message is being processed.
-- attempt_at is pushed out to NOW() + duration, and commit_interval grows by
-- the same amount, so neither the commit interval elapses nor a retry fires
-- within that period. Messages already scheduled at or beyond NOW() + duration
-- are not modified.
CREATE FUNCTION mq_keep_alive(msg_ids UUID[], duration INTERVAL)
RETURNS VOID AS $$
    UPDATE mq_msgs AS m
    SET
        -- Both assignments read the row's previous attempt_at.
        commit_interval = m.commit_interval + ((NOW() + duration) - m.attempt_at),
        attempt_at = NOW() + duration
    WHERE m.attempt_at < NOW() + duration
      AND m.id = ANY(msg_ids);
$$ LANGUAGE SQL;
-- Checkpoints progress during lengthy processing of one message.
-- Postpones the next attempt like `mq_keep_alive` (attempt_at never moves
-- earlier), optionally grants extra retries, and optionally replaces the
-- stored payload. A NULL extra_retries adds nothing; a NULL payload argument
-- keeps the corresponding stored value.
CREATE FUNCTION mq_checkpoint(
    msg_id UUID,
    duration INTERVAL,
    new_payload_json TEXT,
    new_payload_bytes BYTEA,
    extra_retries INT
)
RETURNS VOID AS $$
    UPDATE mq_msgs AS m
    SET
        attempts = m.attempts + COALESCE(extra_retries, 0),
        attempt_at = GREATEST(m.attempt_at, NOW() + duration)
    WHERE m.id = msg_id;

    UPDATE mq_payloads AS p
    SET
        payload_bytes = COALESCE(new_payload_bytes, p.payload_bytes),
        payload_json = COALESCE(CAST(new_payload_json AS JSONB), p.payload_json)
    WHERE p.id = msg_id;
$$ LANGUAGE SQL;
289289+
+20
api/migrations/005_job_results.sql
-- Outcome of each finished sync job, one row per job.
CREATE TABLE job_results (
    job_id             UUID        PRIMARY KEY,
    user_did           TEXT        NOT NULL,
    slice_uri          TEXT        NOT NULL,
    -- Only terminal states are recorded.
    status             TEXT        NOT NULL CHECK (status IN ('completed', 'failed')),
    success            BOOLEAN     NOT NULL,
    total_records      BIGINT      NOT NULL DEFAULT 0,
    -- JSON array; empty by default.
    collections_synced JSONB       NOT NULL DEFAULT '[]'::jsonb,
    repos_processed    BIGINT      NOT NULL DEFAULT 0,
    message            TEXT        NOT NULL,
    error_message      TEXT,
    created_at         TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    completed_at       TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
-- Lookup of a user's jobs.
CREATE INDEX idx_job_results_user_did
    ON job_results (user_did);

-- Listing of the most recently completed jobs first.
CREATE INDEX idx_job_results_completed_at
    ON job_results (completed_at DESC);