Elixir ATProtocol ingestion and sync library.

feat: add Jetstream integration

ovyerus.com 026ae6ef 7aa87458

verified
+1086
+2
CHANGELOG.md
··· 18 18 - Support for the 19 19 [Tap](https://github.com/bluesky-social/indigo/blob/main/cmd/tap/README.md) 20 20 sync and backfill utility service, via `Drinkup.Tap`. 21 + - Support for [Jetstream](https://github.com/bluesky-social/jetstream), a 22 + simplified JSON event stream for ATProto, via `Drinkup.Jetstream`. 21 23 22 24 ### Changed 23 25
examples/basic_consumer.ex examples/firehose/basic_consumer.ex
+118
examples/jetstream/jetstream_consumer.ex
··· 1 + defmodule JetstreamConsumer do 2 + @moduledoc """ 3 + Example Jetstream consumer implementation. 4 + 5 + This consumer demonstrates handling different types of Jetstream events: 6 + - Commit events (create, update, delete operations) 7 + - Identity events (handle changes, etc.) 8 + - Account events (status changes) 9 + """ 10 + 11 + @behaviour Drinkup.Jetstream.Consumer 12 + 13 + def handle_event(%Drinkup.Jetstream.Event.Commit{operation: :create} = event) do 14 + IO.inspect(event, label: "New record created") 15 + :ok 16 + end 17 + 18 + def handle_event(%Drinkup.Jetstream.Event.Commit{operation: :update} = event) do 19 + IO.inspect(event, label: "Record updated") 20 + :ok 21 + end 22 + 23 + def handle_event(%Drinkup.Jetstream.Event.Commit{operation: :delete} = event) do 24 + IO.inspect(event, label: "Record deleted") 25 + :ok 26 + end 27 + 28 + def handle_event(%Drinkup.Jetstream.Event.Identity{} = event) do 29 + IO.inspect(event, label: "Identity updated") 30 + :ok 31 + end 32 + 33 + def handle_event(%Drinkup.Jetstream.Event.Account{active: false} = event) do 34 + IO.inspect(event, label: "Account inactive") 35 + :ok 36 + end 37 + 38 + def handle_event(%Drinkup.Jetstream.Event.Account{active: true} = event) do 39 + IO.inspect(event, label: "Account active") 40 + :ok 41 + end 42 + 43 + def handle_event(event) do 44 + IO.inspect(event, label: "Unknown event") 45 + :ok 46 + end 47 + end 48 + 49 + defmodule ExampleJetstreamSupervisor do 50 + @moduledoc """ 51 + Example supervisor that starts a Jetstream connection. 52 + 53 + ## Usage 54 + 55 + # Start the supervisor 56 + {:ok, pid} = ExampleJetstreamSupervisor.start_link() 57 + 58 + # Update filters dynamically 59 + Drinkup.Jetstream.update_options(MyJetstream, %{ 60 + wanted_collections: ["app.bsky.feed.post", "app.bsky.feed.like"] 61 + }) 62 + 63 + # Stop the supervisor 64 + Supervisor.stop(pid) 65 + """ 66 + 67 + use Supervisor 68 + 69 + def start_link(arg \\ []) do 70 + Supervisor.start_link(__MODULE__, arg, name: __MODULE__) 71 + end 72 + 73 + @impl true 74 + def init(_) do 75 + children = [ 76 + # Connect to public Jetstream instance and filter for posts and likes 77 + {Drinkup.Jetstream, 78 + %{ 79 + consumer: JetstreamConsumer, 80 + name: MyJetstream, 81 + wanted_collections: ["app.bsky.feed.post", "app.bsky.feed.like"] 82 + }} 83 + ] 84 + 85 + Supervisor.init(children, strategy: :one_for_one) 86 + end 87 + end 88 + 89 + # Example: Filter for all graph operations (follows, blocks, etc.) 90 + defmodule GraphEventsConsumer do 91 + @behaviour Drinkup.Jetstream.Consumer 92 + 93 + def handle_event(%Drinkup.Jetstream.Event.Commit{collection: "app.bsky.graph." <> _} = event) do 94 + IO.puts("Graph event: #{event.collection} - #{event.operation}") 95 + :ok 96 + end 97 + 98 + def handle_event(_event), do: :ok 99 + end 100 + 101 + # Example: Filter for specific DIDs 102 + defmodule SpecificDIDConsumer do 103 + @behaviour Drinkup.Jetstream.Consumer 104 + 105 + @watched_dids [ 106 + "did:plc:abc123", 107 + "did:plc:def456" 108 + ] 109 + 110 + def handle_event(%Drinkup.Jetstream.Event.Commit{did: did} = event) 111 + when did in @watched_dids do 112 + IO.puts("Activity from watched DID: #{did}") 113 + IO.inspect(event) 114 + :ok 115 + end 116 + 117 + def handle_event(_event), do: :ok 118 + end
examples/multiple_consumers.ex examples/firehose/multiple_consumers.ex
examples/record_consumer.ex examples/firehose/record_consumer.ex
examples/tap_consumer.ex examples/tap/tap_consumer.ex
+2
lib/firehose/options.ex
··· 53 53 HTTP/HTTPS URL of the ATProto Firehose relay. 54 54 55 55 Defaults to `"https://bsky.network"` which is the public Bluesky relay. 56 + 57 + You can find a list of third-party relays at https://compare.hose.cam/. 56 58 """ 57 59 @type host() :: String.t() 58 60
+207
lib/jetstream.ex
··· 1 + defmodule Drinkup.Jetstream do 2 + @moduledoc """ 3 + Supervisor for Jetstream event stream connections. 4 + 5 + Jetstream is a simplified JSON event stream that converts the CBOR-encoded 6 + ATProto Firehose into lightweight, friendly JSON events. It provides zstd 7 + compression and filtering capabilities for collections and DIDs. 8 + 9 + ## Usage 10 + 11 + Add Jetstream to your supervision tree: 12 + 13 + children = [ 14 + {Drinkup.Jetstream, %{ 15 + consumer: MyJetstreamConsumer, 16 + name: MyJetstream, 17 + wanted_collections: ["app.bsky.feed.post", "app.bsky.feed.like"] 18 + }} 19 + ] 20 + 21 + ## Configuration 22 + 23 + See `Drinkup.Jetstream.Options` for all available configuration options. 24 + 25 + ## Dynamic Filter Updates 26 + 27 + You can update filters after the connection is established: 28 + 29 + Drinkup.Jetstream.update_options(MyJetstream, %{ 30 + wanted_collections: ["app.bsky.graph.follow"], 31 + wanted_dids: ["did:plc:abc123"] 32 + }) 33 + 34 + ## Public Instances 35 + 36 + By default Drinkup connects to `jetstream2.us-east.bsky.network`. 37 + 38 + Bluesky operates a few different Jetstream instances: 39 + - `jetstream1.us-east.bsky.network` 40 + - `jetstream2.us-east.bsky.network` 41 + - `jetstream1.us-west.bsky.network` 42 + - `jetstream2.us-west.bsky.network` 43 + 44 + There also some third-party instances not run by Bluesky PBC: 45 + - `jetstream.fire.hose.cam` 46 + - `jetstream2.fr.hose.cam` 47 + - `jetstream1.us-east.fire.hose.cam` 48 + """ 49 + 50 + use Supervisor 51 + require Logger 52 + alias Drinkup.Jetstream.Options 53 + 54 + @dialyzer nowarn_function: {:init, 1} 55 + 56 + @impl true 57 + def init({%Options{name: name} = drinkup_options, supervisor_options}) do 58 + children = [ 59 + {Task.Supervisor, name: {:via, Registry, {Drinkup.Registry, {name, JetstreamTasks}}}}, 60 + {Drinkup.Jetstream.Socket, drinkup_options} 61 + ] 62 + 63 + Supervisor.start_link( 64 + children, 65 + supervisor_options ++ 66 + [name: {:via, Registry, {Drinkup.Registry, {name, JetstreamSupervisor}}}] 67 + ) 68 + end 69 + 70 + @spec child_spec(Options.options()) :: Supervisor.child_spec() 71 + def child_spec(%{} = options), do: child_spec({options, [strategy: :one_for_one]}) 72 + 73 + @spec child_spec({Options.options(), Keyword.t()}) :: Supervisor.child_spec() 74 + def child_spec({drinkup_options, supervisor_options}) do 75 + %{ 76 + id: Map.get(drinkup_options, :name, __MODULE__), 77 + start: {__MODULE__, :init, [{Options.from(drinkup_options), supervisor_options}]}, 78 + type: :supervisor, 79 + restart: :permanent, 80 + shutdown: 500 81 + } 82 + end 83 + 84 + # Options Update API 85 + 86 + @typedoc """ 87 + Options that can be updated dynamically via `update_options/2`. 88 + 89 + - `:wanted_collections` - List of collection NSIDs or prefixes (max 100) 90 + - `:wanted_dids` - List of DIDs to filter (max 10,000) 91 + - `:max_message_size_bytes` - Maximum message size to receive 92 + 93 + Empty arrays will disable the corresponding filter (i.e., receive all). 94 + """ 95 + @type update_opts :: %{ 96 + optional(:wanted_collections) => [String.t()], 97 + optional(:wanted_dids) => [String.t()], 98 + optional(:max_message_size_bytes) => integer() 99 + } 100 + 101 + @doc """ 102 + Update filters and options for an active Jetstream connection. 103 + 104 + Sends an options update message to the Jetstream server over the websocket 105 + connection. This allows you to dynamically change which collections and DIDs 106 + you're interested in without reconnecting. 107 + 108 + ## Parameters 109 + 110 + - `name` - The name of the Jetstream instance (default: `Drinkup.Jetstream`) 111 + - `opts` - Map with optional fields: 112 + - `:wanted_collections` - List of collection NSIDs or prefixes (max 100) 113 + - `:wanted_dids` - List of DIDs to filter (max 10,000) 114 + - `:max_message_size_bytes` - Maximum message size to receive 115 + 116 + ## Examples 117 + 118 + # Filter to only posts 119 + Drinkup.Jetstream.update_options(MyJetstream, %{ 120 + wanted_collections: ["app.bsky.feed.post"] 121 + }) 122 + 123 + # Filter to specific DIDs 124 + Drinkup.Jetstream.update_options(MyJetstream, %{ 125 + wanted_dids: ["did:plc:abc123", "did:plc:def456"] 126 + }) 127 + 128 + # Disable all filters (receive all events) 129 + Drinkup.Jetstream.update_options(MyJetstream, %{ 130 + wanted_collections: [], 131 + wanted_dids: [] 132 + }) 133 + 134 + ## Return Value 135 + 136 + Returns `:ok` if the message was sent successfully, or `{:error, reason}` if 137 + the socket process could not be found or the message could not be sent. 138 + 139 + Note: The server may reject invalid updates (e.g., too many collections/DIDs). 140 + Invalid updates will result in the connection being closed by the server. 141 + """ 142 + @spec update_options(atom(), update_opts()) :: :ok | {:error, term()} 143 + def update_options(name \\ Drinkup.Jetstream, opts) when is_map(opts) do 144 + case find_connection(name) do 145 + {:ok, {conn, stream}} -> 146 + message = build_options_update_message(opts) 147 + :ok = :gun.ws_send(conn, stream, {:text, message}) 148 + 149 + Logger.debug("[Drinkup.Jetstream] Sent options update") 150 + :ok 151 + 152 + {:error, reason} -> 153 + {:error, reason} 154 + end 155 + end 156 + 157 + # Private functions 158 + 159 + @spec find_connection(atom()) :: {:ok, {pid(), :gun.stream_ref()}} | {:error, :not_connected} 160 + defp find_connection(name) do 161 + # Look up the connection details from Registry 162 + case Registry.lookup(Drinkup.Registry, {name, JetstreamConnection}) do 163 + [{_socket_pid, {conn, stream}}] -> 164 + {:ok, {conn, stream}} 165 + 166 + [] -> 167 + {:error, :not_connected} 168 + end 169 + end 170 + 171 + @spec build_options_update_message(update_opts()) :: String.t() 172 + defp build_options_update_message(opts) do 173 + payload = 174 + %{} 175 + |> maybe_add_wanted_collections(Map.get(opts, :wanted_collections)) 176 + |> maybe_add_wanted_dids(Map.get(opts, :wanted_dids)) 177 + |> maybe_add_max_message_size(Map.get(opts, :max_message_size_bytes)) 178 + 179 + message = %{ 180 + "type" => "options_update", 181 + "payload" => payload 182 + } 183 + 184 + Jason.encode!(message) 185 + end 186 + 187 + @spec maybe_add_wanted_collections(map(), [String.t()] | nil) :: map() 188 + defp maybe_add_wanted_collections(payload, nil), do: payload 189 + 190 + defp maybe_add_wanted_collections(payload, collections) when is_list(collections) do 191 + Map.put(payload, "wantedCollections", collections) 192 + end 193 + 194 + @spec maybe_add_wanted_dids(map(), [String.t()] | nil) :: map() 195 + defp maybe_add_wanted_dids(payload, nil), do: payload 196 + 197 + defp maybe_add_wanted_dids(payload, dids) when is_list(dids) do 198 + Map.put(payload, "wantedDids", dids) 199 + end 200 + 201 + @spec maybe_add_max_message_size(map(), integer() | nil) :: map() 202 + defp maybe_add_max_message_size(payload, nil), do: payload 203 + 204 + defp maybe_add_max_message_size(payload, max_size) when is_integer(max_size) do 205 + Map.put(payload, "maxMessageSizeBytes", max_size) 206 + end 207 + end
+61
lib/jetstream/consumer.ex
··· 1 + defmodule Drinkup.Jetstream.Consumer do 2 + @moduledoc """ 3 + Consumer behaviour for handling Jetstream events. 4 + 5 + Implement this behaviour to process events from a Jetstream instance. 6 + Events are dispatched asynchronously via `Task.Supervisor`. 7 + 8 + Unlike Tap, Jetstream does not require event acknowledgments. Events are 9 + processed in a fire-and-forget manner. 10 + 11 + ## Example 12 + 13 + defmodule MyJetstreamConsumer do 14 + @behaviour Drinkup.Jetstream.Consumer 15 + 16 + def handle_event(%Drinkup.Jetstream.Event.Commit{operation: :create} = event) do 17 + # Handle new record creation 18 + IO.inspect(event, label: "New record") 19 + :ok 20 + end 21 + 22 + def handle_event(%Drinkup.Jetstream.Event.Commit{operation: :delete} = event) do 23 + # Handle record deletion 24 + IO.inspect(event, label: "Deleted record") 25 + :ok 26 + end 27 + 28 + def handle_event(%Drinkup.Jetstream.Event.Identity{} = event) do 29 + # Handle identity changes 30 + IO.inspect(event, label: "Identity update") 31 + :ok 32 + end 33 + 34 + def handle_event(%Drinkup.Jetstream.Event.Account{active: false} = event) do 35 + # Handle account deactivation 36 + IO.inspect(event, label: "Account inactive") 37 + :ok 38 + end 39 + 40 + def handle_event(_event), do: :ok 41 + end 42 + 43 + ## Event Types 44 + 45 + The consumer will receive one of three event types: 46 + 47 + - `Drinkup.Jetstream.Event.Commit` - Repository commits (create, update, delete) 48 + - `Drinkup.Jetstream.Event.Identity` - Identity updates (handle changes, etc.) 49 + - `Drinkup.Jetstream.Event.Account` - Account status changes (active, taken down, etc.) 50 + 51 + ## Error Handling 52 + 53 + If your `handle_event/1` implementation raises an exception, it will be logged 54 + but will not affect the stream. The error is caught and logged by the event 55 + dispatcher. 56 + """ 57 + 58 + alias Drinkup.Jetstream.Event 59 + 60 + @callback handle_event(Event.t()) :: any() 61 + end
+100
lib/jetstream/event.ex
··· 1 + defmodule Drinkup.Jetstream.Event do 2 + @moduledoc """ 3 + Event handling and dispatch for Jetstream events. 4 + 5 + Parses incoming JSON events from Jetstream and dispatches them to the 6 + configured consumer via Task.Supervisor. 7 + """ 8 + 9 + require Logger 10 + alias Drinkup.Jetstream.{Event, Options} 11 + 12 + @type t() :: Event.Commit.t() | Event.Identity.t() | Event.Account.t() 13 + 14 + @doc """ 15 + Parse a JSON map into an event struct. 16 + 17 + Jetstream events have a top-level structure with a "kind" field that 18 + determines the event type, and a nested object with the event data. 19 + 20 + ## Example Event Structure 21 + 22 + %{ 23 + "did" => "did:plc:...", 24 + "time_us" => 1726880765818347, 25 + "kind" => "commit", 26 + "commit" => %{...} 27 + } 28 + 29 + Returns the appropriate event struct based on the "kind" field, or `nil` 30 + if the event type is not recognized. 31 + """ 32 + @spec from(map()) :: t() | nil 33 + def from(%{"did" => did, "time_us" => time_us, "kind" => kind} = payload) do 34 + case kind do 35 + "commit" -> 36 + case Map.get(payload, "commit") do 37 + nil -> 38 + Logger.warning("Commit event missing 'commit' field: #{inspect(payload)}") 39 + nil 40 + 41 + commit -> 42 + Event.Commit.from(did, time_us, commit) 43 + end 44 + 45 + "identity" -> 46 + case Map.get(payload, "identity") do 47 + nil -> 48 + Logger.warning("Identity event missing 'identity' field: #{inspect(payload)}") 49 + nil 50 + 51 + identity -> 52 + Event.Identity.from(did, time_us, identity) 53 + end 54 + 55 + "account" -> 56 + case Map.get(payload, "account") do 57 + nil -> 58 + Logger.warning("Account event missing 'account' field: #{inspect(payload)}") 59 + nil 60 + 61 + account -> 62 + Event.Account.from(did, time_us, account) 63 + end 64 + 65 + _ -> 66 + Logger.warning("Received unrecognized event kind from Jetstream: #{inspect(kind)}") 67 + nil 68 + end 69 + end 70 + 71 + def from(payload) do 72 + Logger.warning("Received invalid event structure from Jetstream: #{inspect(payload)}") 73 + nil 74 + end 75 + 76 + @doc """ 77 + Dispatch an event to the consumer via Task.Supervisor. 78 + 79 + Spawns a task that processes the event via the consumer's `handle_event/1` 80 + callback. Unlike Tap, Jetstream does not require acknowledgments. 81 + """ 82 + @spec dispatch(t(), Options.t()) :: :ok 83 + def dispatch(event, %Options{consumer: consumer, name: name}) do 84 + supervisor_name = {:via, Registry, {Drinkup.Registry, {name, JetstreamTasks}}} 85 + 86 + {:ok, _pid} = 87 + Task.Supervisor.start_child(supervisor_name, fn -> 88 + try do 89 + consumer.handle_event(event) 90 + rescue 91 + e -> 92 + Logger.error( 93 + "Error in Jetstream event handler: #{Exception.format(:error, e, __STACKTRACE__)}" 94 + ) 95 + end 96 + end) 97 + 98 + :ok 99 + end 100 + end
+106
lib/jetstream/event/account.ex
··· 1 + defmodule Drinkup.Jetstream.Event.Account do 2 + @moduledoc """ 3 + Struct for account events from Jetstream. 4 + 5 + Represents a change to an account's status on a host (e.g., PDS or Relay). 6 + The semantics of this event are that the status is at the host which emitted 7 + the event, not necessarily that at the currently active PDS. 8 + 9 + For example, a Relay takedown would emit a takedown with `active: false`, 10 + even if the PDS is still active. 11 + """ 12 + 13 + use TypedStruct 14 + 15 + typedstruct enforce: true do 16 + @typedoc """ 17 + The status of an inactive account. 18 + 19 + Known values from the ATProto lexicon: 20 + - `:takendown` - Account has been taken down 21 + - `:suspended` - Account is suspended 22 + - `:deleted` - Account has been deleted 23 + - `:deactivated` - Account has been deactivated by the user 24 + - `:desynchronized` - Account is out of sync 25 + - `:throttled` - Account is throttled 26 + 27 + The status can also be any other string value for future compatibility. 28 + """ 29 + @type status() :: 30 + :takendown 31 + | :suspended 32 + | :deleted 33 + | :deactivated 34 + | :desynchronized 35 + | :throttled 36 + | String.t() 37 + 38 + field :did, String.t() 39 + field :time_us, integer() 40 + field :kind, :account, default: :account 41 + field :active, boolean() 42 + field :seq, integer() 43 + field :time, NaiveDateTime.t() 44 + field :status, status() | nil 45 + end 46 + 47 + @doc """ 48 + Parses a Jetstream account payload into an Account struct. 49 + 50 + ## Example Payload (Active) 51 + 52 + %{ 53 + "active" => true, 54 + "did" => "did:plc:ufbl4k27gp6kzas5glhz7fim", 55 + "seq" => 1409753013, 56 + "time" => "2024-09-05T06:11:04.870Z" 57 + } 58 + 59 + ## Example Payload (Inactive) 60 + 61 + %{ 62 + "active" => false, 63 + "did" => "did:plc:abc123", 64 + "seq" => 1409753014, 65 + "time" => "2024-09-05T06:12:00.000Z", 66 + "status" => "takendown" 67 + } 68 + """ 69 + @spec from(String.t(), integer(), map()) :: t() 70 + def from( 71 + did, 72 + time_us, 73 + %{ 74 + "active" => active, 75 + "seq" => seq, 76 + "time" => time 77 + } = account 78 + ) do 79 + %__MODULE__{ 80 + did: did, 81 + time_us: time_us, 82 + active: active, 83 + seq: seq, 84 + time: parse_datetime(time), 85 + status: parse_status(Map.get(account, "status")) 86 + } 87 + end 88 + 89 + @spec parse_datetime(String.t()) :: NaiveDateTime.t() 90 + defp parse_datetime(time_str) do 91 + case NaiveDateTime.from_iso8601(time_str) do 92 + {:ok, datetime} -> datetime 93 + {:error, _} -> raise "Invalid datetime format: #{time_str}" 94 + end 95 + end 96 + 97 + @spec parse_status(String.t() | nil) :: status() | nil 98 + defp parse_status(nil), do: nil 99 + defp parse_status("takendown"), do: :takendown 100 + defp parse_status("suspended"), do: :suspended 101 + defp parse_status("deleted"), do: :deleted 102 + defp parse_status("deactivated"), do: :deactivated 103 + defp parse_status("desynchronized"), do: :desynchronized 104 + defp parse_status("throttled"), do: :throttled 105 + defp parse_status(status) when is_binary(status), do: status 106 + end
+78
lib/jetstream/event/commit.ex
··· 1 + defmodule Drinkup.Jetstream.Event.Commit do 2 + @moduledoc """ 3 + Struct for commit events from Jetstream. 4 + 5 + Represents a repository commit containing either a create, update, or delete 6 + operation on a record. Unlike the Firehose commit events, Jetstream provides 7 + simplified JSON structures without CAR/CBOR encoding. 8 + """ 9 + 10 + use TypedStruct 11 + 12 + typedstruct enforce: true do 13 + @typedoc """ 14 + The operation type for this commit. 15 + 16 + - `:create` - A new record was created 17 + - `:update` - An existing record was updated 18 + - `:delete` - An existing record was deleted 19 + """ 20 + @type operation() :: :create | :update | :delete 21 + 22 + field :did, String.t() 23 + field :time_us, integer() 24 + field :kind, :commit, default: :commit 25 + field :operation, operation() 26 + field :collection, String.t() 27 + field :rkey, String.t() 28 + field :rev, String.t() 29 + field :record, map() | nil 30 + field :cid, String.t() | nil 31 + end 32 + 33 + @doc """ 34 + Parses a Jetstream commit payload into a Commit struct. 35 + 36 + ## Example Payload 37 + 38 + %{ 39 + "rev" => "3l3qo2vutsw2b", 40 + "operation" => "create", 41 + "collection" => "app.bsky.feed.like", 42 + "rkey" => "3l3qo2vuowo2b", 43 + "record" => %{ 44 + "$type" => "app.bsky.feed.like", 45 + "createdAt" => "2024-09-09T19:46:02.102Z", 46 + "subject" => %{...} 47 + }, 48 + "cid" => "bafyreidwaivazkwu67xztlmuobx35hs2lnfh3kolmgfmucldvhd3sgzcqi" 49 + } 50 + """ 51 + @spec from(String.t(), integer(), map()) :: t() 52 + def from( 53 + did, 54 + time_us, 55 + %{ 56 + "rev" => rev, 57 + "operation" => operation, 58 + "collection" => collection, 59 + "rkey" => rkey 60 + } = commit 61 + ) do 62 + %__MODULE__{ 63 + did: did, 64 + time_us: time_us, 65 + operation: parse_operation(operation), 66 + collection: collection, 67 + rkey: rkey, 68 + rev: rev, 69 + record: Map.get(commit, "record"), 70 + cid: Map.get(commit, "cid") 71 + } 72 + end 73 + 74 + @spec parse_operation(String.t()) :: operation() 75 + defp parse_operation("create"), do: :create 76 + defp parse_operation("update"), do: :update 77 + defp parse_operation("delete"), do: :delete 78 + end
+58
lib/jetstream/event/identity.ex
··· 1 + defmodule Drinkup.Jetstream.Event.Identity do 2 + @moduledoc """ 3 + Struct for identity events from Jetstream. 4 + 5 + Represents a change to an account's identity, such as an updated handle, 6 + signing key, or PDS hosting endpoint. This serves as a signal to downstream 7 + services to refresh their identity cache. 8 + """ 9 + 10 + use TypedStruct 11 + 12 + typedstruct enforce: true do 13 + field :did, String.t() 14 + field :time_us, integer() 15 + field :kind, :identity, default: :identity 16 + field :handle, String.t() | nil 17 + field :seq, integer() 18 + field :time, NaiveDateTime.t() 19 + end 20 + 21 + @doc """ 22 + Parses a Jetstream identity payload into an Identity struct. 23 + 24 + ## Example Payload 25 + 26 + %{ 27 + "did" => "did:plc:ufbl4k27gp6kzas5glhz7fim", 28 + "handle" => "yohenrique.bsky.social", 29 + "seq" => 1409752997, 30 + "time" => "2024-09-05T06:11:04.870Z" 31 + } 32 + """ 33 + @spec from(String.t(), integer(), map()) :: t() 34 + def from( 35 + did, 36 + time_us, 37 + %{ 38 + "seq" => seq, 39 + "time" => time 40 + } = identity 41 + ) do 42 + %__MODULE__{ 43 + did: did, 44 + time_us: time_us, 45 + handle: Map.get(identity, "handle"), 46 + seq: seq, 47 + time: parse_datetime(time) 48 + } 49 + end 50 + 51 + @spec parse_datetime(String.t()) :: NaiveDateTime.t() 52 + defp parse_datetime(time_str) do 53 + case NaiveDateTime.from_iso8601(time_str) do 54 + {:ok, datetime} -> datetime 55 + {:error, _} -> raise "Invalid datetime format: #{time_str}" 56 + end 57 + end 58 + end
+151
lib/jetstream/options.ex
··· 1 + defmodule Drinkup.Jetstream.Options do 2 + @moduledoc """ 3 + Configuration options for Jetstream event stream connection. 4 + 5 + Jetstream is a simplified JSON event stream that converts the CBOR-encoded 6 + ATProto Firehose into lightweight, friendly JSON. It provides zstd compression 7 + and filtering capabilities for collections and DIDs. 8 + 9 + ## Options 10 + 11 + - `:consumer` (required) - Module implementing `Drinkup.Jetstream.Consumer` behaviour 12 + - `:name` - Unique name for this Jetstream instance in the supervision tree (default: `Drinkup.Jetstream`) 13 + - `:host` - Jetstream service URL (default: `"wss://jetstream2.us-east.bsky.network"`) 14 + - `:wanted_collections` - List of collection NSIDs or prefixes to filter (default: `[]` = all collections) 15 + - `:wanted_dids` - List of DIDs to filter (default: `[]` = all repos) 16 + - `:cursor` - Unix microseconds timestamp to resume from (default: `nil` = live-tail) 17 + - `:require_hello` - Pause replay until first options update is sent (default: `false`) 18 + - `:max_message_size_bytes` - Maximum message size to receive (default: `nil` = no limit) 19 + 20 + ## Example 21 + 22 + %{ 23 + consumer: MyJetstreamConsumer, 24 + name: MyJetstream, 25 + host: "wss://jetstream2.us-east.bsky.network", 26 + wanted_collections: ["app.bsky.feed.post", "app.bsky.feed.like"], 27 + wanted_dids: ["did:plc:abc123"], 28 + cursor: 1725519626134432 29 + } 30 + 31 + ## Collection Filters 32 + 33 + The `wanted_collections` option supports: 34 + - Full NSIDs: `"app.bsky.feed.post"` 35 + - NSID prefixes: `"app.bsky.graph.*"`, `"app.bsky.*"` 36 + 37 + You can specify up to 100 collection filters. 38 + 39 + ## DID Filters 40 + 41 + The `wanted_dids` option accepts a list of DID strings. 42 + You can specify up to 10,000 DIDs. 43 + 44 + ## Compression 45 + 46 + Jetstream always uses zstd compression with a custom dictionary. 47 + This is handled automatically by the socket implementation. 48 + """ 49 + 50 + use TypedStruct 51 + 52 + @default_host "wss://jetstream2.us-east.bsky.network" 53 + 54 + @typedoc """ 55 + Map of configuration options accepted by `Drinkup.Jetstream.child_spec/1`. 56 + """ 57 + @type options() :: %{ 58 + required(:consumer) => consumer(), 59 + optional(:name) => name(), 60 + optional(:host) => host(), 61 + optional(:wanted_collections) => wanted_collections(), 62 + optional(:wanted_dids) => wanted_dids(), 63 + optional(:cursor) => cursor(), 64 + optional(:require_hello) => require_hello(), 65 + optional(:max_message_size_bytes) => max_message_size_bytes() 66 + } 67 + 68 + @typedoc """ 69 + Module implementing the `Drinkup.Jetstream.Consumer` behaviour. 70 + """ 71 + @type consumer() :: module() 72 + 73 + @typedoc """ 74 + Unique identifier for this Jetstream instance in the supervision tree. 75 + 76 + Used for Registry lookups and naming child processes. 77 + """ 78 + @type name() :: atom() 79 + 80 + @typedoc """ 81 + WebSocket URL of the Jetstream service. 82 + 83 + Defaults to `"wss://jetstream2.us-east.bsky.network"` which is a public Bluesky instance. 84 + """ 85 + @type host() :: String.t() 86 + 87 + @typedoc """ 88 + List of collection NSIDs or NSID prefixes to filter. 89 + 90 + Examples: 91 + - `["app.bsky.feed.post"]` - Only posts 92 + - `["app.bsky.graph.*"]` - All graph collections 93 + - `["app.bsky.*"]` - All Bluesky app collections 94 + 95 + You can specify up to 100 collection filters. 96 + Defaults to `[]` (all collections). 97 + """ 98 + @type wanted_collections() :: [String.t()] 99 + 100 + @typedoc """ 101 + List of DIDs to filter events by. 102 + 103 + You can specify up to 10,000 DIDs. 104 + Defaults to `[]` (all repos). 105 + """ 106 + @type wanted_dids() :: [String.t()] 107 + 108 + @typedoc """ 109 + Unix microseconds timestamp to resume streaming from. 110 + 111 + When provided, Jetstream will replay events starting from this timestamp. 112 + Useful for resuming after a restart without missing events. The cursor is 113 + automatically tracked and updated as events are received. 114 + 115 + Defaults to `nil` (live-tail from current time). 116 + """ 117 + @type cursor() :: pos_integer() | nil 118 + 119 + @typedoc """ 120 + Whether to pause replay/live-tail until the first options update is sent. 121 + 122 + When `true`, the connection will wait for a `Drinkup.Jetstream.update_options/2` 123 + call before starting to receive events. 124 + 125 + Defaults to `false`. 126 + """ 127 + @type require_hello() :: boolean() 128 + 129 + @typedoc """ 130 + Maximum message size in bytes that the client would like to receive. 131 + 132 + Zero or `nil` means no limit. Negative values are treated as zero. 133 + Defaults to `nil` (no maximum size). 134 + """ 135 + @type max_message_size_bytes() :: integer() | nil 136 + 137 + typedstruct do 138 + field :consumer, consumer(), enforce: true 139 + field :name, name(), default: Drinkup.Jetstream 140 + field :host, host(), default: @default_host 141 + # TODO: Add NSID prefix validation once available in atex 142 + field :wanted_collections, wanted_collections(), default: [] 143 + field :wanted_dids, wanted_dids(), default: [] 144 + field :cursor, cursor() 145 + field :require_hello, require_hello(), default: false 146 + field :max_message_size_bytes, max_message_size_bytes() 147 + end 148 + 149 + @spec from(options()) :: t() 150 + def from(%{consumer: _} = options), do: struct(__MODULE__, options) 151 + end
+201
lib/jetstream/socket.ex
··· 1 + defmodule Drinkup.Jetstream.Socket do 2 + @moduledoc """ 3 + WebSocket connection handler for Jetstream event streams. 4 + 5 + Implements the Drinkup.Socket behaviour to manage connections to a Jetstream 6 + service, handling zstd-compressed JSON events and dispatching them to the 7 + configured consumer. 8 + """ 9 + 10 + use Drinkup.Socket 11 + 12 + require Logger 13 + alias Drinkup.Jetstream.{Event, Options} 14 + 15 + @dict_path "priv/jetstream/zstd_dictionary" 16 + @external_resource @dict_path 17 + @zstd_dict File.read!(@dict_path) 18 + 19 + @impl true 20 + def init(opts) do 21 + options = Keyword.fetch!(opts, :options) 22 + 23 + {:ok, %{options: options, host: options.host, cursor: options.cursor}} 24 + end 25 + 26 + def start_link(%Options{} = options, statem_opts) do 27 + socket_opts = [ 28 + host: options.host, 29 + options: options 30 + ] 31 + 32 + statem_opts = 33 + Keyword.put( 34 + statem_opts, 35 + :name, 36 + {:via, Registry, {Drinkup.Registry, {options.name, JetstreamSocket}}} 37 + ) 38 + 39 + Drinkup.Socket.start_link(__MODULE__, socket_opts, statem_opts) 40 + end 41 + 42 + @impl true 43 + def build_path(%{options: options}) do 44 + query_params = [compress: "true"] 45 + 46 + query_params = 47 + query_params 48 + |> put_collections(options.wanted_collections) 49 + |> put_dids(options.wanted_dids) 50 + |> put_cursor(options.cursor) 51 + |> put_max_size(options.max_message_size_bytes) 52 + |> put_require_hello(options.require_hello) 53 + 54 + "/subscribe?" <> URI.encode_query(query_params) 55 + end 56 + 57 + @impl true 58 + def handle_frame( 59 + {:binary, compressed_data}, 60 + {%{options: options} = data, _conn, _stream} 61 + ) do 62 + case decompress_and_parse(compressed_data) do 63 + {:ok, payload} -> 64 + case Event.from(payload) do 65 + nil -> 66 + # Event.from already logs warnings for unrecognized events 67 + :noop 68 + 69 + event -> 70 + Event.dispatch(event, options) 71 + # Update cursor with the event's time_us 72 + new_cursor = Map.get(payload, "time_us") 73 + {:ok, %{data | cursor: new_cursor}} 74 + end 75 + 76 + # TODO: sometimes getting ZSTD_CONTENTSIZE_UNKNOWN 77 + {:error, reason} -> 78 + Logger.error( 79 + "[Drinkup.Jetstream.Socket] Failed to decompress/parse frame: #{inspect(reason)}" 80 + ) 81 + 82 + :noop 83 + end 84 + end 85 + 86 + @impl true 87 + def handle_frame({:text, json}, {%{options: options} = data, _conn, _stream}) do 88 + # Text frames shouldn't happen since we force compression, but handle them anyway 89 + case Jason.decode(json) do 90 + {:ok, payload} -> 91 + case Event.from(payload) do 92 + nil -> 93 + :noop 94 + 95 + event -> 96 + Event.dispatch(event, options) 97 + new_cursor = Map.get(payload, "time_us") 98 + {:ok, %{data | cursor: new_cursor}} 99 + end 100 + 101 + {:error, reason} -> 102 + Logger.error("[Drinkup.Jetstream.Socket] Failed to decode JSON: #{inspect(reason)}") 103 + :noop 104 + end 105 + end 106 + 107 + @impl true 108 + def handle_frame(:close, _data) do 109 + Logger.info("[Drinkup.Jetstream.Socket] WebSocket closed, reason unknown") 110 + nil 111 + end 112 + 113 + @impl true 114 + def handle_frame({:close, errno, reason}, _data) do 115 + Logger.info( 116 + "[Drinkup.Jetstream.Socket] WebSocket closed, errno: #{errno}, reason: #{inspect(reason)}" 117 + ) 118 + 119 + nil 120 + end 121 + 122 + @impl true 123 + def handle_connected({user_data, conn, stream}) do 124 + # Register connection for options updates 125 + Registry.register( 126 + Drinkup.Registry, 127 + {user_data.options.name, JetstreamConnection}, 128 + {conn, stream} 129 + ) 130 + 131 + {:ok, user_data} 132 + end 133 + 134 + @impl true 135 + def handle_disconnected(_reason, {user_data, _conn, _stream}) do 136 + # Unregister connection when disconnected 137 + Registry.unregister(Drinkup.Registry, {user_data.options.name, JetstreamConnection}) 138 + {:ok, user_data} 139 + end 140 + 141 + # Can't use `create_ddict` as the value of `@zstd_dict` because it returns a reference :( 142 + @spec get_dictionary() :: reference() 143 + defp get_dictionary() do 144 + case :ezstd.create_ddict(@zstd_dict) do 145 + {:error, reason} -> 146 + raise ArgumentError, 147 + "somehow failed to created Jetstream's ZSTD dictionary: #{inspect(reason)}" 148 + 149 + dict -> 150 + dict 151 + end 152 + end 153 + 154 + @spec decompress_and_parse(binary()) :: {:ok, map()} | {:error, term()} 155 + defp decompress_and_parse(compressed_data) do 156 + with ctx when is_reference(ctx) <- 157 + :ezstd.create_decompression_context(byte_size(compressed_data)), 158 + :ok <- :ezstd.select_ddict(ctx, get_dictionary()), 159 + iolist when is_list(iolist) <- :ezstd.decompress_streaming(ctx, compressed_data), 160 + decompressed <- IO.iodata_to_binary(iolist), 161 + {:ok, payload} <- JSON.decode(decompressed) do 162 + {:ok, payload} 163 + else 164 + {:error, reason} -> {:error, reason} 165 + end 166 + end 167 + 168 + @spec put_collections(keyword(), [String.t()]) :: keyword() 169 + defp put_collections(params, []), do: params 170 + 171 + defp put_collections(params, collections) when is_list(collections) do 172 + Enum.reduce(collections, params, fn collection, acc -> 173 + [{:wantedCollections, collection} | acc] 174 + end) 175 + end 176 + 177 + @spec put_dids(keyword(), [String.t()]) :: keyword() 178 + defp put_dids(params, []), do: params 179 + 180 + defp put_dids(params, dids) when is_list(dids) do 181 + Enum.reduce(dids, params, fn did, acc -> 182 + [{:wantedDids, did} | acc] 183 + end) 184 + end 185 + 186 + @spec put_cursor(keyword(), integer() | nil) :: keyword() 187 + defp put_cursor(params, nil), do: params 188 + 189 + defp put_cursor(params, cursor) when is_integer(cursor), do: [{:cursor, cursor} | params] 190 + 191 + @spec put_max_size(keyword(), integer() | nil) :: keyword() 192 + defp put_max_size(params, nil), do: params 193 + 194 + defp put_max_size(params, max_size) when is_integer(max_size), 195 + do: [{:maxMessageSizeBytes, max_size} | params] 196 + 197 + @spec put_require_hello(keyword(), boolean()) :: keyword() 198 + defp put_require_hello(params, false), do: params 199 + 200 + defp put_require_hello(params, true), do: [{:requireHello, "true"} | params] 201 + end
+1
mix.exs
··· 34 34 {:certifi, "~> 2.15"}, 35 35 {:credo, "~> 1.7", only: [:dev, :test], runtime: false}, 36 36 {:ex_doc, "~> 0.34", only: :dev, runtime: false}, 37 + {:ezstd, "~> 1.1"}, 37 38 {:gun, "~> 2.2"}, 38 39 {:typedstruct, "~> 0.5"}, 39 40 {:jason, "~> 1.4"},
+1
mix.lock
··· 12 12 "earmark_parser": {:hex, :earmark_parser, "1.4.44", "f20830dd6b5c77afe2b063777ddbbff09f9759396500cdbe7523efd58d7a339c", [:mix], [], "hexpm", "4778ac752b4701a5599215f7030989c989ffdc4f6df457c5f36938cc2d2a2750"}, 13 13 "ex_cldr": {:hex, :ex_cldr, "2.44.1", "0d220b175874e1ce77a0f7213bdfe700b9be11aefbf35933a0e98837803ebdc5", [:mix], [{:cldr_utils, "~> 2.28", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:gettext, "~> 0.19 or ~> 1.0", [hex: :gettext, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: true]}], "hexpm", "3880cd6137ea21c74250cd870d3330c4a9fdec07fabd5e37d1b239547929e29b"}, 14 14 "ex_doc": {:hex, :ex_doc, "0.39.3", "519c6bc7e84a2918b737aec7ef48b96aa4698342927d080437f61395d361dcee", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "0590955cf7ad3b625780ee1c1ea627c28a78948c6c0a9b0322bd976a079996e1"}, 15 + "ezstd": {:hex, :ezstd, "1.2.3", "98748f4099e6e2a067f77ace43041ebaa53c13194b08ce22370e4c93079e9e16", [:rebar3], [], "hexpm", "de32e0b41ba36a9ed46db8215da74777d2f141bb75f67bfc05dbb4b7c3386dee"}, 15 16 "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, 16 17 "finch": {:hex, :finch, "0.20.0", "5330aefb6b010f424dcbbc4615d914e9e3deae40095e73ab0c1bb0968933cadf", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "2658131a74d051aabfcba936093c903b8e89da9a1b63e430bee62045fa9b2ee2"}, 17 18 "gun": {:hex, :gun, "2.2.0", "b8f6b7d417e277d4c2b0dc3c07dfdf892447b087f1cc1caff9c0f556b884e33d", [:make, :rebar3], [{:cowlib, ">= 2.15.0 and < 3.0.0", [hex: :cowlib, repo: "hexpm", optional: false]}], "hexpm", "76022700c64287feb4df93a1795cff6741b83fb37415c40c34c38d2a4645261a"},
priv/jetstream/zstd_dictionary

This is a binary file and will not be displayed.