···1313- `Atex.Lexicon` module that provides the `deflexicon` macro, which takes a JSON
1414 Lexicon definition and converts it into a series of schemas for each
1515 definition within it.
1616+- `mix atex.lexicons` task for easily converting lexicon JSON files into modules
1717+ that use `deflexicon`.
16181719## [0.3.0] - 2025-06-29
1820
+1-1
README.md
···1010- [x] XRPC client
1111- [x] DID & handle resolution service with a cache
1212- [x] Macro for converting a Lexicon definition into a runtime-validation schema
1313- - [ ] Codegen to convert a directory of lexicons
1313+ - [x] Codegen to convert a directory of lexicons
1414- [ ] Extended XRPC client with support for validated inputs/outputs
1515- [ ] OAuth stuff
1616
+1-1
lib/atex/lexicon.ex
···5858 def_to_schema(nsid, def_name, record)
5959 end
60606161- # TODO: add `$type` field. It's just a string though.
6161+ # TODO: need to spit out an extra 'branded' type with `$type` field, for use in union refs.
6262 defp def_to_schema(
6363 nsid,
6464 def_name,
-2
lib/atex/lexicon/validators/integer.ex
···11defmodule Atex.Lexicon.Validators.Integer do
22- alias Atex.Lexicon.Validators
33-42 @type option() ::
53 {:minimum, integer()}
64 | {:maximum, integer()}
+8-2
lib/atex/nsid.ex
···1111 # maybe stuff for fetching the repo that belongs to an authority
@doc """
Converts an NSID string into a module-style atom.

The NSID is split on `.`, every segment is passed through
`String.capitalize/1`, and the segments are joined back with `.`.

When `fully_qualify` is truthy (the default) the result is prefixed with
`Elixir`, which makes it a regular module alias atom. Pass `false` to get the
bare dotted name instead, e.g. for rendering into source code.

NOTE: `String.capitalize/1` lowercases everything after the first character,
so a camelCase segment such as `getRecord` becomes `Getrecord`.

This calls `String.to_atom/1`, so it should only be given trusted NSIDs:
atoms are never garbage collected.

## Examples

    to_atom("sh.comet.v0.feed.track")
    #=> Sh.Comet.V0.Feed.Track

    to_atom("sh.comet.v0.feed.track", false)
    #=> :"Sh.Comet.V0.Feed.Track"
"""
@spec to_atom(String.t()) :: atom()
@spec to_atom(String.t(), boolean()) :: atom()
def to_atom(nsid, fully_qualify \\ true) do
  segments =
    nsid
    |> String.split(".")
    |> Enum.map(&String.capitalize/1)

  # Only the `Elixir.` prefix turns the dotted name into a real module alias.
  segments = if fully_qualify, do: ["Elixir" | segments], else: segments

  segments
  |> Enum.join(".")
  |> String.to_atom()
end
···11-defmodule Sh.Comet.V0.Feed.Track do
22- use Atex.Lexicon
33-44- deflexicon(%{
55- "defs" => %{
66- "main" => %{
77- "description" =>
88- "A Comet audio track. TODO: should probably have some sort of pre-calculated waveform, or have a query to get one from a blob?",
99- "key" => "tid",
1010- "record" => %{
1111- "properties" => %{
1212- "audio" => %{
1313- "accept" => ["audio/ogg"],
1414- "description" =>
1515- "Audio of the track, ideally encoded as 96k Opus. Limited to 100mb.",
1616- "maxSize" => 100_000_000,
1717- "type" => "blob"
1818- },
1919- "createdAt" => %{
2020- "description" => "Timestamp for when the track entry was originally created.",
2121- "format" => "datetime",
2222- "type" => "string"
2323- },
2424- "description" => %{
2525- "description" => "Description of the track.",
2626- "maxGraphemes" => 2000,
2727- "maxLength" => 20000,
2828- "type" => "string"
2929- },
3030- "descriptionFacets" => %{
3131- "description" => "Annotations of the track's description.",
3232- "ref" => "sh.comet.v0.richtext.facet",
3333- "type" => "ref"
3434- },
3535- "explicit" => %{
3636- "description" =>
3737- "Whether the track contains explicit content that may objectionable to some people, usually swearing or adult themes.",
3838- "type" => "boolean"
3939- },
4040- "image" => %{
4141- "accept" => ["image/png", "image/jpeg"],
4242- "description" => "Image to be displayed representing the track.",
4343- "maxSize" => 1_000_000,
4444- "type" => "blob"
4545- },
4646- "link" => %{"ref" => "sh.comet.v0.feed.defs#link", "type" => "ref"},
4747- "releasedAt" => %{
4848- "description" =>
4949- "Timestamp for when the track was released. If in the future, may be used to implement pre-savable tracks.",
5050- "format" => "datetime",
5151- "type" => "string"
5252- },
5353- "tags" => %{
5454- "description" => "Hashtags for the track, usually for genres.",
5555- "items" => %{
5656- "maxGraphemes" => 64,
5757- "maxLength" => 640,
5858- "type" => "string"
5959- },
6060- "maxLength" => 8,
6161- "type" => "array"
6262- },
6363- "title" => %{
6464- "description" =>
6565- "Title of the track. Usually shouldn't include the creator's name.",
6666- "maxGraphemes" => 256,
6767- "maxLength" => 2560,
6868- "minLength" => 1,
6969- "type" => "string"
7070- }
7171- },
7272- "required" => ["audio", "title", "createdAt"],
7373- "type" => "object"
7474- },
7575- "type" => "record"
7676- },
7777- "view" => %{
7878- "properties" => %{
7979- "audio" => %{
8080- "description" =>
8181- "URL pointing to where the audio data for the track can be fetched. May be re-encoded from the original blob.",
8282- "format" => "uri",
8383- "type" => "string"
8484- },
8585- "author" => %{
8686- "ref" => "sh.comet.v0.actor.profile#viewFull",
8787- "type" => "ref"
8888- },
8989- "cid" => %{"format" => "cid", "type" => "string"},
9090- "commentCount" => %{"type" => "integer"},
9191- "image" => %{
9292- "description" => "URL pointing to where the image for the track can be fetched.",
9393- "format" => "uri",
9494- "type" => "string"
9595- },
9696- "indexedAt" => %{"format" => "datetime", "type" => "string"},
9797- "likeCount" => %{"type" => "integer"},
9898- "playCount" => %{"type" => "integer"},
9999- "record" => %{"ref" => "#main", "type" => "ref"},
100100- "repostCount" => %{"type" => "integer"},
101101- "uri" => %{"format" => "at-uri", "type" => "string"},
102102- "viewer" => %{
103103- "ref" => "sh.comet.v0.feed.defs#viewerState",
104104- "type" => "ref"
105105- }
106106- },
107107- "required" => ["uri", "cid", "author", "audio", "record", "indexedAt"],
108108- "type" => "object"
109109- }
110110- },
111111- "id" => "sh.comet.v0.feed.track",
112112- "lexicon" => 1
113113- })
114114-end
-70
lib/atproto/sh/comet/v0/richtext/facet.ex
···11-defmodule Sh.Comet.V0.Richtext.Facet do
22- use Atex.Lexicon
33-44- deflexicon(%{
55- "defs" => %{
66- "byteSlice" => %{
77- "description" =>
88- "Specifies the sub-string range a facet feature applies to. Start index is inclusive, end index is exclusive. Indices are zero-indexed, counting bytes of the UTF-8 encoded text. NOTE: some languages, like Javascript, use UTF-16 or Unicode codepoints for string slice indexing; in these languages, convert to byte arrays before working with facets.",
99- "properties" => %{
1010- "byteEnd" => %{"minimum" => 0, "type" => "integer"},
1111- "byteStart" => %{"minimum" => 0, "type" => "integer"}
1212- },
1313- "required" => ["byteStart", "byteEnd"],
1414- "type" => "object"
1515- },
1616- "link" => %{
1717- "description" =>
1818- "Facet feature for a URL. The text URL may have been simplified or truncated, but the facet reference should be a complete URL.",
1919- "properties" => %{"uri" => %{"format" => "uri", "type" => "string"}},
2020- "required" => ["uri"],
2121- "type" => "object"
2222- },
2323- "main" => %{
2424- "description" => "Annotation of a sub-string within rich text.",
2525- "properties" => %{
2626- "features" => %{
2727- "items" => %{
2828- "refs" => ["#mention", "#link", "#tag"],
2929- "type" => "union"
3030- },
3131- "type" => "array"
3232- },
3333- "index" => %{"ref" => "#byteSlice", "type" => "ref"}
3434- },
3535- "required" => ["index", "features"],
3636- "type" => "object"
3737- },
3838- "mention" => %{
3939- "description" =>
4040- "Facet feature for mention of another account. The text is usually a handle, including a '@' prefix, but the facet reference is a DID.",
4141- "properties" => %{"did" => %{"format" => "did", "type" => "string"}},
4242- "required" => ["did"],
4343- "type" => "object"
4444- },
4545- "tag" => %{
4646- "description" =>
4747- "Facet feature for a hashtag. The text usually includes a '#' prefix, but the facet reference should not (except in the case of 'double hash tags').",
4848- "properties" => %{
4949- "tag" => %{"maxGraphemes" => 64, "maxLength" => 640, "type" => "string"}
5050- },
5151- "required" => ["tag"],
5252- "type" => "object"
5353- },
5454- "timestamp" => %{
5555- "description" =>
5656- "Facet feature for a timestamp in a track. The text usually is in the format of 'hh:mm:ss' with the hour section being omitted if unnecessary.",
5757- "properties" => %{
5858- "timestamp" => %{
5959- "description" => "Reference time, in seconds.",
6060- "minimum" => 0,
6161- "type" => "integer"
6262- }
6363- },
6464- "type" => "object"
6565- }
6666- },
6767- "id" => "sh.comet.v0.richtext.facet",
6868- "lexicon" => 1
6969- })
7070-end
+94
lib/mix/tasks/atex.lexicons.ex
defmodule Mix.Tasks.Atex.Lexicons do
  @moduledoc """
  Generate Elixir modules from AT Protocol lexicons, which can then be used to
  validate data at runtime.

  AT Protocol lexicons are JSON files that define parts of the AT Protocol data
  model. This task processes these lexicon files and generates corresponding
  Elixir modules.

  Each generated file is placed under the output directory at a path derived
  from the lexicon's `id` (`sh.comet.v0.feed.track` becomes
  `sh/comet/v0/feed/track.ex`). The task raises if a lexicon has no string
  `id` field.

  ## Usage

      mix atex.lexicons [OPTIONS] [PATHS]

  ## Arguments

  - `PATHS` - List of lexicon files to process. Also supports standard glob
    syntax for reading many lexicons at once. Each path is expanded with
    `Path.wildcard/1`, so globs may be quoted to stop the shell from expanding
    them first.

  ## Options

  - `-o`/`--output` - Output directory for generated modules (default:
    `lib/atproto`)

  ## Examples

  Process all JSON files in the lexicons directory:

      mix atex.lexicons lexicons/**/*.json

  Process specific lexicon files:

      mix atex.lexicons lexicons/com/atproto/repo/*.json lexicons/app/bsky/actor/profile.json

  Generate modules to a custom output directory:

      mix atex.lexicons lexicons/**/*.json --output lib/my_atproto
  """
  @shortdoc "Generate Elixir modules from AT Protocol lexicons."

  use Mix.Task
  require EEx

  @switches [output: :string]
  @aliases [o: :output]
  @default_output "lib/atproto"
  @template_path Path.expand("../../../priv/templates/lexicon.eex", __DIR__)
  # The template is compiled into `template/1` below, so this module has to be
  # recompiled whenever the template file changes.
  @external_resource @template_path

  @impl Mix.Task
  def run(args) do
    {options, globs} = OptionParser.parse!(args, switches: @switches, aliases: @aliases)

    output = Keyword.get(options, :output, @default_output)

    # Match on the empty list instead of calling `length/1`, which is O(n).
    case Enum.flat_map(globs, &Path.wildcard/1) do
      [] ->
        Mix.shell().error("No valid search paths have been provided, aborting.")

      paths ->
        Mix.shell().info("Generating modules for lexicons into #{output}")

        Enum.each(paths, fn path ->
          Mix.shell().info("- #{path}")
          generate(path, output)
        end)
    end
  end

  # Reads the lexicon JSON at `input`, renders it through the module template
  # and writes the formatted source below the `output` directory.
  #
  # Raises `ArgumentError` when the decoded document has no string `id`; file
  # and JSON errors are raised by the respective bang functions.
  #
  # TODO: validate schema?
  defp generate(input, output) do
    lexicon =
      input
      |> File.read!()
      |> JSON.decode!()

    # Pattern matching also rejects documents that are not JSON objects at all,
    # which would otherwise fail with an unrelated error on `lexicon["id"]`.
    id =
      case lexicon do
        %{"id" => id} when is_binary(id) -> id
        _ -> raise ArgumentError, message: "Malformed lexicon: does not have an `id` field."
      end

    # `Code.format_string!/1` returns iodata without a trailing newline; add one
    # so the generated file matches what `mix format` would produce.
    code = [lexicon |> template() |> Code.format_string!(), ?\n]

    file_path = Path.join([output | String.split(id, ".")]) <> ".ex"

    file_path
    |> Path.dirname()
    |> File.mkdir_p!()

    File.write!(file_path, code)
  end

  EEx.function_from_file(:defp, :template, @template_path, [:lexicon])
end
+5
priv/templates/lexicon.eex
···11+defmodule <%= Atex.NSID.to_atom(lexicon["id"], false) %> do
22+ use Atex.Lexicon
33+44+ deflexicon(<%= inspect(lexicon, limit: :infinity, pretty: true, printable_limit: :infinity) %>)
55+end