An easy-to-host PDS on the ATProtocol, MacOS. Grandma-approved.

fix(relay): address PR review issues for MM-137 OTel baseline

Critical:
- Move #[tracing::instrument] to after doc comment blocks in db/mod.rs
(was splitting /// paragraphs, breaking rustdoc)
- Add skip(url) to open_pool's instrument attribute to avoid recording
the database URL as a span field

Important:
- Replace eprintln! in OtelGuard::drop with tracing::error! — the
subscriber is still live at drop time, so structured logging is correct
- Add tracing::debug! in HeaderMapCarrier::get for non-UTF-8 header
values instead of silently discarding them
- Add comment at _otel_guard binding explaining the naming requirement:
bare _ drops immediately, which would shut down the exporter early
- Add otlp_endpoint validation in validate_and_build: must be non-empty
and start with http:// or https://
- Add unit tests for HeaderMapCarrier (get, absent, case-insensitive,
keys) including the non-UTF-8 silent-drop behaviour being intentional

Suggestions:
- Add eprintln warning when RUST_LOG is invalid (subscriber not up yet)
- Update apply_env_overrides doc to mention OTEL_SERVICE_NAME
- Update validate_and_build doc to include invite_code_required default
and telemetry validation rules

authored by malpercio.dev and committed by

Tangled 6b8bdd69 b5b51a8a

+99 -13
+26 -7
crates/common/src/config.rs
··· 115 115 Invalid(String), 116 116 } 117 117 118 - /// Apply `EZPDS_*` environment variable overrides to a [`RawConfig`], returning the updated config. 118 + /// Apply `EZPDS_*` and selected OTel standard environment variable overrides to a [`RawConfig`], 119 + /// returning the updated config. 120 + /// 121 + /// Also reads `OTEL_SERVICE_NAME` (without the `EZPDS_` prefix) as a standard OpenTelemetry 122 + /// convention for overriding the telemetry service name. 119 123 /// 120 124 /// Receives the environment as a map so this function stays isolated from I/O (no `std::env` 121 125 /// access). Takes `raw` by value and returns it so callers can chain calls without mutation. ··· 178 182 /// Validate a [`RawConfig`] and build a [`Config`], applying defaults for optional fields. 179 183 /// 180 184 /// Required fields: `data_dir`, `public_url`, `available_user_domains` (non-empty). 181 - /// Defaults: `bind_address = "0.0.0.0"`, `port = 8080`, 182 - /// `database_url = "{data_dir}/relay.db"` (derived; fails if `data_dir` is non-UTF-8). 185 + /// Defaults: `bind_address = "0.0.0.0"`, `port = 8080`, `invite_code_required = true`, 186 + /// `database_url = "{data_dir}/relay.db"` (derived; fails if `data_dir` is non-UTF-8), 187 + /// `telemetry.enabled = false`, `telemetry.otlp_endpoint = "http://localhost:4317"`, 188 + /// `telemetry.service_name = "ezpds-relay"`. 189 + /// When provided, `telemetry.otlp_endpoint` must be non-empty and start with `http://` or 190 + /// `https://`. 183 191 pub(crate) fn validate_and_build(raw: RawConfig) -> Result<Config, ConfigError> { 184 192 let bind_address = raw.bind_address.unwrap_or_else(|| "0.0.0.0".to_string()); 185 193 let port = raw.port.unwrap_or(8080); ··· 216 224 let invite_code_required = raw.invite_code_required.unwrap_or(true); 217 225 218 226 let telemetry_defaults = TelemetryConfig::default(); 227 + let otlp_endpoint = raw 228 + .telemetry 229 + .otlp_endpoint 230 + .unwrap_or(telemetry_defaults.otlp_endpoint); 231 + if otlp_endpoint.is_empty() { 232 + return Err(ConfigError::Invalid( 233 + "telemetry.otlp_endpoint must not be empty".to_string(), 234 + )); 235 + } 236 + if !otlp_endpoint.starts_with("http://") && !otlp_endpoint.starts_with("https://") { 237 + return Err(ConfigError::Invalid(format!( 238 + "telemetry.otlp_endpoint must start with http:// or https://, got: {otlp_endpoint:?}" 239 + ))); 240 + } 219 241 let telemetry = TelemetryConfig { 220 242 enabled: raw.telemetry.enabled.unwrap_or(telemetry_defaults.enabled), 221 - otlp_endpoint: raw 222 - .telemetry 223 - .otlp_endpoint 224 - .unwrap_or(telemetry_defaults.otlp_endpoint), 243 + otlp_endpoint, 225 244 service_name: raw 226 245 .telemetry 227 246 .service_name
+59 -1
crates/relay/src/app.rs
··· 15 15 16 16 impl Extractor for HeaderMapCarrier<'_> { 17 17 fn get(&self, key: &str) -> Option<&str> { 18 - self.0.get(key).and_then(|v| v.to_str().ok()) 18 + self.0.get(key).and_then(|v| { 19 + v.to_str().map_or_else( 20 + |_| { 21 + tracing::debug!(header = key, "trace propagation header contains non-UTF-8 bytes; ignoring"); 22 + None 23 + }, 24 + Some, 25 + ) 26 + }) 19 27 } 20 28 21 29 fn keys(&self) -> Vec<&str> { ··· 226 234 .expect("db pool in AppState must be queryable"); 227 235 } 228 236 } 237 + 238 + #[cfg(test)] 239 + mod header_carrier_tests { 240 + use super::*; 241 + use axum::http::HeaderMap; 242 + use opentelemetry::propagation::Extractor; 243 + 244 + #[test] 245 + fn get_returns_ascii_header_value() { 246 + let mut map = HeaderMap::new(); 247 + map.insert("traceparent", "00-abc123-def456-01".parse().unwrap()); 248 + 249 + let carrier = HeaderMapCarrier(&map); 250 + assert_eq!( 251 + carrier.get("traceparent"), 252 + Some("00-abc123-def456-01") 253 + ); 254 + } 255 + 256 + #[test] 257 + fn get_returns_none_for_absent_header() { 258 + let map = HeaderMap::new(); 259 + let carrier = HeaderMapCarrier(&map); 260 + assert_eq!(carrier.get("traceparent"), None); 261 + } 262 + 263 + #[test] 264 + fn get_is_case_insensitive_via_header_map() { 265 + let mut map = HeaderMap::new(); 266 + // HTTP/2 headers are lower-case; HeaderMap normalises on insert. 267 + map.insert("tracestate", "vendor=value".parse().unwrap()); 268 + 269 + let carrier = HeaderMapCarrier(&map); 270 + // HeaderMap normalises to lower-case, so look-up is case-insensitive. 271 + assert_eq!(carrier.get("tracestate"), Some("vendor=value")); 272 + } 273 + 274 + #[test] 275 + fn keys_returns_all_header_names() { 276 + let mut map = HeaderMap::new(); 277 + map.insert("traceparent", "value1".parse().unwrap()); 278 + map.insert("tracestate", "value2".parse().unwrap()); 279 + 280 + let carrier = HeaderMapCarrier(&map); 281 + let keys = carrier.keys(); 282 + assert!(keys.contains(&"traceparent")); 283 + assert!(keys.contains(&"tracestate")); 284 + assert_eq!(keys.len(), 2); 285 + } 286 + }
+2 -2
crates/relay/src/db/mod.rs
··· 33 33 }]; 34 34 35 35 /// Open a WAL-mode SQLite connection pool with a maximum of 1 connection. 36 - #[tracing::instrument(err, fields(db.system = "sqlite"))] 37 36 /// 38 37 /// Accepts any sqlx URL string (e.g. `"sqlite:relay.db"`, `"sqlite::memory:"`). 39 38 /// `create_if_missing` is enabled so the file is created on first run. ··· 42 41 /// 43 42 /// Note: Pool creation succeeds even if the file path is invalid; the failure surfaces 44 43 /// at the first query. To fail fast on bad config, consider adding `min_connections(1)`. 44 + #[tracing::instrument(skip(url), err, fields(db.system = "sqlite"))] 45 45 pub async fn open_pool(url: &str) -> Result<SqlitePool, DbError> { 46 46 let opts = SqliteConnectOptions::from_str(url) 47 47 .map_err(|e| DbError::InvalidUrl(e.to_string()))? ··· 56 56 } 57 57 58 58 /// Apply any pending migrations from `MIGRATIONS` to the given pool. 59 - #[tracing::instrument(skip(pool), err, fields(db.system = "sqlite"))] 60 59 /// 61 60 /// The schema_migrations bootstrap DDL runs outside any transaction. Pending migrations 62 61 /// and their bookkeeping inserts run inside a single transaction per call. 63 62 /// On commit() failure, the transaction is rolled back by Drop — no partial schema 64 63 /// is applied, and the operation is safe to re-run. 64 + #[tracing::instrument(skip(pool), err, fields(db.system = "sqlite"))] 65 65 pub async fn run_migrations(pool: &SqlitePool) -> Result<(), DbError> { 66 66 // Bootstrap the tracking table before any migration SQL runs. 67 67 sqlx::query(
+4
crates/relay/src/main.rs
··· 53 53 // Initialize tracing after config is loaded so telemetry settings can be applied. 54 54 // Any config parse error surfaces via eprintln (the error propagation above); tracing 55 55 // is not available until this line succeeds. 56 + // 57 + // IMPORTANT: must be `_otel_guard`, NOT bare `_`. A bare `_` binding drops 58 + // immediately (Rust only keeps `_foo` bindings alive for the scope), which would 59 + // shut down the OTLP exporter before the server starts. 56 60 let _otel_guard = telemetry::init_subscriber(&config.telemetry)?; 57 61 58 62 tracing::info!(
+8 -3
crates/relay/src/telemetry.rs
··· 17 17 impl Drop for OtelGuard { 18 18 fn drop(&mut self) { 19 19 if let Err(e) = self.provider.shutdown() { 20 - eprintln!("failed to flush OTel spans on shutdown: {e:?}"); 20 + // The tracing subscriber is still live at drop time, so tracing::error! is 21 + // appropriate and consistent with the rest of the codebase's diagnostics. 22 + tracing::error!(error = ?e, "failed to flush OTel spans on shutdown"); 21 23 } 22 24 } 23 25 } ··· 35 37 /// Returns an [`OtelGuard`] when telemetry is enabled. Drop it after the server shuts 36 38 /// down to guarantee all buffered spans are flushed before the process exits. 37 39 pub fn init_subscriber(telemetry: &TelemetryConfig) -> anyhow::Result<Option<OtelGuard>> { 38 - let env_filter = 39 - EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")); 40 + let env_filter = EnvFilter::try_from_default_env().unwrap_or_else(|e| { 41 + // Subscriber isn't up yet, so eprintln is the right diagnostic tool here. 42 + eprintln!("RUST_LOG is invalid ({e}); defaulting to INFO"); 43 + EnvFilter::new("info") 44 + }); 40 45 let fmt_layer = tracing_subscriber::fmt::layer(); 41 46 42 47 if !telemetry.enabled {