// Openstatus — www.openstatus.dev
// Revision 4c0f4c00a38753a5d0dfd7e7b7b7706dec6f1503 (201 lines, 5.4 kB)
import { AsyncLocalStorage } from "node:async_hooks";
// import * as Sentry from "@sentry/node";
import { sentry } from "@hono/sentry";
import {
  configure,
  getConsoleSink,
  getLogger,
  jsonLinesFormatter,
  withContext,
} from "@logtape/logtape";
import { getOpenTelemetrySink } from "@logtape/otel";

// import { getSentrySink } from "@logtape/sentry";
import { Hono } from "hono";
import { showRoutes } from "hono/dev";
import { requestId } from "hono/request-id";
// import { logger } from "hono/logger";
import { checkerRoute } from "./checker";
import { cronRouter } from "./cron";
import { env } from "./env";

import { resourceFromAttributes } from "@opentelemetry/resources";
import { ATTR_DEPLOYMENT_ENVIRONMENT_NAME } from "@opentelemetry/semantic-conventions/incubating";

const { NODE_ENV, PORT } = env();

/**
 * Hono environment for this app.
 *
 * `event` is the per-request "wide event" object that the request middleware
 * creates, stores on the context, and emits once the request has completed.
 */
export type Env = {
  Variables: {
    event: Record<string, unknown>;
  };
};

/** Requests slower than this many milliseconds are always sampled. */
const SLOW_REQUEST_THRESHOLD_MS = 2000;

/** Fraction of remaining (non-error, fast) requests that are sampled. */
const SUCCESS_SAMPLE_RATE = 0.2;

/**
 * Tail sampling strategy based on loggingsucks.com best practices.
 *
 * The decision is made after the request has completed, from the fields
 * recorded on the wide event:
 * - server errors (status >= 500) are always kept
 * - events carrying a truthy `error` field are always kept
 * - slow requests (duration above {@link SLOW_REQUEST_THRESHOLD_MS}) are
 *   always kept
 * - client errors (4xx) are always kept
 * - everything else is kept at random with probability
 *   {@link SUCCESS_SAMPLE_RATE}
 *
 * @param event - Wide event; reads `status_code`, `duration_ms` and `error`.
 * @returns `true` when the event should be emitted to the OTel sink.
 */
function shouldSample(event: Record<string, unknown>): boolean {
  // Narrow the unknown fields at runtime instead of asserting their type.
  const statusCode =
    typeof event.status_code === "number" ? event.status_code : undefined;
  const durationMs =
    typeof event.duration_ms === "number" ? event.duration_ms : undefined;

  // Always capture: server errors
  if (statusCode !== undefined && statusCode >= 500) return true;

  // Always capture: explicit errors
  if (event.error) return true;

  // Always capture: slow requests
  if (durationMs !== undefined && durationMs > SLOW_REQUEST_THRESHOLD_MS) {
    return true;
  }

  // Always capture: client errors (4xx)
  if (statusCode !== undefined && statusCode >= 400 && statusCode < 500) {
    return true;
  }

  // Random sample of the remaining successful, fast requests
  return Math.random() < SUCCESS_SAMPLE_RATE;
}

// OpenTelemetry log sink exporting to the Axiom OTLP endpoint; the token,
// dataset and deployment environment all come from env().
const defaultLogger = getOpenTelemetrySink({
  serviceName: "openstatus-workflows",
  otlpExporterConfig: {
    url: "https://eu-central-1.aws.edge.axiom.co/v1/logs",
    headers: {
      Authorization: `Bearer ${env().AXIOM_TOKEN}`,
      "X-Axiom-Dataset": env().AXIOM_DATASET,
    },
  },
  additionalResource: resourceFromAttributes({
    [ATTR_DEPLOYMENT_ENVIRONMENT_NAME]: env().NODE_ENV,
  }),
});

// Two logger categories: "workflow" writes JSON lines to the console,
// "workflow-otel" writes to the OTel sink. The AsyncLocalStorage instance is
// handed to LogTape as its context-local storage (used by withContext below).
await configure({
  sinks: {
    console: getConsoleSink({ formatter: jsonLinesFormatter }),
    // sentry: getSentrySink(),
    otel: defaultLogger,
  },
  loggers: [
    {
      category: "workflow",
      lowestLevel: "debug",
      sinks: ["console"],
    },
    {
      category: "workflow-otel",
      lowestLevel: "info",
      sinks: ["otel"],
    },
  ],
  contextLocalStorage: new AsyncLocalStorage(),
});

const logger = getLogger(["workflow"]);
const otelLogger = getLogger(["workflow-otel"]);

const app = new Hono<Env>({ strict: false });

app.use("*", requestId());

app.use("*", sentry({ dsn: env().SENTRY_DSN }));

// Request middleware: builds the wide event, runs the downstream handlers
// inside a LogTape context, then records the outcome and emits the event
// (subject to tail sampling) plus a console debug line.
app.use("*", async (c, next) => {
  // Named `reqId` so it does not shadow the imported `requestId` middleware.
  const reqId = c.get("requestId");
  const startTime = Date.now();

  const event: Record<string, unknown> = {
    timestamp: new Date().toISOString(),
  };
  c.set("event", event);

  await withContext(
    {
      request_id: reqId,
      method: c.req.method,
      url: c.req.url,
      user_agent: c.req.header("User-Agent"),
      // ipAddress: c.req.header("CF-Connecting-IP") || c.req.header("X-Forwarded-For")
    },
    async () => {
      // Build wide event context at request start
      event.request_id = reqId;
      event.method = c.req.method;
      event.path = c.req.path;
      event.url = c.req.url;
      event.user_agent = c.req.header("User-Agent");
      event.content_type = c.req.header("Content-Type");
      event.cf_ray = c.req.header("CF-Ray");
      event.cf_connecting_ip = c.req.header("CF-Connecting-IP");

      await next();

      const duration = Date.now() - startTime;

      event.status_code = c.res.status;
      if (c.error) {
        event.outcome = "error";
        event.error = {
          type: c.error.name,
          message: c.error.message,
          stack: c.error.stack,
        };
      } else {
        event.outcome = "success";
      }
      event.duration_ms = duration;

      // Emit canonical log line with all context (wide event pattern)
      if (shouldSample(event)) {
        otelLogger.info("request", event);
      }
      logger.debug("Request completed", {
        status_code: c.res.status,
        duration_ms: duration,
        request_id: reqId,
      });
    },
  );
});

// Error handler: logs the error with request details, forwards it to the
// Sentry client placed on the context, and answers with a generic JSON 500.
app.onError((err, c) => {
  logger.error("Unhandled request error", {
    error_name: err.name,
    error_message: err.message,
    error_stack: err.stack,
    method: c.req.method,
    path: c.req.path,
    url: c.req.url,
    request_id: c.get("requestId"),
  });
  c.get("sentry").captureException(err);

  return c.json({ error: "Internal server error" }, 500);
});

app.get("/", (c) => c.text("workflows", 200));

/**
 * Ping Pong
 */
app.get("/ping", (c) => c.json({ ping: "pong" }, 200));

/**
 * Cron Routes
 */
app.route("/cron", cronRouter);

app.route("/", checkerRoute);

if (NODE_ENV === "development") {
  showRoutes(app, { verbose: true, colorize: true });
}

logger.info("Starting server", { port: PORT, environment: NODE_ENV });

const server = { port: PORT, fetch: app.fetch };

export default server;