# Voxtype (https://github.com/peteonrails/voxtype) Configuration

# State file for external integrations (Waybar, polybar, etc.)
# Use "auto" for default location ($XDG_RUNTIME_DIR/voxtype/state),
# a custom path, or "disabled" to turn off. The daemon writes state
# ("idle", "recording", "transcribing") to this file whenever it changes.
# Required for `voxtype record toggle` and `voxtype status` commands.
state_file = "auto"

[hotkey]
# Key to hold for push-to-talk
# Common choices: SCROLLLOCK, PAUSE, RIGHTALT, F13-F24
# Use `evtest` to find key names for your keyboard
key = "SCROLLLOCK"

# Optional modifier keys that must also be held
# Example: modifiers = ["LEFTCTRL", "LEFTALT"]
modifiers = []

# Activation mode: "push_to_talk" or "toggle"
#   - push_to_talk: Hold hotkey to record, release to transcribe (default)
#   - toggle: Press hotkey once to start recording, press again to stop
# mode = "push_to_talk"

# Enable built-in hotkey detection (default: true)
# Set to false when using compositor keybindings (Hyprland, Sway) instead.
# When disabled, use `voxtype record start/stop/toggle` to control recording.
# enabled = true

[audio]
# Audio input device ("default" uses system default)
# List devices with: pactl list sources short
device = "default"

# Sample rate in Hz (whisper expects 16000)
sample_rate = 16000

# Maximum recording duration in seconds (safety limit)
max_duration_secs = 60

[audio.feedback]
# Enable audio feedback sounds (beeps when recording starts/stops)
enabled = true

# Sound theme: "default", "subtle", "mechanical", or path to custom theme directory
theme = "default"

# Volume level (0.0 to 1.0)
volume = 0.7

[whisper]
# Transcription backend: "local" or "remote"
#   - local: Use whisper.cpp locally (default)
#   - remote: Send audio to a remote whisper.cpp server or OpenAI-compatible API
# backend = "local"

# Model to use for transcription (local backend)
# Options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v3, large-v3-turbo
# .en models are English-only but faster and more accurate for English
# large-v3-turbo is faster than large-v3 with minimal accuracy loss (recommended for GPU)
# Or provide absolute path to a custom .bin model file
model = "large-v3-turbo"

# Language for transcription
# Use "en" for English, "auto" for auto-detection
# See: https://github.com/openai/whisper#available-models-and-languages
language = "en"

# Translate non-English speech to English
translate = false

# Number of CPU threads for inference (omit for auto-detection)
# threads = 4

# --- Remote backend settings (used when backend = "remote") ---
#
# Remote server endpoint URL (required for remote backend)
# Examples:
#   - whisper.cpp server: "http://192.168.1.100:8080"
#   - OpenAI API: "https://api.openai.com"
# remote_endpoint = "http://192.168.1.100:8080"
#
# Model name to send to remote server (default: "whisper-1")
# remote_model = "whisper-1"
#
# API key for remote server (optional, or use VOXTYPE_WHISPER_API_KEY env var)
# remote_api_key = ""
#
# Timeout for remote requests in seconds (default: 30)
# remote_timeout_secs = 30

[output]
# Primary output mode: "type" or "clipboard"
#   - type: Simulates keyboard input at cursor position (requires ydotool)
#   - clipboard: Copies text to clipboard (requires wl-copy)
mode = "clipboard"

# Fall back to clipboard if typing fails
fallback_to_clipboard = true

# Delay between typed characters in milliseconds
# 0 = fastest possible, increase if characters are dropped
type_delay_ms = 0

# Automatically submit (send Enter key) after outputting transcribed text
# Useful for chat applications, command lines, or forms where you want
# to auto-submit after dictation
# auto_submit = true

# Pre/post output hooks (optional)
# Commands to run before and after typing output. Useful for compositor integration.
# Example: Block modifier keys during typing with Hyprland submap:
# pre_output_command = "hyprctl dispatch submap voxtype_suppress"
# post_output_command = "hyprctl dispatch submap reset"
# See troubleshooting docs for the required Hyprland submap configuration.

# Post-processing command (optional)
# Pipe transcribed text through an external command for cleanup before output.
# The command receives text on stdin and outputs processed text on stdout.
# Useful for LLM-based text cleanup, grammar correction, filler word removal.
# On any failure (timeout, error), falls back to original transcription.
#
# [output.post_process]
# command = "ollama run llama3.2:1b 'Clean up this dictation. Fix grammar, remove filler words. Output only the cleaned text:'"
# timeout_ms = 30000  # 30 second timeout (generous for LLM)

[output.notification]
# Show notification when recording starts (hotkey pressed)
on_recording_start = false

# Show notification when recording stops (transcription beginning)
on_recording_stop = false

# Show notification with transcribed text after transcription completes
on_transcription = true
136136+137137+# [text]
138138+# Text processing options (word replacements, spoken punctuation)
139139+#
140140+# Enable spoken punctuation conversion (e.g., say "period" to get ".")
141141+# spoken_punctuation = false
142142+#
143143+# Custom word replacements (case-insensitive)
144144+# replacements = { "vox type" = "voxtype" }
145145+146146+# [status]
147147+# Status display icons for Waybar/tray integrations
148148+#
149149+# Icon theme (or path to custom theme file):
150150+# Font-based (require specific fonts):
151151+# - "emoji" - Default emoji icons (🎙️ 🎤 ⏳)
152152+# - "nerd-font" - Nerd Font icons (requires Nerd Font)
153153+# - "material" - Material Design Icons (requires MDI font)
154154+# - "phosphor" - Phosphor Icons (requires Phosphor font)
155155+# - "codicons" - VS Code icons (requires Codicons font)
156156+# - "omarchy" - Omarchy distro icons
157157+# Universal (no special fonts needed):
158158+# - "minimal" - Simple Unicode (○ ● ◐ ×)
159159+# - "dots" - Geometric shapes (◯ ⬤ ◔ ◌)
160160+# - "arrows" - Media player style (▶ ● ↻ ■)
161161+# - "text" - Plain text ([MIC] [REC] [...] [OFF])
162162+# icon_theme = "emoji"
163163+#
164164+# Per-state icon overrides (optional, takes precedence over theme)
165165+# [status.icons]
166166+# idle = "🎙️"
167167+# recording = "🎤"
168168+# transcribing = "⏳"
169169+# stopped = ""