✨ Add voxtype dictation config · davidgasquez.com/dotfiles@ceda9d8

+2 -1

hypr/hyprland.conf

··· 214 214 bind = $mainMod SHIFT, E, exec, uwsm app -- bemoji -n 215 215 216 216 # Whisper 217 - bind = $mainMod SHIFT, O, exec, whispr 217 + bind = $mainMod SHIFT, O, exec, voxtype record start 218 + bindr = $mainMod SHIFT, O, exec, voxtype record stop 218 219 219 220 # Volume 220 221 bindel = ,XF86AudioRaiseVolume, exec, wpctl set-volume -l 1 @DEFAULT_AUDIO_SINK@ 2%+

+4 -1

hypr/setup.sh

··· 6 6 WAYBAR_CONFIG_DIR="${HOME}/.config/waybar" 7 7 MAKO_CONFIG_DIR="${HOME}/.config/mako" 8 8 FUZZEL_CONFIG_DIR="${HOME}/.config/fuzzel" 9 + VOXTYPE_CONFIG_DIR="${HOME}/.config/voxtype" 9 10 PICTURES_DIR="${HOME}/Pictures" 10 11 11 12 packages=( ··· 30 31 wireplumber 31 32 wl-clip-persist 32 33 wl-clipboard 34 + voxtype-bin 33 35 xdg-desktop-portal-gtk 34 36 xdg-desktop-portal-hyprland 35 37 xdg-utils ··· 44 46 systemctl --user enable --now hyprpolkitagent.service 45 47 fi 46 48 47 - mkdir -p "${HYPR_CONFIG_DIR}" "${WAYBAR_CONFIG_DIR}" "${MAKO_CONFIG_DIR}" "${FUZZEL_CONFIG_DIR}" "${PICTURES_DIR}" 49 + mkdir -p "${HYPR_CONFIG_DIR}" "${WAYBAR_CONFIG_DIR}" "${MAKO_CONFIG_DIR}" "${FUZZEL_CONFIG_DIR}" "${VOXTYPE_CONFIG_DIR}" "${PICTURES_DIR}" 48 50 49 51 ln -sf "${DOTFILES}/hypr/wallpaper.png" "${PICTURES_DIR}/wallpaper.png" 50 52 ln -sf "${DOTFILES}/hypr/frappe.conf" "${HYPR_CONFIG_DIR}/frappe.conf" ··· 58 60 ln -sf "${DOTFILES}/hypr/waybar/frappe.css" "${WAYBAR_CONFIG_DIR}/frappe.css" 59 61 ln -sf "${DOTFILES}/hypr/mako/config" "${MAKO_CONFIG_DIR}/config" 60 62 ln -sf "${DOTFILES}/hypr/fuzzel/fuzzel.ini" "${FUZZEL_CONFIG_DIR}/fuzzel.ini" 63 + ln -sf "${DOTFILES}/hypr/voxtype/config.toml" "${VOXTYPE_CONFIG_DIR}/config.toml"

+169

hypr/voxtype/config.toml

# Voxtype (https://github.com/peteonrails/voxtype) Configuration
#
# Recording is driven by the Hyprland keybindings in hypr/hyprland.conf
# ($mainMod SHIFT + O: press runs `voxtype record start`, release runs
# `voxtype record stop`), so the built-in hotkey listener is disabled in
# the [hotkey] section below.

# State file for external integrations (Waybar, polybar, etc.)
# Use "auto" for default location ($XDG_RUNTIME_DIR/voxtype/state),
# a custom path, or "disabled" to turn off. The daemon writes state
# ("idle", "recording", "transcribing") to this file whenever it changes.
# Required for `voxtype record toggle` and `voxtype status` commands.
state_file = "auto"

[hotkey]
# Key to hold for push-to-talk
# Common choices: SCROLLLOCK, PAUSE, RIGHTALT, F13-F24
# Use `evtest` to find key names for your keyboard
# NOTE: only relevant if `enabled` below is switched back to true.
key = "SCROLLLOCK"

# Optional modifier keys that must also be held
# Example: modifiers = ["LEFTCTRL", "LEFTALT"]
modifiers = []

# Activation mode: "push_to_talk" or "toggle"
#   - push_to_talk: Hold hotkey to record, release to transcribe (default)
#   - toggle: Press hotkey once to start recording, press again to stop
# mode = "push_to_talk"

# Enable built-in hotkey detection (default: true)
# Set to false when using compositor keybindings (Hyprland, Sway) instead
# When disabled, use `voxtype record start/stop/toggle` to control recording
#
# Disabled here because Hyprland owns the keybinding; leaving it enabled
# would keep SCROLLLOCK active as a second, unintended trigger.
enabled = false

[audio]
# Audio input device ("default" uses system default)
# List devices with: pactl list sources short
device = "default"

# Sample rate in Hz (whisper expects 16000)
sample_rate = 16000

# Maximum recording duration in seconds (safety limit)
max_duration_secs = 60

[audio.feedback]
# Enable audio feedback sounds (beeps when recording starts/stops)
enabled = true
#
# Sound theme: "default", "subtle", "mechanical", or path to custom theme directory
theme = "default"
#
# Volume level (0.0 to 1.0)
volume = 0.7

[whisper]
# Transcription backend: "local" or "remote"
#   - local: Use whisper.cpp locally (default)
#   - remote: Send audio to a remote whisper.cpp server or OpenAI-compatible API
# backend = "local"

# Model to use for transcription (local backend)
# Options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v3, large-v3-turbo
# .en models are English-only but faster and more accurate for English
# large-v3-turbo is faster than large-v3 with minimal accuracy loss (recommended for GPU)
# Or provide absolute path to a custom .bin model file
model = "large-v3-turbo"

# Language for transcription
# Use "en" for English, "auto" for auto-detection
# See: https://github.com/openai/whisper#available-models-and-languages
language = "en"

# Translate non-English speech to English
translate = false

# Number of CPU threads for inference (omit for auto-detection)
# threads = 4

# --- Remote backend settings (used when backend = "remote") ---
#
# Remote server endpoint URL (required for remote backend)
# Examples:
#   - whisper.cpp server: "http://192.168.1.100:8080"
#   - OpenAI API: "https://api.openai.com"
# remote_endpoint = "http://192.168.1.100:8080"
#
# Model name to send to remote server (default: "whisper-1")
# remote_model = "whisper-1"
#
# API key for remote server (optional, or use VOXTYPE_WHISPER_API_KEY env var)
# remote_api_key = ""
#
# Timeout for remote requests in seconds (default: 30)
# remote_timeout_secs = 30

[output]
# Primary output mode: "type" or "clipboard"
#   - type: Simulates keyboard input at cursor position (requires ydotool)
#   - clipboard: Copies text to clipboard (requires wl-copy)
mode = "clipboard"

# Fall back to clipboard if typing fails
fallback_to_clipboard = true

# Delay between typed characters in milliseconds
# 0 = fastest possible, increase if characters are dropped
type_delay_ms = 0

# Automatically submit (send Enter key) after outputting transcribed text
# Useful for chat applications, command lines, or forms where you want
# to auto-submit after dictation
# auto_submit = true

# Pre/post output hooks (optional)
# Commands to run before and after typing output. Useful for compositor integration.
# Example: Block modifier keys during typing with Hyprland submap:
# pre_output_command = "hyprctl dispatch submap voxtype_suppress"
# post_output_command = "hyprctl dispatch submap reset"
# See troubleshooting docs for the required Hyprland submap configuration.

# Post-processing command (optional)
# Pipe transcribed text through an external command for cleanup before output.
# The command receives text on stdin and outputs processed text on stdout.
# Useful for LLM-based text cleanup, grammar correction, filler word removal.
# On any failure (timeout, error), falls back to original transcription.
#
# [output.post_process]
# command = "ollama run llama3.2:1b 'Clean up this dictation. Fix grammar, remove filler words. Output only the cleaned text:'"
# timeout_ms = 30000 # 30 second timeout (generous for LLM)

[output.notification]
# Show notification when recording starts (hotkey pressed)
on_recording_start = false

# Show notification when recording stops (transcription beginning)
on_recording_stop = false

# Show notification with transcribed text after transcription completes
on_transcription = true

# [text]
# Text processing options (word replacements, spoken punctuation)
#
# Enable spoken punctuation conversion (e.g., say "period" to get ".")
# spoken_punctuation = false
#
# Custom word replacements (case-insensitive)
# replacements = { "vox type" = "voxtype" }

# [status]
# Status display icons for Waybar/tray integrations
#
# Icon theme (or path to custom theme file):
#   Font-based (require specific fonts):
#     - "emoji" - Default emoji icons (🎙️ 🎤 ⏳)
#     - "nerd-font" - Nerd Font icons (requires Nerd Font)
#     - "material" - Material Design Icons (requires MDI font)
#     - "phosphor" - Phosphor Icons (requires Phosphor font)
#     - "codicons" - VS Code icons (requires Codicons font)
#     - "omarchy" - Omarchy distro icons
#   Universal (no special fonts needed):
#     - "minimal" - Simple Unicode (○ ● ◐ ×)
#     - "dots" - Geometric shapes (◯ ⬤ ◔ ◌)
#     - "arrows" - Media player style (▶ ● ↻ ■)
#     - "text" - Plain text ([MIC] [REC] [...] [OFF])
# icon_theme = "emoji"
#
# Per-state icon overrides (optional, takes precedence over theme)
# [status.icons]
# idle = "🎙️"
# recording = "🎤"
# transcribing = "⏳"
# stopped = ""