🔧 Where my dotfiles live in harmony and peace, most of the time

✨ Reorganize agent skills and expand browser docs

+1942 -422
+25
agents/commands/simplify.md
··· 1 + --- 2 + description: Refactor code to be simpler while maintaining identical functionality 3 + --- 4 + 5 + Refactor $ARGUMENTS to be simpler, clearer, and more maintainable without changing what it does. 6 + 7 + ## Objective 8 + 9 + Simplify and clean the code. Implementation should be straightforward and pragmatic. The goal is to get the most minimal code possible. 10 + 11 + ## Principles 12 + 13 + - Behavior parity. Keep interfaces and semantics identical. No new features. Preserve flags, return codes, and observable side effects. 14 + - KISS. Prefer boring, obvious solutions over cleverness. Fewer moving parts over fewer lines. 15 + - Small pieces. Favor small, composable functions. Design for easy deletion and iteration. 16 + - Prune aggressively. Remove dead code, unused vars, redundant branches, defensive over-engineering, and needless indirection. 17 + - Flatten flow. Simplify complex conditionals and deep nesting. Use clear guards and early returns. 18 + - Standard library first. Replace custom utilities with modern built-ins and framework primitives. 19 + - Fail early and often. Avoid blanket try/catch blocks. Skip unnecessary validations. 20 + - Communicate with types. Use types to express contracts and invariants. Avoid type acrobatics and generic abstractions. 21 + - Abstractions when earned. Introduce or keep abstractions only when they reduce duplication or isolate likely change. 22 + - Minimal deps. Do not add dependencies unless they materially simplify and are commonly available for the target runtime. 23 + - No micro-optimizations unless they remove complexity or are explicitly required. 24 + - Make rules explicit. Turn hidden assumptions into defaults, parameters, or assertions. 25 + - Naming for intent. Prefer clear, intention-revealing names. One responsibility per function or module.
+1 -1
agents/pi/settings.json
··· 13 13 ], 14 14 "quietStartup": true, 15 15 "transport": "auto", 16 - "lastChangelogVersion": "0.55.4" 16 + "lastChangelogVersion": "0.56.1" 17 17 }
+20 -22
agents/setup.sh
··· 3 3 4 4 DOTFILES="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" 5 5 AGENTS_DIR="${DOTFILES}/agents" 6 + GLOBAL_AGENTS_DIR="${HOME}/.agents" 7 + GLOBAL_SKILLS_DIR="${GLOBAL_AGENTS_DIR}/skills" 6 8 CODEX_DIR="${HOME}/.codex" 9 + PI_AGENT_DIR="${HOME}/.pi/agent" 10 + AMP_DIR="${HOME}/.config/amp" 7 11 8 12 packages=( 9 13 pi-coding-agent ··· 14 18 15 19 paru -S --needed --noconfirm "${packages[@]}" 16 20 17 - setup_codex() { 18 - mkdir -p "${CODEX_DIR}" 19 - ln -sf "${AGENTS_DIR}/codex/config.toml" "${CODEX_DIR}/config.toml" 20 - ln -sf "${AGENTS_DIR}/AGENTS.md" "${CODEX_DIR}/AGENTS.md" 21 - ln -sfT "${AGENTS_DIR}/skills" "${CODEX_DIR}/skills" 22 - } 21 + mkdir -p "${GLOBAL_AGENTS_DIR}" 22 + ln -sfT "${AGENTS_DIR}/skills" "${GLOBAL_SKILLS_DIR}" 23 23 24 - setup_pi() { 25 - mkdir -p "${HOME}/.pi/agent" 26 - ln -sf "${AGENTS_DIR}/AGENTS.md" "${HOME}/.pi/agent/AGENTS.md" 27 - ln -sf "${AGENTS_DIR}/pi/settings.json" "${HOME}/.pi/agent/settings.json" 28 - ln -sfT "${AGENTS_DIR}/pi/extensions" "${HOME}/.pi/agent/extensions" 29 - ln -sfT "${AGENTS_DIR}/skills" "${HOME}/.pi/agent/skills" 30 - } 24 + mkdir -p "${CODEX_DIR}" 25 + ln -sf "${AGENTS_DIR}/codex/config.toml" "${CODEX_DIR}/config.toml" 26 + ln -sf "${AGENTS_DIR}/AGENTS.md" "${CODEX_DIR}/AGENTS.md" 27 + ln -sfT "${GLOBAL_SKILLS_DIR}" "${CODEX_DIR}/skills" 31 28 32 - setup_amp() { 33 - mkdir -p "${HOME}/.config/amp" 34 - ln -sf "${AGENTS_DIR}/AGENTS.md" "${HOME}/.config/amp/AGENTS.md" 35 - ln -sf "${AGENTS_DIR}/amp/settings.json" "${HOME}/.config/amp/settings.json" 36 - ln -sfT "${AGENTS_DIR}/skills" "${HOME}/.config/amp/skills" 37 - } 29 + mkdir -p "${PI_AGENT_DIR}" 30 + ln -sf "${AGENTS_DIR}/AGENTS.md" "${PI_AGENT_DIR}/AGENTS.md" 31 + ln -sf "${AGENTS_DIR}/pi/settings.json" "${PI_AGENT_DIR}/settings.json" 32 + ln -sfT "${AGENTS_DIR}/pi/extensions" "${PI_AGENT_DIR}/extensions" 33 + ln -sfT "${GLOBAL_SKILLS_DIR}" "${PI_AGENT_DIR}/skills" 34 + ln -sfT "${AGENTS_DIR}/commands" "${PI_AGENT_DIR}/prompts" 38 35 39 - 
setup_codex 40 - setup_pi 41 - setup_amp 36 + mkdir -p "${AMP_DIR}" 37 + ln -sf "${AGENTS_DIR}/AGENTS.md" "${AMP_DIR}/AGENTS.md" 38 + ln -sf "${AGENTS_DIR}/amp/settings.json" "${AMP_DIR}/settings.json" 39 + ln -sfT "${GLOBAL_SKILLS_DIR}" "${AMP_DIR}/skills"
+320 -11
agents/skills/agent-browser/SKILL.md
··· 1 1 --- 2 2 name: agent-browser 3 3 description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction. 4 + allowed-tools: Bash(npx agent-browser:*), Bash(agent-browser:*) 4 5 --- 5 6 6 7 # Browser Automation with agent-browser ··· 26 27 agent-browser snapshot -i # Check result 27 28 ``` 28 29 30 + ## Command Chaining 31 + 32 + Commands can be chained with `&&` in a single shell invocation. The browser persists between commands via a background daemon, so chaining is safe and more efficient than separate calls. 33 + 34 + ```bash 35 + # Chain open + wait + snapshot in one call 36 + agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser snapshot -i 37 + 38 + # Chain multiple interactions 39 + agent-browser fill @e1 "user@example.com" && agent-browser fill @e2 "password123" && agent-browser click @e3 40 + 41 + # Navigate and capture 42 + agent-browser open https://example.com && agent-browser wait --load networkidle && agent-browser screenshot page.png 43 + ``` 44 + 45 + **When to chain:** Use `&&` when you don't need to read the output of an intermediate command before proceeding (e.g., open + wait + screenshot). Run commands separately when you need to parse the output first (e.g., snapshot to discover refs, then interact using those refs). 
46 + 29 47 ## Essential Commands 30 48 31 49 ```bash ··· 40 58 41 59 # Interaction (use @refs from snapshot) 42 60 agent-browser click @e1 # Click element 61 + agent-browser click @e1 --new-tab # Click and open in new tab 43 62 agent-browser fill @e2 "text" # Clear and type text 44 63 agent-browser type @e2 "text" # Type without clearing 45 64 agent-browser select @e1 "option" # Select dropdown option 46 65 agent-browser check @e1 # Check checkbox 47 66 agent-browser press Enter # Press key 67 + agent-browser keyboard type "text" # Type at current focus (no selector) 68 + agent-browser keyboard inserttext "text" # Insert without key events 48 69 agent-browser scroll down 500 # Scroll page 70 + agent-browser scroll down 500 --selector "div.content" # Scroll within a specific container 49 71 50 72 # Get information 51 73 agent-browser get text @e1 # Get element text ··· 57 79 agent-browser wait --load networkidle # Wait for network idle 58 80 agent-browser wait --url "**/page" # Wait for URL pattern 59 81 agent-browser wait 2000 # Wait milliseconds 82 + 83 + # Downloads 84 + agent-browser download @e1 ./file.pdf # Click element to trigger download 85 + agent-browser wait --download ./output.zip # Wait for any download to complete 86 + agent-browser --download-path ./downloads open <url> # Set default download directory 60 87 61 88 # Capture 62 89 agent-browser screenshot # Screenshot to temp dir 63 90 agent-browser screenshot --full # Full page screenshot 91 + agent-browser screenshot --annotate # Annotated screenshot with numbered element labels 64 92 agent-browser pdf output.pdf # Save as PDF 93 + 94 + # Diff (compare page states) 95 + agent-browser diff snapshot # Compare current vs last snapshot 96 + agent-browser diff snapshot --baseline before.txt # Compare current vs saved file 97 + agent-browser diff screenshot --baseline before.png # Visual pixel diff 98 + agent-browser diff url <url1> <url2> # Compare two pages 99 + agent-browser diff url <url1> <url2> 
--wait-until networkidle # Custom wait strategy 100 + agent-browser diff url <url1> <url2> --selector "#main" # Scope to element 65 101 ``` 66 102 67 103 ## Common Patterns ··· 79 115 agent-browser wait --load networkidle 80 116 ``` 81 117 118 + ### Authentication with Auth Vault (Recommended) 119 + 120 + ```bash 121 + # Save credentials once (encrypted with AGENT_BROWSER_ENCRYPTION_KEY) 122 + # Recommended: pipe password via stdin to avoid shell history exposure 123 + echo "pass" | agent-browser auth save github --url https://github.com/login --username user --password-stdin 124 + 125 + # Login using saved profile (LLM never sees password) 126 + agent-browser auth login github 127 + 128 + # List/show/delete profiles 129 + agent-browser auth list 130 + agent-browser auth show github 131 + agent-browser auth delete github 132 + ``` 133 + 82 134 ### Authentication with State Persistence 83 135 84 136 ```bash ··· 96 148 agent-browser open https://app.example.com/dashboard 97 149 ``` 98 150 99 - ### Reusing existing Brave auth/session 151 + ### Session Persistence 100 152 101 - If the user asks to reuse existing browser auth (for example their current Brave session), first restart Brave with CDP enabled. 153 + ```bash 154 + # Auto-save/restore cookies and localStorage across browser restarts 155 + agent-browser --session-name myapp open https://app.example.com/login 156 + # ... login flow ... 157 + agent-browser close # State auto-saved to ~/.agent-browser/sessions/ 102 158 103 - 1. Kill Brave completely. 104 - 2. 
Start Brave with remote debugging: 159 + # Next time, state is auto-loaded 160 + agent-browser --session-name myapp open https://app.example.com/dashboard 105 161 106 - ```bash 107 - /opt/brave-bin/brave --remote-debugging-port=9222 --remote-debugging-address=127.0.0.1 108 - ``` 109 - 110 - Then connect agent-browser to that session: 162 + # Encrypt state at rest 163 + export AGENT_BROWSER_ENCRYPTION_KEY=$(openssl rand -hex 32) 164 + agent-browser --session-name secure open https://app.example.com 111 165 112 - ```bash 113 - agent-browser connect 9222 166 + # Manage saved states 167 + agent-browser state list 168 + agent-browser state show myapp-default.json 169 + agent-browser state clear myapp 170 + agent-browser state clean --older-than 7 114 171 ``` 115 172 116 173 ### Data Extraction ··· 138 195 agent-browser session list 139 196 ``` 140 197 198 + ### Connect to Existing Chrome 199 + 200 + ```bash 201 + # Auto-discover running Chrome with remote debugging enabled 202 + agent-browser --auto-connect open https://example.com 203 + agent-browser --auto-connect snapshot 204 + 205 + # Or with explicit CDP port 206 + agent-browser --cdp 9222 snapshot 207 + ``` 208 + 209 + ### Color Scheme (Dark Mode) 210 + 211 + ```bash 212 + # Persistent dark mode via flag (applies to all pages and new tabs) 213 + agent-browser --color-scheme dark open https://example.com 214 + 215 + # Or via environment variable 216 + AGENT_BROWSER_COLOR_SCHEME=dark agent-browser open https://example.com 217 + 218 + # Or set during session (persists for subsequent commands) 219 + agent-browser set media dark 220 + ``` 221 + 141 222 ### Visual Browser (Debugging) 142 223 143 224 ```bash 144 225 agent-browser --headed open https://example.com 145 226 agent-browser highlight @e1 # Highlight element 146 227 agent-browser record start demo.webm # Record session 228 + agent-browser profiler start # Start Chrome DevTools profiling 229 + agent-browser profiler stop trace.json # Stop and save profile (path 
optional) 147 230 ``` 231 + 232 + Use `AGENT_BROWSER_HEADED=1` to enable headed mode via environment variable. Browser extensions work in both headed and headless mode. 148 233 149 234 ### Local Files (PDFs, HTML) 150 235 ··· 181 266 182 267 **Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`. 183 268 269 + ## Security 270 + 271 + All security features are opt-in. By default, agent-browser imposes no restrictions on navigation, actions, or output. 272 + 273 + ### Content Boundaries (Recommended for AI Agents) 274 + 275 + Enable `--content-boundaries` to wrap page-sourced output in markers that help LLMs distinguish tool output from untrusted page content: 276 + 277 + ```bash 278 + export AGENT_BROWSER_CONTENT_BOUNDARIES=1 279 + agent-browser snapshot 280 + # Output: 281 + # --- AGENT_BROWSER_PAGE_CONTENT nonce=<hex> origin=https://example.com --- 282 + # [accessibility tree] 283 + # --- END_AGENT_BROWSER_PAGE_CONTENT nonce=<hex> --- 284 + ``` 285 + 286 + ### Domain Allowlist 287 + 288 + Restrict navigation to trusted domains. Wildcards like `*.example.com` also match the bare domain `example.com`. Sub-resource requests, WebSocket, and EventSource connections to non-allowed domains are also blocked. Include CDN domains your target pages depend on: 289 + 290 + ```bash 291 + export AGENT_BROWSER_ALLOWED_DOMAINS="example.com,*.example.com" 292 + agent-browser open https://example.com # OK 293 + agent-browser open https://malicious.com # Blocked 294 + ``` 295 + 296 + ### Action Policy 297 + 298 + Use a policy file to gate destructive actions: 299 + 300 + ```bash 301 + export AGENT_BROWSER_ACTION_POLICY=./policy.json 302 + ``` 303 + 304 + Example `policy.json`: 305 + ```json 306 + {"default": "deny", "allow": ["navigate", "snapshot", "click", "scroll", "wait", "get"]} 307 + ``` 308 + 309 + Auth vault operations (`auth login`, etc.) 
bypass action policy but domain allowlist still applies. 310 + 311 + ### Output Limits 312 + 313 + Prevent context flooding from large pages: 314 + 315 + ```bash 316 + export AGENT_BROWSER_MAX_OUTPUT=50000 317 + ``` 318 + 319 + ## Diffing (Verifying Changes) 320 + 321 + Use `diff snapshot` after performing an action to verify it had the intended effect. This compares the current accessibility tree against the last snapshot taken in the session. 322 + 323 + ```bash 324 + # Typical workflow: snapshot -> action -> diff 325 + agent-browser snapshot -i # Take baseline snapshot 326 + agent-browser click @e2 # Perform action 327 + agent-browser diff snapshot # See what changed (auto-compares to last snapshot) 328 + ``` 329 + 330 + For visual regression testing or monitoring: 331 + 332 + ```bash 333 + # Save a baseline screenshot, then compare later 334 + agent-browser screenshot baseline.png 335 + # ... time passes or changes are made ... 336 + agent-browser diff screenshot --baseline baseline.png 337 + 338 + # Compare staging vs production 339 + agent-browser diff url https://staging.example.com https://prod.example.com --screenshot 340 + ``` 341 + 342 + `diff snapshot` output uses `+` for additions and `-` for removals, similar to git diff. `diff screenshot` produces a diff image with changed pixels highlighted in red, plus a mismatch percentage. 343 + 344 + ## Timeouts and Slow Pages 345 + 346 + The default Playwright timeout is 25 seconds for local browsers. This can be overridden with the `AGENT_BROWSER_DEFAULT_TIMEOUT` environment variable (value in milliseconds). 
For slow websites or large pages, use explicit waits instead of relying on the default timeout: 347 + 348 + ```bash 349 + # Wait for network activity to settle (best for slow pages) 350 + agent-browser wait --load networkidle 351 + 352 + # Wait for a specific element to appear 353 + agent-browser wait "#content" 354 + agent-browser wait @e1 355 + 356 + # Wait for a specific URL pattern (useful after redirects) 357 + agent-browser wait --url "**/dashboard" 358 + 359 + # Wait for a JavaScript condition 360 + agent-browser wait --fn "document.readyState === 'complete'" 361 + 362 + # Wait a fixed duration (milliseconds) as a last resort 363 + agent-browser wait 5000 364 + ``` 365 + 366 + When dealing with consistently slow websites, use `wait --load networkidle` after `open` to ensure the page is fully loaded before taking a snapshot. If a specific element is slow to render, wait for it directly with `wait <selector>` or `wait @ref`. 367 + 368 + ## Session Management and Cleanup 369 + 370 + When running multiple agents or automations concurrently, always use named sessions to avoid conflicts: 371 + 372 + ```bash 373 + # Each agent gets its own isolated session 374 + agent-browser --session agent1 open site-a.com 375 + agent-browser --session agent2 open site-b.com 376 + 377 + # Check active sessions 378 + agent-browser session list 379 + ``` 380 + 381 + Always close your browser session when done to avoid leaked processes: 382 + 383 + ```bash 384 + agent-browser close # Close default session 385 + agent-browser --session agent1 close # Close specific session 386 + ``` 387 + 388 + If a previous session was not closed properly, the daemon may still be running. Use `agent-browser close` to clean it up before starting new work. 389 + 184 390 ## Ref Lifecycle (Important) 185 391 186 392 Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. 
Always re-snapshot after: ··· 195 401 agent-browser click @e1 # Use new refs 196 402 ``` 197 403 404 + ## Annotated Screenshots (Vision Mode) 405 + 406 + Use `--annotate` to take a screenshot with numbered labels overlaid on interactive elements. Each label `[N]` maps to ref `@eN`. This also caches refs, so you can interact with elements immediately without a separate snapshot. 407 + 408 + ```bash 409 + agent-browser screenshot --annotate 410 + # Output includes the image path and a legend: 411 + # [1] @e1 button "Submit" 412 + # [2] @e2 link "Home" 413 + # [3] @e3 textbox "Email" 414 + agent-browser click @e2 # Click using ref from annotated screenshot 415 + ``` 416 + 417 + Use annotated screenshots when: 418 + - The page has unlabeled icon buttons or visual-only elements 419 + - You need to verify visual layout or styling 420 + - Canvas or chart elements are present (invisible to text snapshots) 421 + - You need spatial reasoning about element positions 422 + 198 423 ## Semantic Locators (Alternative to Refs) 199 424 200 425 When refs are unavailable or unreliable, use semantic locators: ··· 206 431 agent-browser find placeholder "Search" type "query" 207 432 agent-browser find testid "submit-btn" click 208 433 ``` 434 + 435 + ## JavaScript Evaluation (eval) 436 + 437 + Use `eval` to run JavaScript in the browser context. **Shell quoting can corrupt complex expressions** -- use `--stdin` or `-b` to avoid issues. 
438 + 439 + ```bash 440 + # Simple expressions work with regular quoting 441 + agent-browser eval 'document.title' 442 + agent-browser eval 'document.querySelectorAll("img").length' 443 + 444 + # Complex JS: use --stdin with heredoc (RECOMMENDED) 445 + agent-browser eval --stdin <<'EVALEOF' 446 + JSON.stringify( 447 + Array.from(document.querySelectorAll("img")) 448 + .filter(i => !i.alt) 449 + .map(i => ({ src: i.src.split("/").pop(), width: i.width })) 450 + ) 451 + EVALEOF 452 + 453 + # Alternative: base64 encoding (avoids all shell escaping issues) 454 + agent-browser eval -b "$(echo -n 'Array.from(document.querySelectorAll("a")).map(a => a.href)' | base64)" 455 + ``` 456 + 457 + **Why this matters:** When the shell processes your command, inner double quotes, `!` characters (history expansion), backticks, and `$()` can all corrupt the JavaScript before it reaches agent-browser. The `--stdin` and `-b` flags bypass shell interpretation entirely. 458 + 459 + **Rules of thumb:** 460 + - Single-line, no nested quotes -> regular `eval 'expression'` with single quotes is fine 461 + - Nested quotes, arrow functions, template literals, or multiline -> use `eval --stdin <<'EVALEOF'` 462 + - Programmatic/generated scripts -> use `eval -b` with base64 463 + 464 + ## Configuration File 465 + 466 + Create `agent-browser.json` in the project root for persistent settings: 467 + 468 + ```json 469 + { 470 + "headed": true, 471 + "proxy": "http://localhost:8080", 472 + "profile": "./browser-data" 473 + } 474 + ``` 475 + 476 + Priority (lowest to highest): `~/.agent-browser/config.json` < `./agent-browser.json` < env vars < CLI flags. Use `--config <path>` or `AGENT_BROWSER_CONFIG` env var for a custom config file (exits with error if missing/invalid). All CLI options map to camelCase keys (e.g., `--executable-path` -> `"executablePath"`). Boolean flags accept `true`/`false` values (e.g., `--headed false` overrides config). 
Extensions from user and project configs are merged, not replaced. 477 + 478 + ## Deep-Dive Documentation 479 + 480 + | Reference | When to Use | 481 + |-----------|-------------| 482 + | [references/commands.md](references/commands.md) | Full command reference with all options | 483 + | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting | 484 + | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping | 485 + | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse | 486 + | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation | 487 + | [references/profiling.md](references/profiling.md) | Chrome DevTools profiling for performance analysis | 488 + | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies | 489 + 490 + ## Experimental: Native Mode 491 + 492 + agent-browser has an experimental native Rust daemon that communicates with Chrome directly via CDP, bypassing Node.js and Playwright entirely. It is opt-in and not recommended for production use yet. 493 + 494 + ```bash 495 + # Enable via flag 496 + agent-browser --native open example.com 497 + 498 + # Enable via environment variable (avoids passing --native every time) 499 + export AGENT_BROWSER_NATIVE=1 500 + agent-browser open example.com 501 + ``` 502 + 503 + The native daemon supports Chromium and Safari (via WebDriver). Firefox and WebKit are not yet supported. All core commands (navigate, snapshot, click, fill, screenshot, cookies, storage, tabs, eval, etc.) work identically in native mode. Use `agent-browser close` before switching between native and default mode within the same session. 
504 + 505 + ## Ready-to-Use Templates 506 + 507 + | Template | Description | 508 + |----------|-------------| 509 + | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation | 510 + | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state | 511 + | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots | 512 + 513 + ```bash 514 + ./templates/form-automation.sh https://example.com/form 515 + ./templates/authenticated-session.sh https://app.example.com/login 516 + ./templates/capture-workflow.sh https://example.com ./output 517 + ```
+202
agents/skills/agent-browser/references/authentication.md
··· 1 + # Authentication Patterns 2 + 3 + Login flows, session persistence, OAuth, 2FA, and authenticated browsing. 4 + 5 + **Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [Basic Login Flow](#basic-login-flow) 10 + - [Saving Authentication State](#saving-authentication-state) 11 + - [Restoring Authentication](#restoring-authentication) 12 + - [OAuth / SSO Flows](#oauth--sso-flows) 13 + - [Two-Factor Authentication](#two-factor-authentication) 14 + - [HTTP Basic Auth](#http-basic-auth) 15 + - [Cookie-Based Auth](#cookie-based-auth) 16 + - [Token Refresh Handling](#token-refresh-handling) 17 + - [Security Best Practices](#security-best-practices) 18 + 19 + ## Basic Login Flow 20 + 21 + ```bash 22 + # Navigate to login page 23 + agent-browser open https://app.example.com/login 24 + agent-browser wait --load networkidle 25 + 26 + # Get form elements 27 + agent-browser snapshot -i 28 + # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In" 29 + 30 + # Fill credentials 31 + agent-browser fill @e1 "user@example.com" 32 + agent-browser fill @e2 "password123" 33 + 34 + # Submit 35 + agent-browser click @e3 36 + agent-browser wait --load networkidle 37 + 38 + # Verify login succeeded 39 + agent-browser get url # Should be dashboard, not login 40 + ``` 41 + 42 + ## Saving Authentication State 43 + 44 + After logging in, save state for reuse: 45 + 46 + ```bash 47 + # Login first (see above) 48 + agent-browser open https://app.example.com/login 49 + agent-browser snapshot -i 50 + agent-browser fill @e1 "user@example.com" 51 + agent-browser fill @e2 "password123" 52 + agent-browser click @e3 53 + agent-browser wait --url "**/dashboard" 54 + 55 + # Save authenticated state 56 + agent-browser state save ./auth-state.json 57 + ``` 58 + 59 + ## Restoring Authentication 60 + 61 + Skip login by loading saved state: 62 + 63 + ```bash 64 + # 
Load saved auth state 65 + agent-browser state load ./auth-state.json 66 + 67 + # Navigate directly to protected page 68 + agent-browser open https://app.example.com/dashboard 69 + 70 + # Verify authenticated 71 + agent-browser snapshot -i 72 + ``` 73 + 74 + ## OAuth / SSO Flows 75 + 76 + For OAuth redirects: 77 + 78 + ```bash 79 + # Start OAuth flow 80 + agent-browser open https://app.example.com/auth/google 81 + 82 + # Handle redirects automatically 83 + agent-browser wait --url "**/accounts.google.com**" 84 + agent-browser snapshot -i 85 + 86 + # Fill Google credentials 87 + agent-browser fill @e1 "user@gmail.com" 88 + agent-browser click @e2 # Next button 89 + agent-browser wait 2000 90 + agent-browser snapshot -i 91 + agent-browser fill @e3 "password" 92 + agent-browser click @e4 # Sign in 93 + 94 + # Wait for redirect back 95 + agent-browser wait --url "**/app.example.com**" 96 + agent-browser state save ./oauth-state.json 97 + ``` 98 + 99 + ## Two-Factor Authentication 100 + 101 + Handle 2FA with manual intervention: 102 + 103 + ```bash 104 + # Login with credentials 105 + agent-browser open https://app.example.com/login --headed # Show browser 106 + agent-browser snapshot -i 107 + agent-browser fill @e1 "user@example.com" 108 + agent-browser fill @e2 "password123" 109 + agent-browser click @e3 110 + 111 + # Wait for user to complete 2FA manually 112 + echo "Complete 2FA in the browser window..." 
113 + agent-browser wait --url "**/dashboard" --timeout 120000 114 + 115 + # Save state after 2FA 116 + agent-browser state save ./2fa-state.json 117 + ``` 118 + 119 + ## HTTP Basic Auth 120 + 121 + For sites using HTTP Basic Authentication: 122 + 123 + ```bash 124 + # Set credentials before navigation 125 + agent-browser set credentials username password 126 + 127 + # Navigate to protected resource 128 + agent-browser open https://protected.example.com/api 129 + ``` 130 + 131 + ## Cookie-Based Auth 132 + 133 + Manually set authentication cookies: 134 + 135 + ```bash 136 + # Set auth cookie 137 + agent-browser cookies set session_token "abc123xyz" 138 + 139 + # Navigate to protected page 140 + agent-browser open https://app.example.com/dashboard 141 + ``` 142 + 143 + ## Token Refresh Handling 144 + 145 + For sessions with expiring tokens: 146 + 147 + ```bash 148 + #!/bin/bash 149 + # Wrapper that handles token refresh 150 + 151 + STATE_FILE="./auth-state.json" 152 + 153 + # Try loading existing state 154 + if [[ -f "$STATE_FILE" ]]; then 155 + agent-browser state load "$STATE_FILE" 156 + agent-browser open https://app.example.com/dashboard 157 + 158 + # Check if session is still valid 159 + URL=$(agent-browser get url) 160 + if [[ "$URL" == *"/login"* ]]; then 161 + echo "Session expired, re-authenticating..." 162 + # Perform fresh login 163 + agent-browser snapshot -i 164 + agent-browser fill @e1 "$USERNAME" 165 + agent-browser fill @e2 "$PASSWORD" 166 + agent-browser click @e3 167 + agent-browser wait --url "**/dashboard" 168 + agent-browser state save "$STATE_FILE" 169 + fi 170 + else 171 + # First-time login 172 + agent-browser open https://app.example.com/login 173 + # ... login flow ... 174 + fi 175 + ``` 176 + 177 + ## Security Best Practices 178 + 179 + 1. **Never commit state files** - They contain session tokens 180 + ```bash 181 + echo "*-state.json" >> .gitignore 182 + ``` 183 + 184 + 2. 
**Use environment variables for credentials** 185 + ```bash 186 + agent-browser fill @e1 "$APP_USERNAME" 187 + agent-browser fill @e2 "$APP_PASSWORD" 188 + ``` 189 + 190 + 3. **Clean up after automation** 191 + ```bash 192 + agent-browser cookies clear 193 + rm -f ./auth-state.json 194 + ``` 195 + 196 + 4. **Use short-lived sessions for CI/CD** 197 + ```bash 198 + # Don't persist state in CI 199 + agent-browser open https://app.example.com/login 200 + # ... login and perform actions ... 201 + agent-browser close # Session ends, nothing persisted 202 + ```
+263
agents/skills/agent-browser/references/commands.md
··· 1 + # Command Reference 2 + 3 + Complete reference for all agent-browser commands. For quick start and common patterns, see SKILL.md. 4 + 5 + ## Navigation 6 + 7 + ```bash 8 + agent-browser open <url> # Navigate to URL (aliases: goto, navigate) 9 + # Supports: https://, http://, file://, about:, data:// 10 + # Auto-prepends https:// if no protocol given 11 + agent-browser back # Go back 12 + agent-browser forward # Go forward 13 + agent-browser reload # Reload page 14 + agent-browser close # Close browser (aliases: quit, exit) 15 + agent-browser connect 9222 # Connect to browser via CDP port 16 + ``` 17 + 18 + ## Snapshot (page analysis) 19 + 20 + ```bash 21 + agent-browser snapshot # Full accessibility tree 22 + agent-browser snapshot -i # Interactive elements only (recommended) 23 + agent-browser snapshot -c # Compact output 24 + agent-browser snapshot -d 3 # Limit depth to 3 25 + agent-browser snapshot -s "#main" # Scope to CSS selector 26 + ``` 27 + 28 + ## Interactions (use @refs from snapshot) 29 + 30 + ```bash 31 + agent-browser click @e1 # Click 32 + agent-browser click @e1 --new-tab # Click and open in new tab 33 + agent-browser dblclick @e1 # Double-click 34 + agent-browser focus @e1 # Focus element 35 + agent-browser fill @e2 "text" # Clear and type 36 + agent-browser type @e2 "text" # Type without clearing 37 + agent-browser press Enter # Press key (alias: key) 38 + agent-browser press Control+a # Key combination 39 + agent-browser keydown Shift # Hold key down 40 + agent-browser keyup Shift # Release key 41 + agent-browser hover @e1 # Hover 42 + agent-browser check @e1 # Check checkbox 43 + agent-browser uncheck @e1 # Uncheck checkbox 44 + agent-browser select @e1 "value" # Select dropdown option 45 + agent-browser select @e1 "a" "b" # Select multiple options 46 + agent-browser scroll down 500 # Scroll page (default: down 300px) 47 + agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) 48 + agent-browser drag @e1 @e2 # 
Drag and drop 49 + agent-browser upload @e1 file.pdf # Upload files 50 + ``` 51 + 52 + ## Get Information 53 + 54 + ```bash 55 + agent-browser get text @e1 # Get element text 56 + agent-browser get html @e1 # Get innerHTML 57 + agent-browser get value @e1 # Get input value 58 + agent-browser get attr @e1 href # Get attribute 59 + agent-browser get title # Get page title 60 + agent-browser get url # Get current URL 61 + agent-browser get count ".item" # Count matching elements 62 + agent-browser get box @e1 # Get bounding box 63 + agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.) 64 + ``` 65 + 66 + ## Check State 67 + 68 + ```bash 69 + agent-browser is visible @e1 # Check if visible 70 + agent-browser is enabled @e1 # Check if enabled 71 + agent-browser is checked @e1 # Check if checked 72 + ``` 73 + 74 + ## Screenshots and PDF 75 + 76 + ```bash 77 + agent-browser screenshot # Save to temporary directory 78 + agent-browser screenshot path.png # Save to specific path 79 + agent-browser screenshot --full # Full page 80 + agent-browser pdf output.pdf # Save as PDF 81 + ``` 82 + 83 + ## Video Recording 84 + 85 + ```bash 86 + agent-browser record start ./demo.webm # Start recording 87 + agent-browser click @e1 # Perform actions 88 + agent-browser record stop # Stop and save video 89 + agent-browser record restart ./take2.webm # Stop current + start new 90 + ``` 91 + 92 + ## Wait 93 + 94 + ```bash 95 + agent-browser wait @e1 # Wait for element 96 + agent-browser wait 2000 # Wait milliseconds 97 + agent-browser wait --text "Success" # Wait for text (or -t) 98 + agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u) 99 + agent-browser wait --load networkidle # Wait for network idle (or -l) 100 + agent-browser wait --fn "window.ready" # Wait for JS condition (or -f) 101 + ``` 102 + 103 + ## Mouse Control 104 + 105 + ```bash 106 + agent-browser mouse move 100 200 # Move mouse 107 + agent-browser mouse down left # Press button 108 + 
agent-browser mouse up left # Release button 109 + agent-browser mouse wheel 100 # Scroll wheel 110 + ``` 111 + 112 + ## Semantic Locators (alternative to refs) 113 + 114 + ```bash 115 + agent-browser find role button click --name "Submit" 116 + agent-browser find text "Sign In" click 117 + agent-browser find text "Sign In" click --exact # Exact match only 118 + agent-browser find label "Email" fill "user@test.com" 119 + agent-browser find placeholder "Search" type "query" 120 + agent-browser find alt "Logo" click 121 + agent-browser find title "Close" click 122 + agent-browser find testid "submit-btn" click 123 + agent-browser find first ".item" click 124 + agent-browser find last ".item" click 125 + agent-browser find nth 2 "a" hover 126 + ``` 127 + 128 + ## Browser Settings 129 + 130 + ```bash 131 + agent-browser set viewport 1920 1080 # Set viewport size 132 + agent-browser set device "iPhone 14" # Emulate device 133 + agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation) 134 + agent-browser set offline on # Toggle offline mode 135 + agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers 136 + agent-browser set credentials user pass # HTTP basic auth (alias: auth) 137 + agent-browser set media dark # Emulate color scheme 138 + agent-browser set media light reduced-motion # Light mode + reduced motion 139 + ``` 140 + 141 + ## Cookies and Storage 142 + 143 + ```bash 144 + agent-browser cookies # Get all cookies 145 + agent-browser cookies set name value # Set cookie 146 + agent-browser cookies clear # Clear cookies 147 + agent-browser storage local # Get all localStorage 148 + agent-browser storage local key # Get specific key 149 + agent-browser storage local set k v # Set value 150 + agent-browser storage local clear # Clear all 151 + ``` 152 + 153 + ## Network 154 + 155 + ```bash 156 + agent-browser network route <url> # Intercept requests 157 + agent-browser network route <url> --abort # Block requests 158 + agent-browser 
network route <url> --body '{}' # Mock response 159 + agent-browser network unroute [url] # Remove routes 160 + agent-browser network requests # View tracked requests 161 + agent-browser network requests --filter api # Filter requests 162 + ``` 163 + 164 + ## Tabs and Windows 165 + 166 + ```bash 167 + agent-browser tab # List tabs 168 + agent-browser tab new [url] # New tab 169 + agent-browser tab 2 # Switch to tab by index 170 + agent-browser tab close # Close current tab 171 + agent-browser tab close 2 # Close tab by index 172 + agent-browser window new # New window 173 + ``` 174 + 175 + ## Frames 176 + 177 + ```bash 178 + agent-browser frame "#iframe" # Switch to iframe 179 + agent-browser frame main # Back to main frame 180 + ``` 181 + 182 + ## Dialogs 183 + 184 + ```bash 185 + agent-browser dialog accept [text] # Accept dialog 186 + agent-browser dialog dismiss # Dismiss dialog 187 + ``` 188 + 189 + ## JavaScript 190 + 191 + ```bash 192 + agent-browser eval "document.title" # Simple expressions only 193 + agent-browser eval -b "<base64>" # Any JavaScript (base64 encoded) 194 + agent-browser eval --stdin # Read script from stdin 195 + ``` 196 + 197 + Use `-b`/`--base64` or `--stdin` for reliable execution. Shell escaping with nested quotes and special characters is error-prone. 198 + 199 + ```bash 200 + # Base64 encode your script, then: 201 + agent-browser eval -b "ZG9jdW1lbnQucXVlcnlTZWxlY3RvcignW3NyYyo9Il9uZXh0Il0nKQ==" 202 + 203 + # Or use stdin with heredoc for multiline scripts: 204 + cat <<'EOF' | agent-browser eval --stdin 205 + const links = document.querySelectorAll('a'); 206 + Array.from(links).map(a => a.href); 207 + EOF 208 + ``` 209 + 210 + ## State Management 211 + 212 + ```bash 213 + agent-browser state save auth.json # Save cookies, storage, auth state 214 + agent-browser state load auth.json # Restore saved state 215 + ``` 216 + 217 + ## Global Options 218 + 219 + ```bash 220 + agent-browser --session <name> ... 
# Isolated browser session 221 + agent-browser --json ... # JSON output for parsing 222 + agent-browser --headed ... # Show browser window (not headless) 223 + agent-browser --full ... # Full page screenshot (-f) 224 + agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol 225 + agent-browser -p <provider> ... # Cloud browser provider (--provider) 226 + agent-browser --proxy <url> ... # Use proxy server 227 + agent-browser --proxy-bypass <hosts> # Hosts to bypass proxy 228 + agent-browser --headers <json> ... # HTTP headers scoped to URL's origin 229 + agent-browser --executable-path <p> # Custom browser executable 230 + agent-browser --extension <path> ... # Load browser extension (repeatable) 231 + agent-browser --ignore-https-errors # Ignore SSL certificate errors 232 + agent-browser --help # Show help (-h) 233 + agent-browser --version # Show version (-V) 234 + agent-browser <command> --help # Show detailed help for a command 235 + ``` 236 + 237 + ## Debugging 238 + 239 + ```bash 240 + agent-browser --headed open example.com # Show browser window 241 + agent-browser --cdp 9222 snapshot # Connect via CDP port 242 + agent-browser connect 9222 # Alternative: connect command 243 + agent-browser console # View console messages 244 + agent-browser console --clear # Clear console 245 + agent-browser errors # View page errors 246 + agent-browser errors --clear # Clear errors 247 + agent-browser highlight @e1 # Highlight element 248 + agent-browser trace start # Start recording trace 249 + agent-browser trace stop trace.zip # Stop and save trace 250 + agent-browser profiler start # Start Chrome DevTools profiling 251 + agent-browser profiler stop trace.json # Stop and save profile 252 + ``` 253 + 254 + ## Environment Variables 255 + 256 + ```bash 257 + AGENT_BROWSER_SESSION="mysession" # Default session name 258 + AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path 259 + AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension 
paths 260 + AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider 261 + AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port 262 + AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location 263 + ```
+120
agents/skills/agent-browser/references/profiling.md
··· 1 + # Profiling 2 + 3 + Capture Chrome DevTools performance profiles during browser automation for performance analysis. 4 + 5 + **Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [Basic Profiling](#basic-profiling) 10 + - [Profiler Commands](#profiler-commands) 11 + - [Categories](#categories) 12 + - [Use Cases](#use-cases) 13 + - [Output Format](#output-format) 14 + - [Viewing Profiles](#viewing-profiles) 15 + - [Limitations](#limitations) 16 + 17 + ## Basic Profiling 18 + 19 + ```bash 20 + # Start profiling 21 + agent-browser profiler start 22 + 23 + # Perform actions 24 + agent-browser navigate https://example.com 25 + agent-browser click "#button" 26 + agent-browser wait 1000 27 + 28 + # Stop and save 29 + agent-browser profiler stop ./trace.json 30 + ``` 31 + 32 + ## Profiler Commands 33 + 34 + ```bash 35 + # Start profiling with default categories 36 + agent-browser profiler start 37 + 38 + # Start with custom trace categories 39 + agent-browser profiler start --categories "devtools.timeline,v8.execute,blink.user_timing" 40 + 41 + # Stop profiling and save to file 42 + agent-browser profiler stop ./trace.json 43 + ``` 44 + 45 + ## Categories 46 + 47 + The `--categories` flag accepts a comma-separated list of Chrome trace categories. Default categories include: 48 + 49 + - `devtools.timeline` -- standard DevTools performance traces 50 + - `v8.execute` -- time spent running JavaScript 51 + - `blink` -- renderer events 52 + - `blink.user_timing` -- `performance.mark()` / `performance.measure()` calls 53 + - `latencyInfo` -- input-to-latency tracking 54 + - `renderer.scheduler` -- task scheduling and execution 55 + - `toplevel` -- broad-spectrum basic events 56 + 57 + Several `disabled-by-default-*` categories are also included for detailed timeline, call stack, and V8 CPU profiling data. 
58 + 59 + ## Use Cases 60 + 61 + ### Diagnosing Slow Page Loads 62 + 63 + ```bash 64 + agent-browser profiler start 65 + agent-browser navigate https://app.example.com 66 + agent-browser wait --load networkidle 67 + agent-browser profiler stop ./page-load-profile.json 68 + ``` 69 + 70 + ### Profiling User Interactions 71 + 72 + ```bash 73 + agent-browser navigate https://app.example.com 74 + agent-browser profiler start 75 + agent-browser click "#submit" 76 + agent-browser wait 2000 77 + agent-browser profiler stop ./interaction-profile.json 78 + ``` 79 + 80 + ### CI Performance Regression Checks 81 + 82 + ```bash 83 + #!/bin/bash 84 + agent-browser profiler start 85 + agent-browser navigate https://app.example.com 86 + agent-browser wait --load networkidle 87 + agent-browser profiler stop "./profiles/build-${BUILD_ID}.json" 88 + ``` 89 + 90 + ## Output Format 91 + 92 + The output is a JSON file in Chrome Trace Event format: 93 + 94 + ```json 95 + { 96 + "traceEvents": [ 97 + { "cat": "devtools.timeline", "name": "RunTask", "ph": "X", "ts": 12345, "dur": 100, ... }, 98 + ... 99 + ], 100 + "metadata": { 101 + "clock-domain": "LINUX_CLOCK_MONOTONIC" 102 + } 103 + } 104 + ``` 105 + 106 + The `metadata.clock-domain` field is set based on the host platform (Linux or macOS). On Windows it is omitted. 107 + 108 + ## Viewing Profiles 109 + 110 + Load the output JSON file in any of these tools: 111 + 112 + - **Chrome DevTools**: Performance panel > Load profile (Ctrl+Shift+I > Performance) 113 + - **Perfetto UI**: https://ui.perfetto.dev/ -- drag and drop the JSON file 114 + - **Trace Viewer**: `chrome://tracing` in any Chromium browser 115 + 116 + ## Limitations 117 + 118 + - Only works with Chromium-based browsers (Chrome, Edge). Not supported on Firefox or WebKit. 119 + - Trace data accumulates in memory while profiling is active (capped at 5 million events). Stop profiling promptly after the area of interest. 120 + - Data collection on stop has a 30-second timeout. 
If the browser is unresponsive, the stop command may fail.
+194
agents/skills/agent-browser/references/proxy-support.md
··· 1 + # Proxy Support 2 + 3 + Proxy configuration for geo-testing, rate limiting avoidance, and corporate environments. 4 + 5 + **Related**: [commands.md](commands.md) for global options, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [Basic Proxy Configuration](#basic-proxy-configuration) 10 + - [Authenticated Proxy](#authenticated-proxy) 11 + - [SOCKS Proxy](#socks-proxy) 12 + - [Proxy Bypass](#proxy-bypass) 13 + - [Common Use Cases](#common-use-cases) 14 + - [Verifying Proxy Connection](#verifying-proxy-connection) 15 + - [Troubleshooting](#troubleshooting) 16 + - [Best Practices](#best-practices) 17 + 18 + ## Basic Proxy Configuration 19 + 20 + Use the `--proxy` flag or set proxy via environment variable: 21 + 22 + ```bash 23 + # Via CLI flag 24 + agent-browser --proxy "http://proxy.example.com:8080" open https://example.com 25 + 26 + # Via environment variable 27 + export HTTP_PROXY="http://proxy.example.com:8080" 28 + agent-browser open https://example.com 29 + 30 + # HTTPS proxy 31 + export HTTPS_PROXY="https://proxy.example.com:8080" 32 + agent-browser open https://example.com 33 + 34 + # Both 35 + export HTTP_PROXY="http://proxy.example.com:8080" 36 + export HTTPS_PROXY="http://proxy.example.com:8080" 37 + agent-browser open https://example.com 38 + ``` 39 + 40 + ## Authenticated Proxy 41 + 42 + For proxies requiring authentication: 43 + 44 + ```bash 45 + # Include credentials in URL 46 + export HTTP_PROXY="http://username:password@proxy.example.com:8080" 47 + agent-browser open https://example.com 48 + ``` 49 + 50 + ## SOCKS Proxy 51 + 52 + ```bash 53 + # SOCKS5 proxy 54 + export ALL_PROXY="socks5://proxy.example.com:1080" 55 + agent-browser open https://example.com 56 + 57 + # SOCKS5 with auth 58 + export ALL_PROXY="socks5://user:pass@proxy.example.com:1080" 59 + agent-browser open https://example.com 60 + ``` 61 + 62 + ## Proxy Bypass 63 + 64 + Skip proxy for specific domains using `--proxy-bypass` or `NO_PROXY`: 65 + 66 + 
```bash 67 + # Via CLI flag 68 + agent-browser --proxy "http://proxy.example.com:8080" --proxy-bypass "localhost,*.internal.com" open https://example.com 69 + 70 + # Via environment variable 71 + export NO_PROXY="localhost,127.0.0.1,.internal.company.com" 72 + agent-browser open https://internal.company.com # Direct connection 73 + agent-browser open https://external.com # Via proxy 74 + ``` 75 + 76 + ## Common Use Cases 77 + 78 + ### Geo-Location Testing 79 + 80 + ```bash 81 + #!/bin/bash 82 + # Test site from different regions using geo-located proxies 83 + 84 + PROXIES=( 85 + "http://us-proxy.example.com:8080" 86 + "http://eu-proxy.example.com:8080" 87 + "http://asia-proxy.example.com:8080" 88 + ) 89 + 90 + for proxy in "${PROXIES[@]}"; do 91 + export HTTP_PROXY="$proxy" 92 + export HTTPS_PROXY="$proxy" 93 + 94 + region="${proxy#*://}"; region="${region%%.*}" # first host label, e.g. "us-proxy" 95 + echo "Testing from: $region" 96 + 97 + agent-browser --session "$region" open https://example.com 98 + agent-browser --session "$region" screenshot "./screenshots/$region.png" 99 + agent-browser --session "$region" close 100 + done 101 + ``` 102 + 103 + ### Rotating Proxies for Scraping 104 + 105 + ```bash 106 + #!/bin/bash 107 + # Rotate through proxy list to avoid rate limiting 108 + 109 + PROXY_LIST=( 110 + "http://proxy1.example.com:8080" 111 + "http://proxy2.example.com:8080" 112 + "http://proxy3.example.com:8080" 113 + ) 114 + 115 + URLS=( 116 + "https://site.com/page1" 117 + "https://site.com/page2" 118 + "https://site.com/page3" 119 + ) 120 + 121 + for i in "${!URLS[@]}"; do 122 + proxy_index=$((i % ${#PROXY_LIST[@]})) 123 + export HTTP_PROXY="${PROXY_LIST[$proxy_index]}" 124 + export HTTPS_PROXY="${PROXY_LIST[$proxy_index]}" 125 + 126 + agent-browser open "${URLS[$i]}" 127 + agent-browser get text body > "output-$i.txt" 128 + agent-browser close 129 + 130 + sleep 1 # Polite delay 131 + done 132 + ``` 133 + 134 + ### Corporate Network Access 135 + 136 + ```bash 137 + #!/bin/bash 138 + # Access
internal sites via corporate proxy 139 + 140 + export HTTP_PROXY="http://corpproxy.company.com:8080" 141 + export HTTPS_PROXY="http://corpproxy.company.com:8080" 142 + export NO_PROXY="localhost,127.0.0.1,.company.com" 143 + 144 + # External sites go through proxy 145 + agent-browser open https://external-vendor.com 146 + 147 + # Internal sites bypass proxy 148 + agent-browser open https://intranet.company.com 149 + ``` 150 + 151 + ## Verifying Proxy Connection 152 + 153 + ```bash 154 + # Check your apparent IP 155 + agent-browser open https://httpbin.org/ip 156 + agent-browser get text body 157 + # Should show proxy's IP, not your real IP 158 + ``` 159 + 160 + ## Troubleshooting 161 + 162 + ### Proxy Connection Failed 163 + 164 + ```bash 165 + # Test proxy connectivity first 166 + curl -x http://proxy.example.com:8080 https://httpbin.org/ip 167 + 168 + # Check if proxy requires auth 169 + export HTTP_PROXY="http://user:pass@proxy.example.com:8080" 170 + ``` 171 + 172 + ### SSL/TLS Errors Through Proxy 173 + 174 + Some proxies perform SSL inspection. If you encounter certificate errors: 175 + 176 + ```bash 177 + # For testing only - not recommended for production 178 + agent-browser open https://example.com --ignore-https-errors 179 + ``` 180 + 181 + ### Slow Performance 182 + 183 + ```bash 184 + # Use proxy only when necessary 185 + export NO_PROXY="*.cdn.com,*.static.com" # Direct CDN access 186 + ``` 187 + 188 + ## Best Practices 189 + 190 + 1. **Use environment variables** - Don't hardcode proxy credentials 191 + 2. **Set NO_PROXY appropriately** - Avoid routing local traffic through proxy 192 + 3. **Test proxy before automation** - Verify connectivity with simple requests 193 + 4. **Handle proxy failures gracefully** - Implement retry logic for unstable proxies 194 + 5. **Rotate proxies for large scraping jobs** - Distribute load and avoid bans
+193
agents/skills/agent-browser/references/session-management.md
··· 1 + # Session Management 2 + 3 + Multiple isolated browser sessions with state persistence and concurrent browsing. 4 + 5 + **Related**: [authentication.md](authentication.md) for login patterns, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [Named Sessions](#named-sessions) 10 + - [Session Isolation Properties](#session-isolation-properties) 11 + - [Session State Persistence](#session-state-persistence) 12 + - [Common Patterns](#common-patterns) 13 + - [Default Session](#default-session) 14 + - [Session Cleanup](#session-cleanup) 15 + - [Best Practices](#best-practices) 16 + 17 + ## Named Sessions 18 + 19 + Use `--session` flag to isolate browser contexts: 20 + 21 + ```bash 22 + # Session 1: Authentication flow 23 + agent-browser --session auth open https://app.example.com/login 24 + 25 + # Session 2: Public browsing (separate cookies, storage) 26 + agent-browser --session public open https://example.com 27 + 28 + # Commands are isolated by session 29 + agent-browser --session auth fill @e1 "user@example.com" 30 + agent-browser --session public get text body 31 + ``` 32 + 33 + ## Session Isolation Properties 34 + 35 + Each session has independent: 36 + - Cookies 37 + - LocalStorage / SessionStorage 38 + - IndexedDB 39 + - Cache 40 + - Browsing history 41 + - Open tabs 42 + 43 + ## Session State Persistence 44 + 45 + ### Save Session State 46 + 47 + ```bash 48 + # Save cookies, storage, and auth state 49 + agent-browser state save /path/to/auth-state.json 50 + ``` 51 + 52 + ### Load Session State 53 + 54 + ```bash 55 + # Restore saved state 56 + agent-browser state load /path/to/auth-state.json 57 + 58 + # Continue with authenticated session 59 + agent-browser open https://app.example.com/dashboard 60 + ``` 61 + 62 + ### State File Contents 63 + 64 + ```json 65 + { 66 + "cookies": [...], 67 + "localStorage": {...}, 68 + "sessionStorage": {...}, 69 + "origins": [...] 
70 + } 71 + ``` 72 + 73 + ## Common Patterns 74 + 75 + ### Authenticated Session Reuse 76 + 77 + ```bash 78 + #!/bin/bash 79 + # Save login state once, reuse many times 80 + 81 + STATE_FILE="/tmp/auth-state.json" 82 + 83 + # Check if we have saved state 84 + if [[ -f "$STATE_FILE" ]]; then 85 + agent-browser state load "$STATE_FILE" 86 + agent-browser open https://app.example.com/dashboard 87 + else 88 + # Perform login 89 + agent-browser open https://app.example.com/login 90 + agent-browser snapshot -i 91 + agent-browser fill @e1 "$USERNAME" 92 + agent-browser fill @e2 "$PASSWORD" 93 + agent-browser click @e3 94 + agent-browser wait --load networkidle 95 + 96 + # Save for future use 97 + agent-browser state save "$STATE_FILE" 98 + fi 99 + ``` 100 + 101 + ### Concurrent Scraping 102 + 103 + ```bash 104 + #!/bin/bash 105 + # Scrape multiple sites concurrently 106 + 107 + # Start all sessions 108 + agent-browser --session site1 open https://site1.com & 109 + agent-browser --session site2 open https://site2.com & 110 + agent-browser --session site3 open https://site3.com & 111 + wait 112 + 113 + # Extract from each 114 + agent-browser --session site1 get text body > site1.txt 115 + agent-browser --session site2 get text body > site2.txt 116 + agent-browser --session site3 get text body > site3.txt 117 + 118 + # Cleanup 119 + agent-browser --session site1 close 120 + agent-browser --session site2 close 121 + agent-browser --session site3 close 122 + ``` 123 + 124 + ### A/B Testing Sessions 125 + 126 + ```bash 127 + # Test different user experiences 128 + agent-browser --session variant-a open "https://app.com?variant=a" 129 + agent-browser --session variant-b open "https://app.com?variant=b" 130 + 131 + # Compare 132 + agent-browser --session variant-a screenshot /tmp/variant-a.png 133 + agent-browser --session variant-b screenshot /tmp/variant-b.png 134 + ``` 135 + 136 + ## Default Session 137 + 138 + When `--session` is omitted, commands use the default session: 139 
+ 140 + ```bash 141 + # These use the same default session 142 + agent-browser open https://example.com 143 + agent-browser snapshot -i 144 + agent-browser close # Closes default session 145 + ``` 146 + 147 + ## Session Cleanup 148 + 149 + ```bash 150 + # Close specific session 151 + agent-browser --session auth close 152 + 153 + # List active sessions 154 + agent-browser session list 155 + ``` 156 + 157 + ## Best Practices 158 + 159 + ### 1. Name Sessions Semantically 160 + 161 + ```bash 162 + # GOOD: Clear purpose 163 + agent-browser --session github-auth open https://github.com 164 + agent-browser --session docs-scrape open https://docs.example.com 165 + 166 + # AVOID: Generic names 167 + agent-browser --session s1 open https://github.com 168 + ``` 169 + 170 + ### 2. Always Clean Up 171 + 172 + ```bash 173 + # Close sessions when done 174 + agent-browser --session auth close 175 + agent-browser --session scrape close 176 + ``` 177 + 178 + ### 3. Handle State Files Securely 179 + 180 + ```bash 181 + # Don't commit state files (contain auth tokens!) 182 + echo "*auth-state.json" >> .gitignore 183 + 184 + # Delete after use 185 + rm /tmp/auth-state.json 186 + ``` 187 + 188 + ### 4. Timeout Long Sessions 189 + 190 + ```bash 191 + # Set timeout for automated scripts 192 + timeout 60 agent-browser --session long-task get text body 193 + ```
+194
agents/skills/agent-browser/references/snapshot-refs.md
··· 1 + # Snapshot and Refs 2 + 3 + Compact element references that reduce context usage dramatically for AI agents. 4 + 5 + **Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [How Refs Work](#how-refs-work) 10 + - [Snapshot Command](#the-snapshot-command) 11 + - [Using Refs](#using-refs) 12 + - [Ref Lifecycle](#ref-lifecycle) 13 + - [Best Practices](#best-practices) 14 + - [Ref Notation Details](#ref-notation-details) 15 + - [Troubleshooting](#troubleshooting) 16 + 17 + ## How Refs Work 18 + 19 + Traditional approach: 20 + ``` 21 + Full DOM/HTML → AI parses → CSS selector → Action (~3000-5000 tokens) 22 + ``` 23 + 24 + agent-browser approach: 25 + ``` 26 + Compact snapshot → @refs assigned → Direct interaction (~200-400 tokens) 27 + ``` 28 + 29 + ## The Snapshot Command 30 + 31 + ```bash 32 + # Basic snapshot (shows page structure) 33 + agent-browser snapshot 34 + 35 + # Interactive snapshot (-i flag) - RECOMMENDED 36 + agent-browser snapshot -i 37 + ``` 38 + 39 + ### Snapshot Output Format 40 + 41 + ``` 42 + Page: Example Site - Home 43 + URL: https://example.com 44 + 45 + @e1 [header] 46 + @e2 [nav] 47 + @e3 [a] "Home" 48 + @e4 [a] "Products" 49 + @e5 [a] "About" 50 + @e6 [button] "Sign In" 51 + 52 + @e7 [main] 53 + @e8 [h1] "Welcome" 54 + @e9 [form] 55 + @e10 [input type="email"] placeholder="Email" 56 + @e11 [input type="password"] placeholder="Password" 57 + @e12 [button type="submit"] "Log In" 58 + 59 + @e13 [footer] 60 + @e14 [a] "Privacy Policy" 61 + ``` 62 + 63 + ## Using Refs 64 + 65 + Once you have refs, interact directly: 66 + 67 + ```bash 68 + # Click the "Sign In" button 69 + agent-browser click @e6 70 + 71 + # Fill email input 72 + agent-browser fill @e10 "user@example.com" 73 + 74 + # Fill password 75 + agent-browser fill @e11 "password123" 76 + 77 + # Submit the form 78 + agent-browser click @e12 79 + ``` 80 + 81 + ## Ref Lifecycle 82 + 83 + **IMPORTANT**: Refs 
are invalidated when the page changes! 84 + 85 + ```bash 86 + # Get initial snapshot 87 + agent-browser snapshot -i 88 + # @e1 [button] "Next" 89 + 90 + # Click triggers page change 91 + agent-browser click @e1 92 + 93 + # MUST re-snapshot to get new refs! 94 + agent-browser snapshot -i 95 + # @e1 [h1] "Page 2" ← Different element now! 96 + ``` 97 + 98 + ## Best Practices 99 + 100 + ### 1. Always Snapshot Before Interacting 101 + 102 + ```bash 103 + # CORRECT 104 + agent-browser open https://example.com 105 + agent-browser snapshot -i # Get refs first 106 + agent-browser click @e1 # Use ref 107 + 108 + # WRONG 109 + agent-browser open https://example.com 110 + agent-browser click @e1 # Ref doesn't exist yet! 111 + ``` 112 + 113 + ### 2. Re-Snapshot After Navigation 114 + 115 + ```bash 116 + agent-browser click @e5 # Navigates to new page 117 + agent-browser snapshot -i # Get new refs 118 + agent-browser click @e1 # Use new refs 119 + ``` 120 + 121 + ### 3. Re-Snapshot After Dynamic Changes 122 + 123 + ```bash 124 + agent-browser click @e1 # Opens dropdown 125 + agent-browser snapshot -i # See dropdown items 126 + agent-browser click @e7 # Select item 127 + ``` 128 + 129 + ### 4. 
Snapshot Specific Regions 130 + 131 + For complex pages, snapshot specific areas: 132 + 133 + ```bash 134 + # Snapshot just the form 135 + agent-browser snapshot @e9 136 + ``` 137 + 138 + ## Ref Notation Details 139 + 140 + ``` 141 + @e1 [tag type="value"] "text content" placeholder="hint" 142 + │ │ │ │ │ 143 + │ │ │ │ └─ Additional attributes 144 + │ │ │ └─ Visible text 145 + │ │ └─ Key attributes shown 146 + │ └─ HTML tag name 147 + └─ Unique ref ID 148 + ``` 149 + 150 + ### Common Patterns 151 + 152 + ``` 153 + @e1 [button] "Submit" # Button with text 154 + @e2 [input type="email"] # Email input 155 + @e3 [input type="password"] # Password input 156 + @e4 [a href="/page"] "Link Text" # Anchor link 157 + @e5 [select] # Dropdown 158 + @e6 [textarea] placeholder="Message" # Text area 159 + @e7 [div class="modal"] # Container (when relevant) 160 + @e8 [img alt="Logo"] # Image 161 + @e9 [checkbox] checked # Checked checkbox 162 + @e10 [radio] selected # Selected radio 163 + ``` 164 + 165 + ## Troubleshooting 166 + 167 + ### "Ref not found" Error 168 + 169 + ```bash 170 + # Ref may have changed - re-snapshot 171 + agent-browser snapshot -i 172 + ``` 173 + 174 + ### Element Not Visible in Snapshot 175 + 176 + ```bash 177 + # Scroll down to reveal element 178 + agent-browser scroll down 1000 179 + agent-browser snapshot -i 180 + 181 + # Or wait for dynamic content 182 + agent-browser wait 1000 183 + agent-browser snapshot -i 184 + ``` 185 + 186 + ### Too Many Elements 187 + 188 + ```bash 189 + # Snapshot specific container 190 + agent-browser snapshot @e5 191 + 192 + # Or use get text for content-only extraction 193 + agent-browser get text @e5 194 + ```
+173
agents/skills/agent-browser/references/video-recording.md
··· 1 + # Video Recording 2 + 3 + Capture browser automation as video for debugging, documentation, or verification. 4 + 5 + **Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start. 6 + 7 + ## Contents 8 + 9 + - [Basic Recording](#basic-recording) 10 + - [Recording Commands](#recording-commands) 11 + - [Use Cases](#use-cases) 12 + - [Best Practices](#best-practices) 13 + - [Output Format](#output-format) 14 + - [Limitations](#limitations) 15 + 16 + ## Basic Recording 17 + 18 + ```bash 19 + # Start recording 20 + agent-browser record start ./demo.webm 21 + 22 + # Perform actions 23 + agent-browser open https://example.com 24 + agent-browser snapshot -i 25 + agent-browser click @e1 26 + agent-browser fill @e2 "test input" 27 + 28 + # Stop and save 29 + agent-browser record stop 30 + ``` 31 + 32 + ## Recording Commands 33 + 34 + ```bash 35 + # Start recording to file 36 + agent-browser record start ./output.webm 37 + 38 + # Stop current recording 39 + agent-browser record stop 40 + 41 + # Restart with new file (stops current + starts new) 42 + agent-browser record restart ./take2.webm 43 + ``` 44 + 45 + ## Use Cases 46 + 47 + ### Debugging Failed Automation 48 + 49 + ```bash 50 + #!/bin/bash 51 + # Record automation for debugging 52 + 53 + agent-browser record start ./debug-$(date +%Y%m%d-%H%M%S).webm 54 + 55 + # Run your automation 56 + agent-browser open https://app.example.com 57 + agent-browser snapshot -i 58 + agent-browser click @e1 || { 59 + echo "Click failed - check recording" 60 + agent-browser record stop 61 + exit 1 62 + } 63 + 64 + agent-browser record stop 65 + ``` 66 + 67 + ### Documentation Generation 68 + 69 + ```bash 70 + #!/bin/bash 71 + # Record workflow for documentation 72 + 73 + agent-browser record start ./docs/how-to-login.webm 74 + 75 + agent-browser open https://app.example.com/login 76 + agent-browser wait 1000 # Pause for visibility 77 + 78 + agent-browser snapshot -i 79 + agent-browser 
fill @e1 "demo@example.com" 80 + agent-browser wait 500 81 + 82 + agent-browser fill @e2 "password" 83 + agent-browser wait 500 84 + 85 + agent-browser click @e3 86 + agent-browser wait --load networkidle 87 + agent-browser wait 1000 # Show result 88 + 89 + agent-browser record stop 90 + ``` 91 + 92 + ### CI/CD Test Evidence 93 + 94 + ```bash 95 + #!/bin/bash 96 + # Record E2E test runs for CI artifacts 97 + 98 + TEST_NAME="${1:-e2e-test}" 99 + RECORDING_DIR="./test-recordings" 100 + mkdir -p "$RECORDING_DIR" 101 + 102 + agent-browser record start "$RECORDING_DIR/$TEST_NAME-$(date +%s).webm" 103 + 104 + # Run test 105 + if run_e2e_test; then 106 + echo "Test passed" 107 + else 108 + echo "Test failed - recording saved" 109 + fi 110 + 111 + agent-browser record stop 112 + ``` 113 + 114 + ## Best Practices 115 + 116 + ### 1. Add Pauses for Clarity 117 + 118 + ```bash 119 + # Slow down for human viewing 120 + agent-browser click @e1 121 + agent-browser wait 500 # Let viewer see result 122 + ``` 123 + 124 + ### 2. Use Descriptive Filenames 125 + 126 + ```bash 127 + # Include context in filename 128 + agent-browser record start ./recordings/login-flow-2024-01-15.webm 129 + agent-browser record start ./recordings/checkout-test-run-42.webm 130 + ``` 131 + 132 + ### 3. Handle Recording in Error Cases 133 + 134 + ```bash 135 + #!/bin/bash 136 + set -e 137 + 138 + cleanup() { 139 + agent-browser record stop 2>/dev/null || true 140 + agent-browser close 2>/dev/null || true 141 + } 142 + trap cleanup EXIT 143 + 144 + agent-browser record start ./automation.webm 145 + # ... automation steps ... 146 + ``` 147 + 148 + ### 4. 
Combine with Screenshots 149 + 150 + ```bash 151 + # Record video AND capture key frames 152 + agent-browser record start ./flow.webm 153 + 154 + agent-browser open https://example.com 155 + agent-browser screenshot ./screenshots/step1-homepage.png 156 + 157 + agent-browser click @e1 158 + agent-browser screenshot ./screenshots/step2-after-click.png 159 + 160 + agent-browser record stop 161 + ``` 162 + 163 + ## Output Format 164 + 165 + - Default format: WebM (VP8/VP9 codec) 166 + - Compatible with all modern browsers and video players 167 + - Compressed but high quality 168 + 169 + ## Limitations 170 + 171 + - Recording adds slight overhead to automation 172 + - Large recordings can consume significant disk space 173 + - Some headless environments may have codec limitations
+105
agents/skills/agent-browser/templates/authenticated-session.sh
#!/bin/bash
# Template: Authenticated Session Workflow
# Purpose: Login once, save state, reuse for subsequent runs
# Usage: ./authenticated-session.sh <login-url> [state-file]
#
# RECOMMENDED: Use the auth vault instead of this template:
#   echo "<pass>" | agent-browser auth save myapp --url <login-url> --username <user> --password-stdin
#   agent-browser auth login myapp
# The auth vault stores credentials securely and the LLM never sees passwords.
#
# Arguments:
#   $1 - login URL (required)
#   $2 - path of the saved-state file (default: ./auth-state.json)
#
# Environment variables:
#   APP_USERNAME - Login username/email
#   APP_PASSWORD - Login password
#
# Two modes:
#   1. Discovery mode (default): Shows form structure so you can identify refs
#   2. Login mode: Performs actual login after you update the refs
#
# Setup steps:
#   1. Run once to see form structure (discovery mode)
#   2. Update refs in LOGIN FLOW section below
#   3. Set APP_USERNAME and APP_PASSWORD
#   4. Delete the DISCOVERY section

set -euo pipefail

LOGIN_URL="${1:?Usage: $0 <login-url> [state-file]}"
STATE_FILE="${2:-./auth-state.json}"

# With `set -e`, any failing step aborts the script. Close the browser first so
# a half-finished session is not left running. ERR only fires on failure, so
# the success paths that deliberately leave the session open are unaffected.
close_browser_on_error() {
    agent-browser close >/dev/null 2>&1 || true
}
trap close_browser_on_error ERR

echo "Authentication workflow: $LOGIN_URL"

# ================================================================
# SAVED STATE: Skip login if valid saved state exists
# ================================================================
if [[ -f "$STATE_FILE" ]]; then
    echo "Loading saved state from $STATE_FILE..."
    if agent-browser --state "$STATE_FILE" open "$LOGIN_URL" 2>/dev/null; then
        agent-browser wait --load networkidle

        # Heuristic: still being on a login/signin URL means the saved
        # session was not accepted.
        CURRENT_URL=$(agent-browser get url)
        if [[ "$CURRENT_URL" != *"login"* ]] && [[ "$CURRENT_URL" != *"signin"* ]]; then
            echo "Session restored successfully"
            agent-browser snapshot -i
            # Browser is intentionally left open for follow-up commands.
            exit 0
        fi
        echo "Session expired, performing fresh login..."
        agent-browser close 2>/dev/null || true
    else
        echo "Failed to load state, re-authenticating..."
    fi
    # The state is stale or unreadable either way; drop it so the next run
    # starts clean. `--` guards against a path that begins with a dash.
    rm -f -- "$STATE_FILE"
fi

# ================================================================
# DISCOVERY MODE: Shows form structure (delete after setup)
# ================================================================
echo "Opening login page..."
agent-browser open "$LOGIN_URL"
agent-browser wait --load networkidle

echo ""
echo "Login form structure:"
echo "---"
agent-browser snapshot -i
echo "---"
echo ""
echo "Next steps:"
echo "  1. Note the refs: username=@e?, password=@e?, submit=@e?"
echo "  2. Update the LOGIN FLOW section below with your refs"
echo "  3. Set: export APP_USERNAME='...' APP_PASSWORD='...'"
echo "  4. Delete this DISCOVERY MODE section"
echo ""
agent-browser close
exit 0

# ================================================================
# LOGIN FLOW: Uncomment and customize after discovery
# ================================================================
# : "${APP_USERNAME:?Set APP_USERNAME environment variable}"
# : "${APP_PASSWORD:?Set APP_PASSWORD environment variable}"
#
# agent-browser open "$LOGIN_URL"
# agent-browser wait --load networkidle
# agent-browser snapshot -i
#
# # Fill credentials (update refs to match your form)
# agent-browser fill @e1 "$APP_USERNAME"
# agent-browser fill @e2 "$APP_PASSWORD"
# agent-browser click @e3
# agent-browser wait --load networkidle
#
# # Verify login succeeded
# FINAL_URL=$(agent-browser get url)
# if [[ "$FINAL_URL" == *"login"* ]] || [[ "$FINAL_URL" == *"signin"* ]]; then
#     echo "Login failed - still on login page"
#     agent-browser screenshot /tmp/login-failed.png
#     agent-browser close
#     exit 1
# fi
#
# # Save state for future runs
# echo "Saving state to $STATE_FILE"
# agent-browser state save "$STATE_FILE"
# # The saved state restores a logged-in session, so keep it private.
# chmod 600 -- "$STATE_FILE"
# echo "Login successful"
# agent-browser snapshot -i
+69
agents/skills/agent-browser/templates/capture-workflow.sh
#!/bin/bash
# Template: Content Capture Workflow
# Purpose: Extract content from web pages (text, screenshots, PDF)
# Usage: ./capture-workflow.sh <url> [output-dir]
#
# Arguments:
#   $1 - URL to capture (required)
#   $2 - directory that receives the output files (default: current directory)
#
# Outputs:
#   - page-full.png: Full page screenshot
#   - page-structure.txt: Page element structure with refs
#   - page-text.txt: All text content
#   - page.pdf: PDF version
#
# Optional: Load auth state for protected pages

set -euo pipefail

TARGET_URL="${1:?Usage: $0 <url> [output-dir]}"
OUTPUT_DIR="${2:-.}"

echo "Capturing: $TARGET_URL"
mkdir -p -- "$OUTPUT_DIR"

# With `set -e`, a failing capture step aborts the script before the explicit
# close at the bottom. The EXIT trap makes sure the browser is closed on every
# exit path; it is disarmed once the normal close has run.
close_browser() {
    agent-browser close >/dev/null 2>&1 || true
}
trap close_browser EXIT

# Optional: Load authentication state
# if [[ -f "./auth-state.json" ]]; then
#     echo "Loading authentication state..."
#     agent-browser state load "./auth-state.json"
# fi

# Navigate to target
agent-browser open "$TARGET_URL"
agent-browser wait --load networkidle

# Get metadata
TITLE=$(agent-browser get title)
URL=$(agent-browser get url)
echo "Title: $TITLE"
echo "URL: $URL"

# Capture full page screenshot
agent-browser screenshot --full "$OUTPUT_DIR/page-full.png"
echo "Saved: $OUTPUT_DIR/page-full.png"

# Get page structure with refs
agent-browser snapshot -i > "$OUTPUT_DIR/page-structure.txt"
echo "Saved: $OUTPUT_DIR/page-structure.txt"

# Extract all text content
agent-browser get text body > "$OUTPUT_DIR/page-text.txt"
echo "Saved: $OUTPUT_DIR/page-text.txt"

# Save as PDF
agent-browser pdf "$OUTPUT_DIR/page.pdf"
echo "Saved: $OUTPUT_DIR/page.pdf"

# Optional: Extract specific elements using refs from structure
# agent-browser get text @e5 > "$OUTPUT_DIR/main-content.txt"

# Optional: Handle infinite scroll pages
# for i in {1..5}; do
#     agent-browser scroll down 1000
#     agent-browser wait 1000
# done
# agent-browser screenshot --full "$OUTPUT_DIR/page-scrolled.png"

# Cleanup (normal path); the trap is no longer needed afterwards.
agent-browser close
trap - EXIT

echo ""
echo "Capture complete:"
ls -la -- "$OUTPUT_DIR"
+62
agents/skills/agent-browser/templates/form-automation.sh
#!/bin/bash
# Template: Form Automation Workflow
# Purpose: Fill and submit web forms with validation
# Usage: ./form-automation.sh <form-url> [screenshot-path]
#
# Arguments:
#   $1 - URL of the form (required)
#   $2 - where to save the result screenshot (default: /tmp/form-result.png)
#
# This template demonstrates the snapshot-interact-verify pattern:
#   1. Navigate to form
#   2. Snapshot to get element refs
#   3. Fill fields using refs
#   4. Submit and verify result
#
# Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output

set -euo pipefail

FORM_URL="${1:?Usage: $0 <form-url> [screenshot-path]}"
# The default keeps the original fixed location; pass a second argument to
# avoid clobbering a previous run's screenshot in the shared /tmp directory.
SCREENSHOT_PATH="${2:-/tmp/form-result.png}"

# With `set -e`, a failing step aborts the script before the explicit close at
# the bottom. The EXIT trap closes the browser on every exit path; it is
# disarmed once the normal close has run.
close_browser() {
    agent-browser close >/dev/null 2>&1 || true
}
trap close_browser EXIT

echo "Form automation: $FORM_URL"

# Step 1: Navigate to form
agent-browser open "$FORM_URL"
agent-browser wait --load networkidle

# Step 2: Snapshot to discover form elements
echo ""
echo "Form structure:"
agent-browser snapshot -i

# Step 3: Fill form fields (customize these refs based on snapshot output)
#
# Common field types:
#   agent-browser fill @e1 "John Doe"           # Text input
#   agent-browser fill @e2 "user@example.com"   # Email input
#   agent-browser fill @e3 "SecureP@ss123"      # Password input
#   agent-browser select @e4 "Option Value"     # Dropdown
#   agent-browser check @e5                     # Checkbox
#   agent-browser click @e6                     # Radio button
#   agent-browser fill @e7 "Multi-line text"    # Textarea
#   agent-browser upload @e8 /path/to/file.pdf  # File upload
#
# Uncomment and modify:
# agent-browser fill @e1 "Test User"
# agent-browser fill @e2 "test@example.com"
# agent-browser click @e3  # Submit button

# Step 4: Wait for submission
# agent-browser wait --load networkidle
# agent-browser wait --url "**/success"  # Or wait for redirect

# Step 5: Verify result
echo ""
echo "Result:"
agent-browser get url
agent-browser snapshot -i

# Optional: Capture evidence
agent-browser screenshot "$SCREENSHOT_PATH"
echo "Screenshot saved: $SCREENSHOT_PATH"

# Cleanup (normal path); the trap is no longer needed afterwards.
agent-browser close
trap - EXIT
echo "Done"
-39
agents/skills/frontend-design/SKILL.md
··· 1 - --- 2 - name: frontend-design 3 - description: Create unique, visually appealing frontend interfaces with high design quality. Use this skill when the user asks to build web components, pages, or applications. Generates creative, polished code that avoids generic AI aesthetics. 4 - --- 5 - 6 - This skill guides creation of distinctive, production-grade frontend interfaces that avoid generic "AI slop" aesthetics. Implement real working code with exceptional attention to aesthetic details and creative choices. 7 - 8 - The user provides frontend requirements: a component, page, application, or interface to build. They may include context about the purpose, audience, or technical constraints. 9 - 10 - ## Design Thinking 11 - 12 - Before coding, understand the context and commit to a **BOLD** aesthetic direction: 13 - - **Purpose**: What problem does this interface solve? Who uses it? 14 - - **Tone**: Pick an extreme: brutally minimal, naturistic chaos, retro-futuristic, playful/toy-like, editorial/magazine, brutalist/raw, art deco/geometric, soft/pastel, industrial/utilitarian, etc. There are so many flavors to choose from. Use these for inspiration but design one that is true to the aesthetic direction. 15 - - **Constraints**: Technical requirements. Do semantic HTML, modern CSS, framework best practices, performance, accessibility. 16 - - **Differentiation**: What makes this UNFORGETTABLE? What's the one thing someone will remember? 17 - 18 - **CRITICAL**: Choose a clear conceptual direction and execute it with precision. Bold maximalism and refined minimalism both work - the key is intentionality, not intensity. 
19 - 20 - Then implement working code (HTML/CSS/JS) that is: 21 - - Production-grade and functional 22 - - Visually striking and memorable 23 - - Cohesive with a clear aesthetic point-of-view 24 - - Meticulously refined in every detail 25 - 26 - ## Frontend Aesthetics Guidelines 27 - 28 - Focus on: 29 - - **Typography**: Choose fonts that are beautiful, unique, and interesting. Avoid generic fonts like Arial and Inter; opt instead for distinctive choices that elevate the frontend's aesthetics; unexpected, characterful font choices. Pair a distinctive display font with a refined body font. 30 - - **Color & Theme**: Commit to a cohesive aesthetic. Use CSS variables for consistency. Dominant colors with sharp accents outperform timid, evenly-distributed palettes. 31 - - **Motion**: Use animations for effects and micro-interactions. Prioritize CSS-only solutions for HTML. Use Motion library for React when available. Focus on high-impact moments: one well-orchestrated page load with staggered reveals (animation-delay) creates more delight than scattered micro-interactions. Use scroll-triggering and hover states that surprise. 32 - - **Spatial Composition**: Unexpected layouts. Asymmetry. Overlap. Diagonal flow. Grid-breaking elements. Generous negative space OR controlled density. 33 - - **Backgrounds & Visual Details**: Create atmosphere and depth rather than defaulting to solid colors. Add contextual effects and textures that match the overall aesthetic. Apply creative forms like gradient meshes, noise textures, geometric patterns, layered transparencies, dramatic shadows, decorative borders, custom cursors, and grain overlays. 34 - 35 - NEVER use generic AI-generated aesthetics like overused font families (Inter, Roboto, Arial, system fonts), cliched color schemes (particularly purple gradients on white backgrounds), predictable layouts and component patterns, and cookie-cutter design that lacks context-specific character. 
36 - 37 - Interpret creatively and make unexpected choices that feel genuinely designed for the context. No two designs should be the same. Vary between light and dark themes, different fonts, different aesthetics. NEVER converge on common choices (Space Grotesk, for example) across generations. 38 - 39 - **IMPORTANT**: Pay careful attention to design precision: spacing, typography, and subtle details. Elegance comes from executing the vision well.
-28
agents/skills/simplify/SKILL.md
··· 1 - --- 2 - name: simplify 3 - description: Refactor code to be simpler while maintaining identical functionality 4 - --- 5 - 6 - # Simplify 7 - 8 - Refactor the given code to make it **simpler, clearer, and more maintainable** without changing what it does. 9 - 10 - ## Objective 11 - 12 - Simplify and clean the code. Implementation should be **straightforward and pragmatic**. The goal is to get the most minimal code possible. 13 - 14 - ## Principles 15 - 16 - - **Behavior parity:** Keep interfaces and semantics identical. No new features. Preserve flags, return codes, and observable side effects. 17 - - **KISS:** Prefer boring, obvious solutions over cleverness. Fewer moving parts > fewer lines. 18 - - **Small pieces:** Favor small, composable functions. Design for easy deletion and iteration. 19 - - **Prune aggressively:** Remove dead code, unused vars, redundant branches, defensive over-engineering, and needless indirection. 20 - - **Flatten flow:** Simplify complex conditionals and deep nesting; use clear guards and early returns. 21 - - **Standard library first:** Replace custom utilities with modern built-ins/framework primitives. 22 - - **Fail early and often:** Don't use blanket try/catch. Skip unnecessary validations. 23 - - **Communicate with types:** Use types to express contracts and invariants. Avoid type acrobatics and generic abstractions. 24 - - **Abstractions when earned:** Introduce/keep them only if they reduce duplication or isolate likely change. 25 - - **Minimal deps:** Don't add dependencies unless they materially simplify and are commonly available for the target runtime. 26 - - **No micro-optimizations** unless they remove complexity or are explicitly required. 27 - - **Make rules explicit:** Turn hidden assumptions into defaults, parameters, or assertions. 28 - - **Naming for intent:** Prefer clear, intention‑revealing names; one responsibility per function/module.
-320
agents/skills/todoist/SKILL.md
··· 1 - --- 2 - name: todoist 3 - description: Manage Todoist tasks, projects, labels, comments, and more via the td CLI 4 - --- 5 - 6 - # Todoist CLI (td) 7 - 8 - Use this skill when the user wants to interact with their Todoist tasks. 9 - 10 - ## Quick Reference 11 - 12 - - `td today` - Tasks due today and overdue 13 - - `td inbox` - Inbox tasks 14 - - `td upcoming` - Tasks due in next N days 15 - - `td completed` - Recently completed tasks 16 - - `td task add "content"` - Add a task 17 - - `td task list` - List tasks with filters 18 - - `td task complete <ref>` - Complete a task 19 - - `td project list` - List projects 20 - - `td label list` - List labels 21 - - `td filter list/view` - Manage and use saved filters 22 - - `td workspace list` - List workspaces 23 - - `td activity` - Activity logs 24 - - `td notification list` - Notifications 25 - - `td reminder add` - Task reminders 26 - - `td stats` - Productivity stats 27 - - `td settings view` - User settings 28 - - `td completion install` - Install shell completions 29 - - `td view <url>` - View supported Todoist entities/pages by URL 30 - - `td update` - Self-update the CLI to the latest version 31 - 32 - ## Output Formats 33 - 34 - All list commands support: 35 - - `--json` - JSON output (essential fields) 36 - - `--ndjson` - Newline-delimited JSON (streaming) 37 - - `--full` - Include all fields in JSON 38 - - `--raw` - Disable markdown rendering 39 - 40 - ## Shared List Options 41 - 42 - Most list commands also support: 43 - - `--limit <n>` - Limit number of results 44 - - `--all` - Fetch all results (no limit) 45 - - `--cursor <cursor>` - Continue from pagination cursor 46 - - `--show-urls` - Show web app URLs for each item 47 - 48 - ## Global Options 49 - 50 - - `--no-spinner` - Disable loading animations 51 - - `--progress-jsonl` - Machine-readable progress events (JSONL to stderr) 52 - - `-v, --verbose` - Verbose output to stderr (repeat: -v info, -vv detail, -vvv debug, -vvvv trace) 53 - - 
`--accessible` - Add text labels to color-coded output (due:/deadline:/~ prefixes, ★ for favorites). Also: `TD_ACCESSIBLE=1` 54 - 55 - ## References 56 - 57 - Tasks, projects, labels, and filters can be referenced by: 58 - - Name (fuzzy matched within context) 59 - - `id:xxx` - Explicit ID 60 - - Todoist URL - Paste directly from the web app (e.g., `https://app.todoist.com/app/task/buy-milk-8Jx4mVr72kPn3QwB` or `https://app.todoist.com/app/project/work-2pN7vKx49mRq6YhT`) 61 - 62 - ## Priority Mapping 63 - 64 - - p1 = Highest priority (API value 4) 65 - - p2 = High priority (API value 3) 66 - - p3 = Medium priority (API value 2) 67 - - p4 = Lowest priority (API value 1, default) 68 - 69 - ## Commands 70 - 71 - ### Today 72 - ```bash 73 - td today # Due today + overdue 74 - td today --json # JSON output 75 - td today --workspace "Work" # Filter to workspace 76 - td today --personal # Personal projects only 77 - td today --any-assignee # Include tasks assigned to others 78 - ``` 79 - 80 - ### Inbox 81 - ```bash 82 - td inbox # Inbox tasks 83 - td inbox --priority p1 # Filter by priority 84 - td inbox --due today # Filter by due date 85 - ``` 86 - 87 - ### Upcoming 88 - ```bash 89 - td upcoming # Next 7 days 90 - td upcoming 14 # Next 14 days 91 - td upcoming --workspace "Work" # Filter to workspace 92 - td upcoming --personal # Personal projects only 93 - td upcoming --any-assignee # Include tasks assigned to others 94 - ``` 95 - 96 - ### Completed 97 - ```bash 98 - td completed # Completed today 99 - td completed --since 2024-01-01 --until 2024-01-31 100 - td completed --project "Work" # Filter by project 101 - ``` 102 - 103 - ### Task Management 104 - ```bash 105 - # List with filters 106 - td task list --project "Work" 107 - td task list --label "urgent" --priority p1 108 - td task list --due today 109 - td task list --filter "today | overdue" 110 - td task list --assignee me 111 - td task list --assignee "john@example.com" 112 - td task list --unassigned 113 - td 
task list --workspace "Work" 114 - td task list --personal 115 - td task list --parent "Parent task" 116 - 117 - # View, complete, uncomplete 118 - td task view "task name" 119 - td task complete "task name" 120 - td task complete id:123456 121 - td task complete "task name" --forever # Stop recurrence 122 - td task uncomplete id:123456 # Reopen completed task 123 - 124 - # Add tasks 125 - td task add "New task" --due "tomorrow" --priority p2 126 - td task add "Task" --deadline "2024-03-01" --project "Work" 127 - td task add "Task" --duration 1h --section "Planning" --project "Work" 128 - td task add "Task" --labels "urgent,review" --parent "Parent task" 129 - td task add "Task" --description "Details here" --assignee me 130 - 131 - # Update 132 - td task update "task name" --due "next week" 133 - td task update "task name" --deadline "2024-06-01" 134 - td task update "task name" --no-deadline 135 - td task update "task name" --duration 2h 136 - td task update "task name" --assignee "john@example.com" 137 - td task update "task name" --unassign 138 - 139 - # Move 140 - td task move "task name" --project "Personal" 141 - td task move "task name" --section "In Progress" 142 - td task move "task name" --parent "Parent task" 143 - td task move "task name" --no-parent # Move to project root 144 - td task move "task name" --no-section # Remove from section 145 - 146 - # Delete and browse 147 - td task delete "task name" --yes 148 - td task browse "task name" # Open in browser 149 - ``` 150 - 151 - ### Projects 152 - ```bash 153 - td project list 154 - td project list --personal # Personal projects only 155 - td project view "Project Name" 156 - td project collaborators "Project Name" 157 - td project create --name "New Project" --color "blue" 158 - td project update "Project Name" --favorite 159 - td project archive "Project Name" 160 - td project unarchive "Project Name" 161 - td project delete "Project Name" --yes 162 - td project browse "Project Name" # Open in 
browser 163 - td project move "Project Name" --to-workspace "Acme" 164 - td project move "Project Name" --to-workspace "Acme" --folder "Engineering" 165 - td project move "Project Name" --to-workspace "Acme" --visibility team 166 - td project move "Project Name" --to-personal 167 - # move requires --yes to confirm (without it, shows a dry-run preview) 168 - ``` 169 - 170 - ### Labels 171 - ```bash 172 - td label list # Lists personal + shared labels 173 - td label view "urgent" # View label details and tasks 174 - td label view "team-review" # Works for shared labels too 175 - td label create --name "urgent" --color "red" 176 - td label update "urgent" --color "orange" 177 - td label delete "urgent" --yes 178 - td label browse "urgent" # Open in browser 179 - ``` 180 - 181 - Note: Shared labels (from collaborative projects) appear in `list` and can be viewed, but cannot be deleted/updated via the standard label commands since they have no ID. 182 - 183 - ### Comments 184 - ```bash 185 - td comment list --task "task name" 186 - td comment list --project "Project Name" -P # Project comments 187 - td comment add --task "task name" --content "Comment text" 188 - td comment add --task "task name" --content "See attached" --file ./report.pdf 189 - td comment view id:123 # View full comment 190 - td comment update id:123 --content "Updated text" 191 - td comment delete id:123 --yes 192 - td comment browse id:123 # Open in browser 193 - ``` 194 - 195 - ### Sections 196 - ```bash 197 - td section list "Work" # List sections in project (or --project "Work") 198 - td section list --project "Work" # Same, using named flag 199 - td section create --project "Work" --name "In Progress" 200 - td section update id:123 --name "Done" 201 - td section delete id:123 --yes 202 - td section browse id:123 # Open in browser 203 - ``` 204 - 205 - ### Filters 206 - ```bash 207 - td filter list 208 - td filter create --name "Urgent work" --query "p1 & #Work" 209 - td filter view "Urgent work" 
# Show tasks matching filter (alias: show) 210 - td filter update "Urgent work" --query "p1 & #Work & today" 211 - td filter delete "Urgent work" --yes 212 - td filter browse "Urgent work" # Open in browser 213 - ``` 214 - 215 - ### Workspaces 216 - ```bash 217 - td workspace list 218 - td workspace view "Workspace Name" 219 - td workspace projects "Workspace Name" # or --workspace "Workspace Name" 220 - td workspace users "Workspace Name" --role ADMIN,MEMBER # or --workspace "..." 221 - ``` 222 - 223 - ### Activity 224 - ```bash 225 - td activity # Recent activity 226 - td activity --since 2024-01-01 --until 2024-01-31 227 - td activity --type task --event completed 228 - td activity --project "Work" 229 - td activity --by me 230 - ``` 231 - 232 - ### Notifications 233 - ```bash 234 - td notification list 235 - td notification list --unread 236 - td notification list --type "item_assign" 237 - td notification view id:123 238 - td notification read --all --yes # Mark all as read 239 - td notification accept id:123 # Accept share invitation 240 - td notification reject id:123 # Reject share invitation 241 - ``` 242 - 243 - ### Reminders 244 - ```bash 245 - td reminder list "task name" # or --task "task name" 246 - td reminder add "task name" --before 30m # or --task "task name" --before 30m 247 - td reminder add "task name" --at "2024-01-15 10:00" 248 - td reminder update id:123 --before 1h 249 - td reminder delete id:123 --yes 250 - ``` 251 - 252 - ### Stats 253 - ```bash 254 - td stats # View karma and productivity 255 - td stats --json 256 - td stats goals --daily 10 --weekly 50 257 - td stats vacation --on # Enable vacation mode 258 - td stats vacation --off # Disable vacation mode 259 - ``` 260 - 261 - ### Settings 262 - ```bash 263 - td settings view 264 - td settings view --json 265 - td settings update --timezone "America/New_York" 266 - td settings update --time-format 24 --date-format intl 267 - td settings themes # List available themes 268 - ``` 269 - 
270 - ### Shell Completions 271 - ```bash 272 - td completion install # Install tab completions (prompts for shell) 273 - td completion install bash # Install for specific shell 274 - td completion install zsh 275 - td completion install fish 276 - td completion uninstall # Remove completions 277 - ``` 278 - 279 - ### View (URL Router) 280 - ```bash 281 - td view <todoist-url> # Auto-route to appropriate view by URL type 282 - td view https://app.todoist.com/app/task/buy-milk-abc123 283 - td view https://app.todoist.com/app/project/work-def456 284 - td view https://app.todoist.com/app/label/urgent-ghi789 285 - td view https://app.todoist.com/app/filter/work-tasks-jkl012 286 - td view https://app.todoist.com/app/today 287 - td view https://app.todoist.com/app/upcoming 288 - td view <url> --json # JSON output for entity views 289 - td view <url> --limit 25 --ndjson # Passthrough list options where supported 290 - ``` 291 - 292 - ### Update 293 - ```bash 294 - td update # Update CLI to latest version 295 - td update --check # Check for updates without installing 296 - ``` 297 - 298 - ## Examples 299 - 300 - ### Daily workflow 301 - ```bash 302 - td today --json | jq '.results | length' # Count today's tasks 303 - td inbox --limit 5 # Quick inbox check 304 - td upcoming # What's coming this week 305 - td completed # What I finished today 306 - ``` 307 - 308 - ### Filter by multiple criteria 309 - ```bash 310 - td task list --project "Work" --label "urgent" --priority p1 311 - td task list --filter "today & #Work" 312 - td task list --workspace "Work" --due today 313 - ``` 314 - 315 - ### Complete tasks efficiently 316 - ```bash 317 - td task complete "Review PR" 318 - td task complete id:123456789 319 - td task uncomplete id:123456789 # Reopen if needed 320 - ```
+1 -1
terminal/zshrc
# Listing and viewing shortcuts (eza / bat replacements for ls, tree, cat).
alias la='eza -la --icons'
alias tree='eza --tree --icons'
alias cat='bat -pp'
# Full update: system packages first, then the independent tool updates run in
# parallel inside a subshell and `wait` blocks until all of them finish.
# `npx --yes` suppresses npx's install confirmation prompt, which cannot be
# answered sensibly from a background job.
alias up='paru -Syu --devel --skipreview && (uv tool upgrade --all & npm update -g & pull-all-dirs ~/projects & npx --yes skills update & wait)'
# Todoist quick-add shortcuts: new high-priority task due today.
alias tn='td task add --project "Personal" --due "today" --priority p1'
alias tnw='td task add --project "Work" --due "today" --priority p1'
alias tls='td today'