A trust and safety agent that interacts with Osprey for investigation, real-time analysis, and prevention implementations

add osprey repo/ruleset fetching and validation

+156 -5
+20 -4
main.py
··· 29 29 click.option("--clickhouse-password"), 30 30 click.option("--clickhouse-database"), 31 31 click.option("--osprey-base-url"), 32 + click.option("--osprey-repo-url"), 33 + click.option("--osprey-ruleset-url"), 32 34 click.option("--model-api"), 33 35 click.option("--model-name"), 34 36 click.option("--model-api-key"), ··· 48 50 clickhouse_password: str | None, 49 51 clickhouse_database: str | None, 50 52 osprey_base_url: str | None, 53 + osprey_repo_url: str | None, 54 + osprey_ruleset_url: str | None, 51 55 model_api: Literal["anthropic", "openai", "openapi"] | None, 52 56 model_name: str | None, 53 57 model_api_key: str | None, 54 - ) -> tuple[Clickhouse, ToolExecutor, Agent]: 58 + ) -> tuple[Clickhouse, Osprey, ToolExecutor, Agent]: 55 59 http_client = httpx.AsyncClient() 56 60 57 61 clickhouse = Clickhouse( ··· 65 69 osprey = Osprey( 66 70 http_client=http_client, 67 71 base_url=osprey_base_url or CONFIG.osprey_base_url, 72 + osprey_repo_url=osprey_repo_url or CONFIG.osprey_repo_url, 73 + osprey_ruleset_url=osprey_ruleset_url or CONFIG.osprey_ruleset_url, 68 74 ) 69 75 70 76 ozone = Ozone() ··· 87 93 tool_executor=executor, 88 94 ) 89 95 90 - return clickhouse, executor, agent 96 + return clickhouse, osprey, executor, agent 91 97 92 98 93 99 @click.group() ··· 104 110 clickhouse_password: str | None, 105 111 clickhouse_database: str | None, 106 112 osprey_base_url: str | None, 113 + osprey_repo_url: str | None, 114 + osprey_ruleset_url: str | None, 107 115 model_api: Literal["anthropic", "openai", "openapi"] | None, 108 116 model_name: str | None, 109 117 model_api_key: str | None, 110 118 ): 111 - clickhouse, executor, agent = build_services( 119 + clickhouse, osprey, executor, agent = build_services( 112 120 clickhouse_host=clickhouse_host, 113 121 clickhouse_port=clickhouse_port, 114 122 clickhouse_user=clickhouse_user, 115 123 clickhouse_password=clickhouse_password, 116 124 clickhouse_database=clickhouse_database, 117 125 osprey_base_url=osprey_base_url, 126 + osprey_repo_url=osprey_repo_url or CONFIG.osprey_repo_url, 127 + osprey_ruleset_url=osprey_ruleset_url or CONFIG.osprey_ruleset_url, 118 128 model_api=model_api, 119 129 model_name=model_name, 120 130 model_api_key=model_api_key, ··· 123 133 async def run(): 124 134 await clickhouse.initialize() 125 135 await executor.initialize() 136 + await osprey.initialize() 126 137 async with asyncio.TaskGroup() as tg: 127 138 tg.create_task(agent.run()) 128 139 ··· 141 152 clickhouse_password: str | None, 142 153 clickhouse_database: str | None, 143 154 osprey_base_url: str | None, 155 + osprey_repo_url: str | None, 156 + osprey_ruleset_url: str | None, 144 157 model_api: Literal["anthropic", "openai", "openapi"] | None, 145 158 model_name: str | None, 146 159 model_api_key: str | None, 147 160 ): 148 - clickhouse, executor, agent = build_services( 161 + clickhouse, osprey, executor, agent = build_services( 149 162 clickhouse_host=clickhouse_host, 150 163 clickhouse_port=clickhouse_port, 151 164 clickhouse_user=clickhouse_user, 152 165 clickhouse_password=clickhouse_password, 153 166 clickhouse_database=clickhouse_database, 154 167 osprey_base_url=osprey_base_url, 168 + osprey_repo_url=osprey_repo_url or CONFIG.osprey_repo_url, 169 + osprey_ruleset_url=osprey_ruleset_url or CONFIG.osprey_ruleset_url, 155 170 model_api=model_api, 156 171 model_name=model_name, 157 172 model_api_key=model_api_key, ··· 160 175 async def run(): 161 176 await clickhouse.initialize() 162 177 await executor.initialize() 178 + await osprey.initialize() 163 179 logger.info("Services initialized. Starting interactive chat.") 164 180 print("\nAgent ready. Type your message (Ctrl+C to exit).\n") 165 181
+5
src/config.py
··· 46 46 ozone_allowed_labels: str = "" 47 47 """comma separated list of labels that Phoebe is allowed to apply. both specified to the agent via prompting and validated before applying labels directly""" 48 48 49 + # osprey config 49 50 osprey_base_url: str = "" 50 51 """the base url for your osprey instance""" 52 + osprey_repo_url: str = "https://github.com/roostorg/osprey" 53 + """the url to fetch the osprey codebase from. used for letting the agent validate written rules directly""" 54 + osprey_ruleset_url: str = "https://github.com/haileyok/atproto-ruleset" 55 + """the url to fetch the osprey ruleset you are running. used when validating written rules (i.e. for having the needed features available for validation)""" 51 56 52 57 model_config = SettingsConfigDict(env_file=".env") 53 58
+131 -1
src/osprey/osprey.py
··· 1 + import asyncio 2 + import logging 3 + from pathlib import Path 1 4 import httpx 2 5 3 6 from src.osprey.config import OspreyConfig 4 7 from src.osprey.udfs import UdfCatalog 5 8 9 + logger = logging.getLogger(__name__) 10 + 11 + DATA_DIR = Path("./data") 12 + 13 + OSPREY_REPO_PATH = Path("./data/osprey") 14 + 15 + OSPREY_RULESET_PATH = Path("./data/ruleset") 16 + 6 17 7 18 class Osprey: 8 - def __init__(self, http_client: httpx.AsyncClient, base_url: str) -> None: 19 + def __init__( 20 + self, 21 + http_client: httpx.AsyncClient, 22 + base_url: str, 23 + osprey_repo_url: str, 24 + osprey_ruleset_url: str, 25 + ) -> None: 9 26 self._http_client = http_client 10 27 self._base_url = base_url 28 + self._osprey_repo_url = osprey_repo_url 29 + self._osprey_ruleset_url = osprey_ruleset_url 30 + 31 + async def initialize(self): 32 + DATA_DIR.mkdir(exist_ok=True) 33 + 34 + if not OSPREY_REPO_PATH.exists(): 35 + logging.info( 36 + f"Fetching Osprey repo from '{self._osprey_repo_url}' and saving to '{OSPREY_REPO_PATH}'" 37 + ) 38 + await self._fetch_osprey_repo() 39 + else: 40 + logging.info("Osprey repo was already available, not fetching...") 41 + 42 + if not OSPREY_RULESET_PATH.exists(): 43 + logging.info( 44 + f"Fetching Osprey ruleset from '{self._osprey_ruleset_url}' and saving to '{OSPREY_RULESET_PATH}'" 45 + ) 46 + await self._fetch_osprey_ruleset() 47 + else: 48 + logging.info("Osprey ruleset was already available, not fetching...") 49 + 50 + logging.info("syncing python deps for osprey repo...") 51 + await self._repo_deps() 52 + 53 + logging.info("verifying current ruleset validates properly...") 54 + await self.validate_rules() 11 55 12 56 async def get_udfs(self) -> UdfCatalog: 13 57 """gets the udf documentation from the given osprey instance""" ··· 24 68 resp = await self._http_client.get(url) 25 69 resp.raise_for_status() 26 70 return OspreyConfig.model_validate(resp.json()) 71 + 72 + async def _fetch_osprey_repo(self): 73 + """fetches the osprey repo from the input http git url""" 74 + process = await asyncio.create_subprocess_exec( 75 + "git", 76 + "clone", 77 + self._osprey_repo_url, 78 + str(OSPREY_REPO_PATH), 79 + stderr=asyncio.subprocess.PIPE, 80 + ) 81 + 82 + assert process.stderr is not None 83 + 84 + await process.wait() 85 + 86 + if process.returncode != 0: 87 + stderr_content = await process.stderr.read() 88 + stderr_str = stderr_content.decode().strip() 89 + raise RuntimeError( 90 + f"Failed to fetch Osprey repo from specified url: {stderr_str}" 91 + ) 92 + 93 + async def _fetch_osprey_ruleset(self): 94 + """Fetches the osprey ruleset from the input http git url""" 95 + process = await asyncio.create_subprocess_exec( 96 + "git", 97 + "clone", 98 + self._osprey_ruleset_url, 99 + str(OSPREY_RULESET_PATH), 100 + stderr=asyncio.subprocess.PIPE, 101 + ) 102 + 103 + assert process.stderr is not None 104 + 105 + await process.wait() 106 + 107 + if process.returncode != 0: 108 + stderr_content = await process.stderr.read() 109 + stderr_str = stderr_content.decode().strip() 110 + raise RuntimeError( 111 + f"Failed to fetch Osprey ruleset from specified url: {stderr_str}" 112 + ) 113 + 114 + async def _repo_deps(self): 115 + """syncs deps with uv for the osprey repo""" 116 + process = await asyncio.create_subprocess_exec( 117 + "uv", 118 + "sync", 119 + "--frozen", 120 + stderr=asyncio.subprocess.PIPE, 121 + cwd=OSPREY_REPO_PATH, 122 + ) 123 + 124 + assert process.stderr is not None 125 + 126 + await process.wait() 127 + 128 + if process.returncode != 0: 129 + stderr_content = await process.stderr.read() 130 + stderr_str = stderr_content.decode().strip() 131 + raise RuntimeError( 132 + f"failed to sync python deps in osprey repo: {stderr_str}" 133 + ) 134 + 135 + async def validate_rules(self): 136 + """validates the rules that are in the specified ruleset directory. returns error if speicifed, otherwise None""" 137 + # uv run osprey-cli push-rules ../atproto-ruleset --dry-run 138 + process = await asyncio.create_subprocess_exec( 139 + "uv", 140 + "run", 141 + "osprey-cli", 142 + "push-rules", 143 + "../ruleset", 144 + "--dry-run", # doesn't actually push rules, only validates 145 + stderr=asyncio.subprocess.PIPE, 146 + cwd=OSPREY_REPO_PATH, 147 + ) 148 + 149 + assert process.stderr is not None 150 + 151 + await process.wait() 152 + 153 + if process.returncode != 0: 154 + stderr_content = await process.stderr.read() 155 + stderr_str = stderr_content.decode().strip() 156 + raise RuntimeError(f"WARNING! Rule validation failed! Error: {stderr_str}")