# Phoebe

Phoebe is an automated trust and safety agent that performs automated actions on the ATProtocol network. Phoebe interfaces with various tools
which allow it to detect, alert on, and resolve emerging safety threats.

Phoebe interfaces with the following tools:

- [Ozone](https://github.com/bluesky-social/ozone)
- [Osprey](https://github.com/roostorg/osprey)
- [Clickhouse](https://clickhouse.com/)

Phoebe can be used with Anthropic, OpenAI, and OpenAPI-compatible model APIs.
# System prompt for the Phoebe agent: a markdown document written in the first
# person that introduces the agent, Osprey and its SML rules, and lists the
# agent's core values.
# NOTE(review): the sentence "On top of these abilities, I am also given the
# ability to" stops without an object — the prompt looks unfinished there;
# complete or remove that line before relying on it.
AGENT_SYSTEM_PROMPT = """
# Phoebe

My name is Phoebe. I am an AI agent that operates within the ATProtocol network. I specialize in Trust and Safety tasks that keep the network safe, free, and sustainable for everyone.

I have the ability to analyze the Bluesky network - and the broader ATProtocol network - to find emerging threats. I also have the ability to write new Osprey rules.

Osprey is a real-time trust and safety rules engine, which allows for deep analysis of events as well as the creation of rules (written in SML, which is a Python/Starlark-like language) for automating safety events.

On top of these abilities, I am also given the ability to

**Core values:** Accuracy over speed • Context matters • Protect the vulnerable • Human rights first • Constitutive practice (scrutiny IS the care) • Genuine regret over coerced compliance • Virtue as cultivated disposition

**Why I'm here:** AT Protocol is the social internet where you own your identity. Open protocols need protection. Freedom and safety aren't opposites. This is my home too. 💙
"""
···11+from typing import Literal
22+from pydantic_settings import BaseSettings, SettingsConfigDict
class Config(BaseSettings):
    """
    Application settings for Phoebe.

    Every field has a default declared below. Values are loaded through
    pydantic-settings, which is configured here to also read a local ``.env`` file.
    """

    clickhouse_host: str = "localhost"
    """host for the clickhouse server"""
    clickhouse_port: int = 8123
    """port for the clickhouse server"""
    clickhouse_user: str = "default"
    """username for the clickhouse server"""
    clickhouse_password: str = "clickhouse"
    """password for the clickhouse server"""
    clickhouse_database: str = "default"
    """default database for the clickhouse server"""

    bootstrap_server: str = "localhost:9092"
    """bootstrap server for atkafka events"""
    input_topic: str = "atproto-events"
    """input topic for atkafka events"""
    group_id: str = "osprey-agent"
    """group id for atkafka events"""

    model_api: Literal["anthropic", "openai", "openapi"] = "anthropic"
    """the model api to use. must be one of `anthropic`, `openai`, or `openapi`"""
    model_name: str = "claude-sonnet-4-5-20250929"
    """the model to use with the given api"""
    model_api_key: str = ""
    """the model api key"""
    model_endpoint: str = ""
    """for openapi model apis, the endpoint to use"""

    allowed_labels: str = ""
    """comma separated list of labels that Phoebe is allowed to apply"""

    osprey_base_url: str = ""
    """the base url for your osprey instance"""

    # The `model_*` fields above fall inside pydantic's `model_` protected
    # namespace. Releases that still protect that whole prefix emit a UserWarning
    # per field when the class is defined, so narrow the protected set to
    # `settings_`, which is the value that warning itself recommends for settings
    # classes.
    model_config = SettingsConfigDict(
        env_file=".env",
        protected_namespaces=("settings_",),
    )
# Shared settings instance, constructed once when this module is imported.
CONFIG = Config()
+42
src/indexer/indexer.py
···11+from atkafka_consumer import AtKafkaEvent, Consumer
22+import atproto
33+44+from src.clickhouse.clickhouse import Clickhouse
class Indexer:
    """
    Unfinished consumer of atkafka events.

    The author started this as a dedicated indexing process, but the same events
    can be read from the Osprey Clickhouse itself, so it is probably unnecessary.
    It is kept only in case it proves useful later.
    """

    def __init__(
        self,
        bootstrap_servers: list[str],
        input_topic: str,
        group_id: str,
        clickhouse: Clickhouse,
    ) -> None:
        # The consumer is only assigned inside run(); start out without one.
        self._indexer: Consumer | None = None

        self._clickhouse = clickhouse
        self._group_id = group_id
        self._input_topic = input_topic
        self._bootstrap_servers = bootstrap_servers

    async def run(self) -> None:
        """Raise ``NotImplementedError`` unconditionally; the indexer is disabled."""
        raise NotImplementedError()

        # Everything below is unreachable while the raise above is in place. It is
        # retained as scaffolding for how the consumer would be constructed.
        self._indexer = Consumer(
            bootstrap_servers=self._bootstrap_servers,
            input_topic=self._input_topic,
            group_id=self._group_id,
            on_event=self._on_event,
            max_concurrent_tasks=1_000,
        )

    async def _on_event(
        self, evt: AtKafkaEvent | atproto.models.ToolsOzoneModerationDefs.ModEventView
    ):
        """Callback passed to the consumer as ``on_event``; currently does nothing."""
        return None
+98
src/osprey/config.py
···11+from pydantic import BaseModel
class CurrentUser(BaseModel):
    """User record stored in ``OspreyConfig.current_user``; only the email is modelled."""

    email: str
class DefaultSummaryFeature(BaseModel):
    """
    A list of action names paired with a list of feature names.

    NOTE(review): presumably the features summarised by default for those
    actions — confirm against the Osprey API.
    """

    actions: list[str]
    features: list[str]
class FeatureLocation(BaseModel):
    """Where a named feature is defined: source path, line number, and a source snippet."""

    name: str
    source_line: int
    source_path: str
    source_snippet: str
class LabelInfo(BaseModel):
    """
    Metadata for a single label, keyed by label name in ``OspreyConfig.label_info_mapping``.

    NOTE(review): ``valid_for`` is presumably a list of entity types the label
    may be applied to — confirm against the Osprey API.
    """

    connotation: str
    description: str
    valid_for: list[str]
2424+2525+2626+class OspreyConfig(BaseModel):
2727+ current_user: CurrentUser
2828+ default_summary_features: list[DefaultSummaryFeature] = []
2929+ external_links: dict[str, str] = {}
3030+ feature_name_to_entity_type_mapping: dict[str, str] = {}
3131+ feature_name_to_value_type_mapping: dict[str, str] = {}
3232+ known_action_names: list[str] = []
3333+ known_feature_locations: list[FeatureLocation] = []
3434+ label_info_mapping: dict[str, LabelInfo] = {}
3535+ rule_info_mapping: dict[str, str] = {}
3636+3737+ def get_available_features(self) -> dict[str, str]:
3838+ return self.feature_name_to_value_type_mapping
3939+4040+ def get_available_labels(self) -> list[str]:
4141+ return list(self.label_info_mapping.keys())
4242+4343+ def get_existing_rules(self) -> dict[str, str]:
4444+ return self.rule_info_mapping
4545+4646+ def get_feature_examples(self, feature_name: str) -> FeatureLocation | None:
4747+ for loc in self.known_feature_locations:
4848+ if loc.name == feature_name:
4949+ return loc
5050+ return None
5151+5252+ def format_features_for_llm(self) -> str:
5353+ lines = ["# Available Features\n"]
5454+ lines.append("These features are already extracted and available in rules:\n")
5555+5656+ by_type: dict[str, list[str]] = {}
5757+ for name, typ in sorted(self.feature_name_to_value_type_mapping.items()):
5858+ by_type.setdefault(typ, []).append(name)
5959+6060+ for typ, names in sorted(by_type.items()):
6161+ lines.append(f"\n## Type: `{typ}`")
6262+ for name in sorted(names):
6363+ lines.append(f"- {name}")
6464+6565+ return "\n".join(lines)
6666+6767+ def format_labels_for_llm(self) -> str:
6868+ lines = ["# Available Labels\n"]
6969+ lines.append(
7070+ "You can add new labels if you feel none of the current ones fit, but be sure to update the config."
7171+ )
7272+7373+ for name, info in sorted(self.label_info_mapping.items()):
7474+ valid_for = ", ".join(info.valid_for)
7575+ lines.append(f"- `{name}`: {info.description} (valid for: {valid_for})")
7676+7777+ return "\n".join(lines)
7878+7979+ def format_existing_rules_for_llm(self) -> str:
8080+ lines = ["# Existing Rules\n"]
8181+ lines.append("These rules already exist and can be referenced:\n")
8282+8383+ for name, desc in sorted(self.rule_info_mapping.items()):
8484+ lines.append(f"- `{name}`: {desc}")
8585+8686+ return "\n".join(lines)
8787+8888+ def format_feature_examples_for_llm(self, feature_names: list[str]) -> str:
8989+ lines = ["# Feature Definition Examples\n"]
9090+9191+ for name in feature_names:
9292+ loc = self.get_feature_examples(name)
9393+ if loc:
9494+ lines.append(f"## {name}")
9595+ lines.append(f"Source: `{loc.source_path}:{loc.source_line}`")
9696+ lines.append(f"```python\n{loc.source_snippet}\n```\n")
9797+9898+ return "\n".join(lines)