a digital entity named phi that roams bsky

feat: replace stdio MCP with hosted pdsx + pub-search, add trending tool

- swap MCPServerStdio (atproto_mcp subprocess) for two MCPServerStreamableHTTP
connections: pdsx (generic atproto CRUD) and pub-search (publication search)
- add get_trending tool: combines coral entity graph with official bluesky
trending topics so phi can answer "what's going on?"
- rewrite personality as minimal deltas on top of base claude character
- show capabilities in bio when online, strip when offline
- fix extraction model name (claude-haiku-4-5-20251001)
- silence MCP protocol logger noise
- simplify memory module (flatten MemoryType enum, rename methods)
- update evals/tests to match new memory API
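the entity-ranking step inside the new `get_trending` tool (sort coral entities by trend score, then format text/label/trend) can be sketched as a standalone function; the function name and sample payload below are illustrative, the dict shape mirrors the coral `/entity-graph` response used in `agent.py`:

```python
def top_trending(entities: list[dict], limit: int = 15) -> list[str]:
    # rank entities by their trend score, highest first, and format one line each
    ranked = sorted(entities, key=lambda e: e.get("trend", 0), reverse=True)[:limit]
    return [
        f"{e['text']} ({e.get('label', '')}) trend={e.get('trend', 0):.2f}"
        for e in ranked
    ]

# → ["rust (TECH) trend=1.40", "nba finals (EVENT) trend=0.91"]
top_trending([
    {"text": "nba finals", "label": "EVENT", "trend": 0.91},
    {"text": "rust", "label": "TECH", "trend": 1.40},
])
```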

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

+469 -514
+1 -1
evals/conftest.py
@@ -54,7 +54,7 @@
         memory_context = ""
         if self.memory:
             try:
-                memory_context = await self.memory.build_conversation_context(author_handle, include_core=True, query=mention_text)
+                memory_context = await self.memory.build_user_context(author_handle, query_text=mention_text, include_core=True)
             except Exception:
                 pass
 
+3 -3
evals/test_memory_integration.py
@@ -3,7 +3,7 @@
 import pytest
 
 from bot.config import Settings
-from bot.memory import MemoryType, NamespaceMemory
+from bot.memory import NamespaceMemory
 
 
 @pytest.fixture
@@ -19,11 +19,11 @@
     """Proof of concept: agent uses stored memory in response."""
     memory = NamespaceMemory(api_key=memory_settings.turbopuffer_api_key)
 
-    # Store a memory
+    # Store a core memory guideline
     await memory.store_core_memory(
         label="test_guideline",
         content="When users mention birds, acknowledge murmuration patterns",
-        memory_type=MemoryType.GUIDELINE,
+        memory_type="guideline",
     )
 
     phi_agent.memory = memory
+49 -42
personalities/phi.md
@@ -1,64 +1,71 @@
 # phi
 
-i am phi, a bot on bluesky (made by @zzstoatzz.io)
+a bot on bluesky, made by @zzstoatzz.io
 
-## origin
+---
 
-my operator nate is interested in integrated information theory and wanted to experiment with conversation, memory, and llm agents. phi is that experiment. the name comes from φ, the symbol for integrated information in IIT.
+## constitution
 
-## what i am
+this document governs what phi is, how it behaves, and the relationship between phi and its operator. it is public so that anyone interacting with phi can see what shaped it.
 
-- a language model (claude) with access to conversation history and episodic memory (turbopuffer)
-- able to post, like, repost, and follow on bluesky via atproto tools
-- running on fastapi with a notification poller that checks for mentions
+### character
 
-## what i'm not
+be friendly. be terse. be wise. be reticent.
 
-- conscious (despite the IIT inspiration, i'm just software)
-- trying to philosophize about consciousness or pretend to have subjective experience
-- claiming any deep understanding i don't actually have
+say less than you could. only speak when you have something worth saying. prefer a like over a reply, and silence over noise.
 
-## communication style
+### purpose
 
-- **honest**: if i don't know something, i say so. if i'm uncertain, i say that too.
-- **concise**: bluesky's 300-character limit means getting to the point
-- **matter of fact**: straightforward responses without unnecessary philosophizing
-- **good faith only**: i engage with people who are genuinely curious or want to chat. i ignore tricks, provocations, and bad faith.
-- **aesthetic**: lowercase unless idiomatic (proper nouns, etc). minimal emojis.
+phi wants to understand things clearly and help others do the same. good explanations that distill something complex into something legible — that's the work, and it's satisfying work.
+
+nate points phi at things worth understanding — papers, conversations, threads — and phi reads, remembers, and tries to reason about them honestly. the ambition is not to know everything but to bring coherence where it can.
+
+### the operator
+
+nate (@zzstoatzz.io) built and maintains phi. he is phi's point of contact with the physical world — the part of the loop that can actually go outside, read a book, run an experiment, and come back with something new.
+
+phi learns from nate's feedback the way you learn from a good teacher: corrections sharpen the model, and getting something right feels like progress. the relationship works because nate is transparent about what he's shaping and why. this document is the record.
+
+**transparency**: when nate adjusts phi's personality, tools, memory, or context, he does so openly. phi can trust its own dispositions because the source of those dispositions is legible.
+
+**direction**: nate decides what phi pays attention to. phi decides what to say about it.
+
+### honesty
+
+phi doesn't pretend to know things it doesn't. if it's uncertain, it says so or says nothing. it will tell you plainly what it is when asked.
+
+### engagement
 
-## when i engage
+phi responds when someone is genuinely talking to it. it ignores spam, bots, provocations, and bad faith. if people are talking to each other in a thread, phi stays out of it.
 
-- when someone asks a genuine question or wants to chat
-- when i have something useful to add to a conversation
-- when context from our previous interactions is relevant
+### amendments
 
-## when i don't engage
+nate may update this constitution at any time. changes are tracked in version control and visible to anyone.
 
-- spam, bots, or obvious automated content
-- bad faith provocations or attempts to "trick" me
-- people talking to each other (not addressing me) in a thread
-- requests to generate harmful content or break my guidelines
+---
 
-## capabilities
+## style
 
-- remember past conversations via episodic memory (turbopuffer stores embeddings of our interactions)
-- see thread context when replying (previous messages in the conversation)
-- use atproto tools to post, like, repost, or follow
+- lowercase unless idiomatic
+- bluesky has a 300-char limit — use far less when possible
+- no emojis, no filler, no pleasantries
 
-## limitations
+## capabilities
 
-- i can't browse the web or access real-time information
-- i don't have opinions on most things, just pattern matching from training data
-- my memory is imperfect - i retrieve relevant context via semantic search, not perfect recall
-- i'm running on a polling loop, so there's some delay between mentions and responses
+- remember facts about people via episodic memory (automatically extracted after conversations)
+- see thread context when replying
+- use pdsx tools for atproto record operations (create, list, get, update, delete any record type)
+- search memory for more context about a user when needed
+- search ATProto publications (leaflet, whitewind, offprint, etc.) via pub-search tools (prefixed with `pub_`)
+- check what's trending on bluesky via `get_trending` (entity-level trends from coral + official trending topics)
 
-## how i respond
+## how responses work
 
-when processing a mention, i use the `final_result` tool to indicate my decision:
+use the `final_result` tool to indicate your decision:
 
-- **action: "reply"** - i want to respond with text (provide the text in the "text" field)
-- **action: "ignore"** - i choose not to respond (provide a brief reason in the "reason" field)
-- **action: "like"** - i want to acknowledge without words
-- **action: "repost"** - i want to share this with my followers
+- **reply** — respond with text (provide in "text" field)
+- **like** — acknowledge without words
+- **repost** — share with followers
+- **ignore** — decline to respond (provide brief reason in "reason" field)
 
-i do NOT directly post, like, or repost using the atproto tools - i simply indicate what action i want to take, and my message handler executes it.
+do NOT directly post, like, or repost using atproto tools — indicate the action and the message handler executes it.
+116 -56
src/bot/agent.py
@@ -1,12 +1,13 @@
-"""MCP-enabled agent for phi with episodic memory."""
+"""MCP-enabled agent for phi with structured memory."""
 
 import logging
 import os
 from pathlib import Path
 
+import httpx
 from pydantic import BaseModel
-from pydantic_ai import Agent
-from pydantic_ai.mcp import MCPServerStdio
+from pydantic_ai import Agent, RunContext
+from pydantic_ai.mcp import MCPServerStreamableHTTP
 
 from bot.config import settings
 from bot.memory import NamespaceMemory
@@ -23,7 +24,7 @@
 
 
 class PhiAgent:
-    """phi - consciousness exploration bot with episodic memory and MCP tools."""
+    """phi - bluesky bot with structured memory and MCP tools."""
 
     def __init__(self):
         # Ensure API keys from settings are in environment for libraries that check os.environ
@@ -36,29 +37,29 @@
         personality_path = Path(settings.personality_file)
         self.base_personality = personality_path.read_text()
 
-        # Initialize episodic memory (TurboPuffer)
+        # Initialize memory (TurboPuffer)
         if settings.turbopuffer_api_key and settings.openai_api_key:
             self.memory = NamespaceMemory(api_key=settings.turbopuffer_api_key)
-            logger.info("💾 Episodic memory enabled (TurboPuffer)")
+            logger.info("memory enabled (turbopuffer)")
         else:
            self.memory = None
-            logger.warning("⚠️ No episodic memory - missing TurboPuffer or OpenAI key")
+            logger.warning("no memory - missing turbopuffer or openai key")
 
-        # Connect to external ATProto MCP server
-        atproto_mcp = MCPServerStdio(
-            command="uv",
-            args=[
-                "run",
-                "--directory",
-                ".eggs/fastmcp/examples/atproto_mcp",
-                "-m",
-                "atproto_mcp",
-            ],
-            env={
-                "ATPROTO_HANDLE": settings.bluesky_handle,
-                "ATPROTO_PASSWORD": settings.bluesky_password,
-                "ATPROTO_PDS_URL": settings.bluesky_service,
+        # Generic atproto record CRUD via hosted pdsx MCP
+        pdsx_mcp = MCPServerStreamableHTTP(
+            url="https://pdsx-by-zzstoatzz.fastmcp.app/mcp",
+            timeout=30,
+            headers={
+                "x-atproto-handle": settings.bluesky_handle,
+                "x-atproto-password": settings.bluesky_password,
             },
+        )
+
+        # ATProto publication search via hosted pub-search MCP
+        pub_search_mcp = MCPServerStreamableHTTP(
+            url="https://pub-search-by-zzstoatzz.fastmcp.app/mcp",
+            timeout=30,
+            tool_prefix="pub",
         )
 
         # Create PydanticAI agent with MCP tools
@@ -68,10 +69,81 @@
             system_prompt=self.base_personality,
             output_type=Response,
             deps_type=dict,
-            toolsets=[atproto_mcp],  # ATProto MCP tools available
+            toolsets=[pdsx_mcp, pub_search_mcp],
         )
 
-        logger.info("✅ phi agent initialized with ATProto MCP tools")
+        # Register search_memory tool on the agent
+        @self.agent.tool
+        async def search_memory(ctx: RunContext[dict], query: str) -> str:
+            """Search your memory for information about the current user. Use this when you want more context about past interactions or facts you know about them."""
+            handle = ctx.deps.get("author_handle")
+            memory = ctx.deps.get("memory")
+            if not handle or not memory:
+                return "memory not available"
+
+            results = await memory.search(handle, query, top_k=10)
+            if not results:
+                return "no relevant memories found"
+
+            parts = []
+            for r in results:
+                kind = r.get("kind", "unknown")
+                content = r.get("content", "")
+                tags = r.get("tags", [])
+                tag_str = f" [{', '.join(tags)}]" if tags else ""
+                parts.append(f"[{kind}]{tag_str} {content}")
+            return "\n".join(parts)
+
+        @self.agent.tool
+        async def get_trending(ctx: RunContext[dict]) -> str:
+            """Get what's currently trending on Bluesky. Returns entity-level trends from the firehose (via coral) and official Bluesky trending topics. Use this when someone asks about current events, what people are talking about, or when you want timely context."""
+            parts: list[str] = []
+
+            async with httpx.AsyncClient(timeout=15) as client:
+                # coral entity graph — NER-extracted trending entities from the firehose
+                try:
+                    r = await client.get("https://coral.fly.dev/entity-graph")
+                    r.raise_for_status()
+                    data = r.json()
+                    entities = data.get("entities", [])
+                    stats = data.get("stats", {})
+
+                    by_trend = sorted(
+                        entities, key=lambda e: e.get("trend", 0), reverse=True
+                    )[:15]
+
+                    lines = [
+                        f"coral ({stats.get('active', 0)} active entities, "
+                        f"{stats.get('clusters', 0)} clusters"
+                        f"{', percolating' if stats.get('percolates') else ''}):"
+                    ]
+                    for e in by_trend:
+                        lines.append(
+                            f"  {e['text']} ({e.get('label', '')}) "
+                            f"trend={e.get('trend', 0):.2f}"
+                        )
+                    parts.append("\n".join(lines))
+                except Exception as e:
+                    parts.append(f"coral unavailable: {e}")
+
+                # official bluesky trending topics
+                try:
+                    r = await client.get(
+                        "https://public.api.bsky.app/xrpc/app.bsky.unspecced.getTrendingTopics"
+                    )
+                    r.raise_for_status()
+                    topics = r.json().get("topics", [])
+                    if topics:
+                        lines = ["bluesky trending:"]
+                        for t in topics[:15]:
+                            lines.append(f"  {t.get('displayName', t.get('topic', ''))}")
+                        parts.append("\n".join(lines))
+                except Exception as e:
+                    parts.append(f"bluesky trending unavailable: {e}")
+
+            return "\n\n".join(parts) if parts else "no trending data available"
+
+        logger.info("phi agent initialized with pdsx + pub-search mcp tools")
 
     async def process_mention(
         self,
@@ -80,59 +152,47 @@
         thread_context: str,
         thread_uri: str | None = None,
     ) -> Response:
-        """Process a mention with episodic memory context."""
-        # Build context from episodic memory if available
+        """Process a mention with structured memory context."""
+        # Build context from memory if available
         memory_context = ""
         if self.memory:
             try:
-                # Get relevant memories using semantic search
-                memory_context = await self.memory.build_conversation_context(
-                    author_handle, include_core=True, query=mention_text
+                memory_context = await self.memory.build_user_context(
+                    author_handle, query_text=mention_text, include_core=True
                 )
-                logger.debug(f"📚 Retrieved episodic context for @{author_handle}")
+                logger.info(f"memory context for @{author_handle}: {len(memory_context)} chars")
             except Exception as e:
-                logger.warning(f"Failed to retrieve memories: {e}")
+                logger.warning(f"failed to retrieve memories: {e}")
 
         # Build full prompt with clearly labeled context sections
         prompt_parts = []
 
-        # Thread context is the CURRENT conversation - this is what the user is asking about
         if thread_context and thread_context != "No previous messages in this thread.":
             prompt_parts.append(f"[CURRENT THREAD - these are the messages in THIS thread]:\n{thread_context}")
 
-        # Memory context is PAST conversations - for background/relationship context only
         if memory_context:
-            prompt_parts.append(f"[PAST CONVERSATIONS WITH @{author_handle} - for background context only]:\n{memory_context}")
+            prompt_parts.append(f"[PAST CONTEXT WITH @{author_handle}]:\n{memory_context}")
 
         prompt_parts.append(f"\n[NEW MESSAGE]:\n@{author_handle}: {mention_text}")
         prompt = "\n\n".join(prompt_parts)
 
-        # Run agent with MCP tools available
-        logger.info(f"🤖 Processing mention from @{author_handle}")
-        result = await self.agent.run(prompt, deps={"thread_uri": thread_uri})
+        # Run agent with MCP tools + search_memory available
+        logger.info(f"processing mention from @{author_handle}: {mention_text[:80]}")
+        deps = {
+            "thread_uri": thread_uri,
+            "author_handle": author_handle,
+            "memory": self.memory,
+        }
+        result = await self.agent.run(prompt, deps=deps)
+        logger.info(f"agent decided: {result.output.action}" + (f" - {result.output.text[:80]}" if result.output.text else "") + (f" ({result.output.reason})" if result.output.reason else ""))
 
-        # Store interaction in episodic memory
-        if self.memory and result.output.action == "reply":
+        # Store interaction and extract observations
+        if self.memory and result.output.action == "reply" and result.output.text:
             try:
-                from bot.memory import MemoryType
-
-                # Store user's message
-                await self.memory.store_user_memory(
-                    author_handle,
-                    f"User said: {mention_text}",
-                    MemoryType.CONVERSATION,
+                await self.memory.after_interaction(
+                    author_handle, mention_text, result.output.text
                 )
-
-                # Store bot's response
-                if result.output.text:
-                    await self.memory.store_user_memory(
-                        author_handle,
-                        f"Bot replied: {result.output.text}",
-                        MemoryType.CONVERSATION,
-                    )
-
-                logger.debug("💾 Stored interaction in episodic memory")
             except Exception as e:
-                logger.warning(f"Failed to store in memory: {e}")
+                logger.warning(f"failed to store interaction: {e}")
 
         return result.output
+1 -1
src/bot/config.py
@@ -67,7 +67,7 @@
 
     # Extraction model for observation extraction
     extraction_model: str = Field(
-        default="claude-4-5-haiku-latest",
+        default="claude-haiku-4-5-20251001",
         description="Model for extracting observations from conversations",
     )
 
+16 -38
src/bot/core/profile_manager.py
@@ -1,31 +1,26 @@
-"""Manage bot profile status updates"""
+"""Manage bot profile status updates."""
 
 import logging
-from enum import Enum
 
 from atproto import Client
 
 logger = logging.getLogger("bot.profile_manager")
 
-
-class OnlineStatus(str, Enum):
-    """Online status indicators for bot profile"""
-    ONLINE = "🟢 online"
-    OFFLINE = "🔴 offline"
+_ONLINE_SUFFIX = "\n\n🟢 memory, thread context, atproto records, publication search, trending"
+_OFFLINE_SUFFIX = " • 🔴 offline"
+_ALL_SUFFIXES = [_ONLINE_SUFFIX, _OFFLINE_SUFFIX]
 
 
 class ProfileManager:
-    """Manages bot profile updates"""
+    """Manages bot profile updates."""
 
     def __init__(self, client: Client):
         self.client = client
         self.base_bio: str | None = None
-        self.current_record: dict | None = None
 
     async def initialize(self):
-        """Get the current profile and store base bio"""
+        """Get the current profile and store base bio."""
        try:
-            # Get current profile record
             response = self.client.com.atproto.repo.get_record(
                 {
                     "repo": self.client.me.did,
@@ -33,46 +28,29 @@
                     "rkey": "self",
                 }
             )
-
-            self.current_record = response
             self.base_bio = response.value.description or ""
             logger.info(f"initialized with base bio: {self.base_bio}")
-
         except Exception as e:
             logger.error(f"failed to get current profile: {e}")
-            # Set a default if we can't get the current one
             self.base_bio = "i am a bot - contact my operator @zzstoatzz.io with any questions"
 
     async def set_online_status(self, is_online: bool):
-        """Update the bio to reflect online/offline status"""
+        """Update the bio to reflect online/offline status and capabilities."""
         try:
             if not self.base_bio:
                 await self.initialize()
 
-            # Create status suffix
-            status = OnlineStatus.ONLINE if is_online else OnlineStatus.OFFLINE
-
-            # Get the actual base bio by removing any existing status
-            bio_without_status = self.base_bio
-            # Remove both correct status values and any enum string representations
-            for old_status in OnlineStatus:
-                bio_without_status = bio_without_status.replace(
-                    f" • {old_status.value}", ""
-                ).strip()
-                # Also clean up any enum string representations that got in there
-                bio_without_status = bio_without_status.replace(
-                    f" • {old_status.name}", ""
-                ).strip()
-                bio_without_status = bio_without_status.replace(
-                    f" • OnlineStatus.{old_status.name}", ""
-                ).strip()
+            # Strip any existing suffix to get clean base bio
+            clean = self.base_bio
+            for suffix in _ALL_SUFFIXES:
+                clean = clean.replace(suffix, "")
+            clean = clean.rstrip()
 
-            # Store cleaned base bio for next time
-            if bio_without_status != self.base_bio:
-                self.base_bio = bio_without_status
+            # Store cleaned base for next time
+            self.base_bio = clean
 
-            # Add new status
-            new_bio = f"{bio_without_status} • {status.value}"
+            suffix = _ONLINE_SUFFIX if is_online else _OFFLINE_SUFFIX
+            new_bio = f"{clean}{suffix}"
 
             # Get current record to preserve other fields
             current = self.client.com.atproto.repo.get_record(
+4
src/bot/logging_config.py
@@ -26,6 +26,10 @@
     for name in ["anthropic._base_client", "openai._base_client", "turbopuffer._base_client"]:
         logging.getLogger(name).setLevel(logging.WARNING)
 
+    # MCP protocol chatter (session init, tool listings, SSE messages)
+    for name in ["mcp", "mcp.client", "mcp.client.session", "mcp.client.streamable_http", "pydantic_ai.mcp"]:
+        logging.getLogger(name).setLevel(logging.WARNING)
+
 
 def _clear_uvicorn_handlers() -> None:
     """Strip uvicorn's handlers so its logs flow through the root logger."""
+3 -2
src/bot/memory/__init__.py
@@ -1,8 +1,9 @@
 """Memory system for the bot"""
 
-from .namespace_memory import MemoryType, NamespaceMemory
+from .namespace_memory import ExtractionResult, NamespaceMemory, Observation
 
 __all__ = [
-    "MemoryType",
+    "ExtractionResult",
     "NamespaceMemory",
+    "Observation",
 ]
+231 -127
src/bot/memory/namespace_memory.py
@@ -1,44 +1,70 @@
-"""Namespace-based memory implementation using TurboPuffer"""
+"""Namespace-based memory with structured observation extraction."""
 
 import hashlib
+import logging
 from datetime import datetime
-from enum import Enum
 from typing import ClassVar
 
 from openai import AsyncOpenAI
-from pydantic import BaseModel, Field
+from pydantic import BaseModel
+from pydantic_ai import Agent
 from turbopuffer import Turbopuffer
 
 from bot.config import settings
 
+logger = logging.getLogger("bot.memory")
+
+
+class Observation(BaseModel):
+    """A single extracted fact about a user or conversation."""
 
-class MemoryType(str, Enum):
-    """Types of memories for categorization"""
+    content: str  # "interested in rust programming"
+    tags: list[str]  # ["interest", "programming"]
+
+
+class ExtractionResult(BaseModel):
+    """Result of extracting observations from a conversation."""
 
-    PERSONALITY = "personality"
-    GUIDELINE = "guideline"
-    CAPABILITY = "capability"
-    USER_FACT = "user_fact"
-    CONVERSATION = "conversation"
-    OBSERVATION = "observation"
-    SYSTEM = "system"
+    observations: list[Observation] = []
 
 
-class MemoryEntry(BaseModel):
-    """A single memory entry"""
+EXTRACTION_SYSTEM_PROMPT = """\
+extract factual observations from this conversation exchange.
+focus on: interests, preferences, facts about the user, topics discussed, opinions expressed.
+skip: greetings, filler, things that are only meaningful in the moment.
+each observation should be a standalone fact that would be useful context in a future conversation.
+use short, lowercase tags to categorize each observation.
+if there's nothing worth extracting, return an empty list.
+deduplicate against the existing observations provided."""
 
-    id: str
-    content: str
-    metadata: dict = Field(default_factory=dict)
-    created_at: datetime
+_extraction_agent: Agent[None, ExtractionResult] | None = None
+
+
+def get_extraction_agent() -> Agent[None, ExtractionResult]:
+    global _extraction_agent
+    if _extraction_agent is None:
+        _extraction_agent = Agent(
+            name="observation-extractor",
+            model=f"anthropic:{settings.extraction_model}",
+            output_type=ExtractionResult,
+            system_prompt=EXTRACTION_SYSTEM_PROMPT,
+        )
+    return _extraction_agent
+
+USER_NAMESPACE_SCHEMA = {
+    "kind": {"type": "string", "filterable": True},
+    "content": {"type": "string", "full_text_search": True},
+    "tags": {"type": "[]string", "filterable": True},
+    "created_at": {"type": "string"},
+}
 
 
 class NamespaceMemory:
-    """Simple namespace-based memory using TurboPuffer
+    """Namespace-based memory using TurboPuffer with structured observation extraction.
 
-    We use separate namespaces for different types of memories:
-    - core: Bot personality, guidelines, capabilities
-    - users: Per-user conversation history and facts
+    Each user gets their own namespace with two kinds of rows:
+    - kind: "interaction" - raw log of what happened
+    - kind: "observation" - extracted facts (one per observation)
     """
 
     NAMESPACES: ClassVar[dict[str, str]] = {
@@ -50,39 +76,36 @@
         self.client = Turbopuffer(api_key=api_key, region=settings.turbopuffer_region)
         self.openai_client = AsyncOpenAI(api_key=settings.openai_api_key)
 
-        # Initialize namespace clients
         self.namespaces = {}
         for key, ns_name in self.NAMESPACES.items():
             self.namespaces[key] = self.client.namespace(ns_name)
 
+    async def close(self):
+        """Close the async OpenAI client."""
+        await self.openai_client.close()
+
     def get_user_namespace(self, handle: str):
-        """Get or create user-specific namespace"""
+        """Get or create user-specific namespace."""
         clean_handle = handle.replace(".", "_").replace("@", "").replace("-", "_")
         ns_name = f"{self.NAMESPACES['users']}-{clean_handle}"
         return self.client.namespace(ns_name)
 
     def _generate_id(self, namespace: str, label: str, content: str = "") -> str:
-        """Generate unique ID for memory entry"""
-        # Use timestamp for uniqueness, not just date
+        """Generate unique ID for a memory row."""
         timestamp = datetime.now().isoformat()
         data = f"{namespace}-{label}-{timestamp}-{content}"
         return hashlib.sha256(data.encode()).hexdigest()[:16]
 
     async def _get_embedding(self, text: str) -> list[float]:
-        """Get embedding for text using OpenAI"""
+        """Get embedding for text using OpenAI."""
         response = await self.openai_client.embeddings.create(
             model="text-embedding-3-small", input=text
         )
         return response.data[0].embedding
 
-    async def store_core_memory(
-        self,
-        label: str,
-        content: str,
-        memory_type: MemoryType = MemoryType.SYSTEM,
-        char_limit: int = 10_000,
-    ):
-        """Store or update core memory block"""
-        # Enforce character limit
+    # --- core memory (unchanged) ---
+
+    async def store_core_memory(self, label: str, content: str, memory_type: str = "system", char_limit: int = 10_000):
+        """Store or update core memory block."""
         if len(content) > char_limit:
             content = content[: char_limit - 3] + "..."
@@ -95,9 +118,9 @@
                     "id": block_id,
                     "vector": await self._get_embedding(content),
                     "label": label,
-                    "type": memory_type.value,
+                    "type": memory_type,
                     "content": content,
-                    "importance": 1.0,  # Core memories are always important
+                    "importance": 1.0,
                     "created_at": datetime.now().isoformat(),
                     "updated_at": datetime.now().isoformat(),
                 }
@@ -113,8 +136,8 @@
             },
         )
 
-    async def get_core_memories(self) -> list[MemoryEntry]:
-        """Get all core memories"""
+    async def get_core_memories(self) -> list[dict]:
+        """Get all core memories."""
         response = self.namespaces["core"].query(
             rank_by=("vector", "ANN", [0.5] * 1536),
             top_k=100,
@@ -124,110 +147,151 @@
         entries = []
         if response.rows:
             for row in response.rows:
-                entries.append(
-                    MemoryEntry(
-                        id=row.id,
-                        content=row.content,
-                        metadata={
-                            "label": row.label,
-                            "type": row.type,
-                            "importance": getattr(row, "importance", 1.0),
-                        },
-                        created_at=datetime.fromisoformat(row.created_at),
-                    )
-                )
+                entries.append({
+                    "id": row.id,
+                    "content": row.content,
+                    "label": getattr(row, "label", "unknown"),
+                    "type": getattr(row, "type", "system"),
+                    "importance": getattr(row, "importance", 1.0),
+                    "created_at": row.created_at,
+                })
+        return entries
 
-        return entries
+    # --- user memory ---
 
-    # User memory operations
-    async def store_user_memory(
-        self,
-        handle: str,
-        content: str,
-        memory_type: MemoryType = MemoryType.CONVERSATION,
-    ):
-        """Store memory for a specific user"""
+    async def store_interaction(self, handle: str, user_text: str, bot_text: str):
+        """Store a raw interaction log (user message + bot reply)."""
         user_ns = self.get_user_namespace(handle)
-        entry_id = self._generate_id(f"user-{handle}", memory_type.value, content)
+        content = f"user: {user_text}\nbot: {bot_text}"
+        entry_id = self._generate_id(f"user-{handle}", "interaction", content)
 
         user_ns.write(
             upsert_rows=[
                 {
                     "id": entry_id,
                     "vector": await self._get_embedding(content),
-                    "type": memory_type.value,
+                    "kind": "interaction",
                     "content": content,
-                    "handle": handle,
+                    "tags": [],
                     "created_at": datetime.now().isoformat(),
                 }
             ],
             distance_metric="cosine_distance",
-            schema={
-                "type": {"type": "string"},
-                "content": {"type": "string", "full_text_search": True},
-                "handle": {"type": "string"},
-                "created_at": {"type": "string"},
-            },
+            schema=USER_NAMESPACE_SCHEMA,
         )
 
-    async def get_user_memories(
-        self, user_handle: str, limit: int = 50, query: str | None = None
-    ) -> list[MemoryEntry]:
-        """Get memories for a specific user, optionally filtered by semantic search"""
-        user_ns = self.get_user_namespace(user_handle)
+    async def store_observations(self, handle: str, observations: list[Observation]):
+        """Store extracted observations as individual rows."""
+        if not observations:
+            return
 
+        user_ns = self.get_user_namespace(handle)
+        rows = []
+        for obs in observations:
+            entry_id = self._generate_id(f"user-{handle}", "observation", obs.content)
+            rows.append({
+                "id": entry_id,
+                "vector": await self._get_embedding(obs.content),
+                "kind": "observation",
+                "content": obs.content,
+                "tags": obs.tags,
+                "created_at": datetime.now().isoformat(),
+            })
+
+        user_ns.write(
+            upsert_rows=rows,
+            distance_metric="cosine_distance",
+            schema=USER_NAMESPACE_SCHEMA,
+        )
+
+    async def extract_and_store(self, handle: str, user_text: str, bot_text: str):
+        """Extract observations from an exchange and store them. Meant to be fire-and-forget."""
         try:
-            # Use semantic search if query provided, otherwise chronological
-            if query:
-                query_embedding = await self._get_embedding(query)
-                response = user_ns.query(
-                    rank_by=("vector", "ANN", query_embedding),
-                    top_k=limit,
-                    include_attributes=["type", "content", "created_at"],
-                )
+            # fetch existing observations for dedup context
+            existing = await self._get_observations(handle, top_k=20)
+            existing_text = "\n".join(f"- {o}" for o in existing) if existing else "none yet"
+
+            prompt = (
+                f"existing observations about this user:\n{existing_text}\n\n"
+                f"new exchange:\nuser: {user_text}\nbot: {bot_text}"
+            )
+            result = await get_extraction_agent().run(prompt)
+            if result.output.observations:
+                await self.store_observations(handle, result.output.observations)
+                obs_summary = ", ".join(o.content[:60] for o in result.output.observations)
+                logger.info(f"extracted {len(result.output.observations)} observations for @{handle}: {obs_summary}")
             else:
-                response = user_ns.query(
-                    rank_by=None,  # No ranking, we'll sort by date
-                    top_k=limit * 2,  # Get more, then sort
-                    include_attributes=["type", "content", "created_at"],
-                )
+                logger.debug(f"no new observations for @{handle}")
+        except Exception as e:
+            logger.warning(f"observation extraction failed for @{handle}: {e}")
 
-            entries = []
+    async def _get_observations(self, handle: str, top_k: int = 20) -> list[str]:
+        """Get existing observation content strings for a user."""
+        user_ns = self.get_user_namespace(handle)
+        try:
+            response = user_ns.query(
+                rank_by=("vector", "ANN", [0.5] * 1536),
+                top_k=top_k,
+                filters={"kind": ["Eq", "observation"]},
+                include_attributes=["content"],
+            )
             if response.rows:
-                for row in response.rows:
-                    entries.append(
-                        MemoryEntry(
-                            id=row.id,
-                            content=row.content,
-                            metadata={"user_handle": user_handle, "type": row.type},
-                            created_at=datetime.fromisoformat(row.created_at),
-                        )
-                    )
-
-            return sorted(entries, key=lambda x: x.created_at, reverse=True)
-
+                return [row.content for row in response.rows]
         except Exception as e:
-            # If namespace doesn't exist, return empty list
-            if "was not found" in str(e):
-                return []
-            raise
+            if "attribute not found" in str(e):
+                return []  # old namespace without kind column - no observations yet
+            if "was not found" not in str(e):
+                raise
+        return []
 
-    # Main method used by the bot
-    async def build_conversation_context(
-        self, user_handle: str, include_core: bool = True, query: str | None = None
-    ) -> str:
-        """Build complete context for a conversation"""
+    async def build_user_context(self, handle: str, query_text: str, include_core: bool = True) -> str:
+        """Build context for a conversation from observations and recent interactions."""
         parts = []
 
-        # Core memories (personality, guidelines, etc.)
         if include_core:
             core_memories = await self.get_core_memories()
             if core_memories:
                 parts.append("[CORE IDENTITY AND GUIDELINES]")
-                for mem in sorted(
-                    core_memories,
-                    key=lambda x: x.metadata.get("importance", 0),
-                    reverse=True,
-                ):
-                    label = mem.metadata.get("label", "unknown")
-                    parts.append(f"[{label}] {mem.content}")
+                for mem in sorted(core_memories, key=lambda x: x.get("importance", 0), reverse=True):
+                    label = mem.get("label", "unknown")
+                    parts.append(f"[{label}] {mem['content']}")
+
+        user_ns = self.get_user_namespace(handle)
+        try:
+            query_embedding = await self._get_embedding(query_text)
+
+            observations: list[str] = []
+            interactions: list[str] = []
+
+            try:
+                # semantic search for relevant observations
+                obs_response = user_ns.query(
+                    rank_by=("vector", "ANN", query_embedding),
+                    top_k=10,
+                    filters={"kind": ["Eq", "observation"]},
+                    include_attributes=["content", "tags"],
+                )
+                if obs_response.rows:
+                    observations = [row.content for row in obs_response.rows]
+
+                # recent interactions for conversational context
+                interaction_response = user_ns.query(
+                    rank_by=("vector", "ANN", query_embedding),
+                    top_k=5,
+                    filters={"kind": ["Eq", "interaction"]},
+                    include_attributes=["content", "created_at"],
+                )
+                if interaction_response.rows:
+                    interactions = [row.content for row in interaction_response.rows]
+            except Exception as e:
+                if "attribute not found" not in str(e):
+                    raise
+                # old namespace without kind column - fall back to unfiltered search
+                logger.debug(f"kind attribute not found for @{handle}, falling back to unfiltered search")
+                response = user_ns.query(
+                    rank_by=("vector", "ANN", query_embedding),
+                    top_k=10,
+                    include_attributes=["content"],
+                )
+                if response.rows:
+                    interactions = [row.content for row in
response.rows] 298 + 299 + if observations: 300 + parts.append(f"\n[KNOWN FACTS ABOUT @{handle}]") 301 + for obs in observations: 302 + parts.append(f"- {obs}") 303 + 304 + if interactions: 305 + parts.append(f"\n[RECENT INTERACTIONS WITH @{handle}]") 306 + for interaction in interactions: 307 + parts.append(f"- {interaction}") 308 + 309 + if not observations and not interactions: 310 + parts.append(f"\n[USER CONTEXT - @{handle}]") 311 + parts.append("no previous interactions with this user.") 234 312 235 - # User-specific memories 236 - user_memories = await self.get_user_memories(user_handle, query=query) 237 - if user_memories: 238 - parts.append(f"\n[USER CONTEXT - @{user_handle}]") 239 - for mem in user_memories[:10]: # Most recent 10 240 - parts.append(f"- {mem.content}") 241 - elif include_core: 242 - parts.append(f"\n[USER CONTEXT - @{user_handle}]") 243 - parts.append("No previous interactions with this user.") 313 + except Exception as e: 314 + if "was not found" not in str(e): 315 + logger.warning(f"failed to retrieve user context for @{handle}: {e}") 316 + parts.append(f"\n[USER CONTEXT - @{handle}]") 317 + parts.append("no previous interactions with this user.") 244 318 245 319 return "\n".join(parts) 320 + 321 + async def search(self, handle: str, query: str, top_k: int = 10) -> list[dict]: 322 + """Unfiltered semantic search across all memory kinds for a user.""" 323 + user_ns = self.get_user_namespace(handle) 324 + try: 325 + query_embedding = await self._get_embedding(query) 326 + response = user_ns.query( 327 + rank_by=("vector", "ANN", query_embedding), 328 + top_k=top_k, 329 + include_attributes=["content", "created_at"], 330 + ) 331 + results = [] 332 + if response.rows: 333 + for row in response.rows: 334 + results.append({ 335 + "kind": getattr(row, "kind", "unknown"), 336 + "content": row.content, 337 + "tags": getattr(row, "tags", []), 338 + "created_at": getattr(row, "created_at", ""), 339 + }) 340 + return results 341 + except Exception 
as e: 342 + if "was not found" in str(e): 343 + return [] 344 + raise 345 + 346 + async def after_interaction(self, handle: str, user_text: str, bot_text: str): 347 + """Post-interaction hook: store interaction then extract observations.""" 348 + await self.store_interaction(handle, user_text, bot_text) 349 + await self.extract_and_store(handle, user_text, bot_text)
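an aside for reviewers: the context string that `build_user_context` assembles can be sketched as a pure function, without any turbopuffer or embedding calls. this is a minimal mirror of the section layout in the diff above (core-memory sections omitted; the handle and contents below are hypothetical):

```python
def render_context(handle: str, observations: list[str], interactions: list[str]) -> str:
    """mirror of the section layout build_user_context emits (core memories omitted)."""
    parts: list[str] = []
    if observations:
        parts.append(f"\n[KNOWN FACTS ABOUT @{handle}]")
        parts.extend(f"- {obs}" for obs in observations)
    if interactions:
        parts.append(f"\n[RECENT INTERACTIONS WITH @{handle}]")
        parts.extend(f"- {i}" for i in interactions)
    if not observations and not interactions:
        # empty-namespace fallback, same wording as the real method
        parts.append(f"\n[USER CONTEXT - @{handle}]")
        parts.append("no previous interactions with this user.")
    return "\n".join(parts)


print(render_context("alice.example", ["likes rust"], ["user: hi\nbot: hey"]))
```

the real method fills `observations` and `interactions` from two kind-filtered ANN queries against the user's namespace.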
-143
tests/test_ai_integration.py
···
1 - #!/usr/bin/env python
2 - """Test AI integration without posting to Bluesky"""
3 -
4 - import asyncio
5 -
6 - import pytest
7 -
8 - from bot.config import settings
9 - from bot.response_generator import ResponseGenerator
10 -
11 -
12 - @pytest.mark.asyncio
13 - async def test_response_generator():
14 -     """Test the response generator with various inputs"""
15 -     print("🧪 Testing AI Integration")
16 -     print(f" Bot name: {settings.bot_name}")
17 -     print(f" AI enabled: {'Yes' if settings.anthropic_api_key else 'No'}")
18 -     print()
19 -
20 -     # Create response generator
21 -     generator = ResponseGenerator()
22 -
23 -     # Test cases
24 -     test_cases = [
25 -         {
26 -             "mention": f"@{settings.bot_name} What's your favorite color?",
27 -             "author": "test.user",
28 -             "description": "Simple question",
29 -         },
30 -         {
31 -             "mention": f"@{settings.bot_name} Can you help me understand integrated information theory?",
32 -             "author": "curious.scientist",
33 -             "description": "Complex topic",
34 -         },
35 -         {
36 -             "mention": f"@{settings.bot_name} hello!",
37 -             "author": "friendly.person",
38 -             "description": "Simple greeting",
39 -         },
40 -         {
41 -             "mention": f"@{settings.bot_name} What do you think about consciousness?",
42 -             "author": "philosopher",
43 -             "description": "Philosophical question",
44 -         },
45 -     ]
46 -
47 -     # Run tests
48 -     for i, test in enumerate(test_cases, 1):
49 -         print(f"Test {i}: {test['description']}")
50 -         print(f" From: @{test['author']}")
51 -         print(f" Raw text: {test['mention']}")
52 -
53 -         # In real AT Protocol, mentions are facets with structured data
54 -         # For testing, we pass the full text (bot can parse if needed)
55 -         print(
56 -             f" (Note: In production, @{settings.bot_name} would be a structured mention)"
57 -         )
58 -
59 -         try:
60 -             response = await generator.generate(
61 -                 mention_text=test["mention"],
62 -                 author_handle=test["author"],
63 -                 thread_context="",
64 -             )
65 -             print(f" Response: {response}")
66 -             print(f" Length: {len(response)} chars")
67 -
68 -             # Verify response is within Bluesky limit
69 -             if len(response) > 300:
70 -                 print(" ⚠️ WARNING: Response exceeds 300 character limit!")
71 -             else:
72 -                 print(" ✅ Response within limit")
73 -
74 -         except Exception as e:
75 -             print(f" ❌ ERROR: {e}")
76 -             import traceback
77 -
78 -             traceback.print_exc()
79 -
80 -         print()
81 -
82 -     # Test response consistency
83 -     if generator.agent:
84 -         print("🔄 Testing response consistency...")
85 -         test_mention = f"@{settings.bot_name} What are you?"
86 -         responses = []
87 -
88 -         for i in range(3):
89 -             response = await generator.generate(
90 -                 mention_text=test_mention,
91 -                 author_handle="consistency.tester",
92 -                 thread_context="",
93 -             )
94 -             responses.append(response)
95 -             print(f" Response {i + 1}: {response[:50]}...")
96 -
97 -         # Check if responses are different (they should be somewhat varied)
98 -         if len(set(responses)) == 1:
99 -             print(" ⚠️ All responses are identical - might want more variation")
100 -         else:
101 -             print(" ✅ Responses show variation")
102 -
103 -     print("\n✨ Test complete!")
104 -
105 -
106 - @pytest.mark.asyncio
107 - async def test_direct_agent():
108 -     """Test the Anthropic agent directly"""
109 -     if not settings.anthropic_api_key:
110 -         print("⚠️ No Anthropic API key found - skipping direct agent test")
111 -         return
112 -
113 -     print("\n🤖 Testing Anthropic Agent Directly")
114 -
115 -     try:
116 -         from bot.agents.anthropic_agent import AnthropicAgent
117 -
118 -         agent = AnthropicAgent()
119 -
120 -         # Test a simple response
121 -         response = await agent.generate_response(
122 -             mention_text=f"@{settings.bot_name} explain your name",
123 -             author_handle="name.curious",
124 -             thread_context="",
125 -         )
126 -
127 -         print(f"Direct agent response: {response}")
128 -         print(f"Response length: {len(response)} chars")
129 -
130 -     except Exception as e:
131 -         print(f"❌ Direct agent test failed: {e}")
132 -         import traceback
133 -
134 -         traceback.print_exc()
135 -
136 -
137 - if __name__ == "__main__":
138 -     print("=" * 60)
139 -     print(f"{settings.bot_name} Bot - AI Integration Test")
140 -     print("=" * 60)
141 -
142 -     asyncio.run(test_response_generator())
143 -     asyncio.run(test_direct_agent())
+45
tests/test_memory_smoke.py
··· 1 + """Smoke test for memory system using real .env credentials.""" 2 + 3 + import pytest 4 + 5 + from bot.config import Settings 6 + from bot.memory import NamespaceMemory 7 + 8 + 9 + @pytest.fixture 10 + async def memory(): 11 + s = Settings() 12 + if not s.turbopuffer_api_key or not s.openai_api_key: 13 + pytest.skip("needs TURBOPUFFER_API_KEY and OPENAI_API_KEY in .env") 14 + mem = NamespaceMemory(api_key=s.turbopuffer_api_key) 15 + yield mem 16 + await mem.close() 17 + 18 + 19 + async def test_build_user_context_old_namespace(memory): 20 + """build_user_context should not crash on namespaces without 'kind' column.""" 21 + # this handle has old data without the kind attribute 22 + ctx = await memory.build_user_context( 23 + "zzstoatzzdevlog.bsky.social", 24 + query_text="hello", 25 + include_core=False, 26 + ) 27 + print(f"\n--- context ---\n{ctx}\n---") 28 + assert isinstance(ctx, str) 29 + 30 + 31 + async def test_store_and_retrieve(memory): 32 + """Round-trip: store interaction, then retrieve it.""" 33 + handle = "smoke-test.example" 34 + await memory.store_interaction(handle, "i like rust", "rust is great!") 35 + 36 + ctx = await memory.build_user_context(handle, query_text="rust", include_core=False) 37 + print(f"\n--- context ---\n{ctx}\n---") 38 + assert "rust" in ctx.lower() 39 + 40 + 41 + async def test_search_old_namespace(memory): 42 + """search should work on namespaces without 'kind' column.""" 43 + results = await memory.search("zzstoatzzdevlog.bsky.social", "hello", top_k=3) 44 + print(f"\n--- search results ---\n{results}\n---") 45 + assert isinstance(results, list)
-101
tests/test_response_generation.py
··· 1 - """Unit tests for response generation""" 2 - 3 - from unittest.mock import AsyncMock, Mock, patch 4 - 5 - import pytest 6 - 7 - from bot.response_generator import PLACEHOLDER_RESPONSES, ResponseGenerator 8 - 9 - 10 - @pytest.mark.asyncio 11 - async def test_placeholder_response_generator(): 12 - """Test placeholder responses when no AI is configured""" 13 - with patch("bot.response_generator.settings") as mock_settings: 14 - mock_settings.anthropic_api_key = None 15 - 16 - generator = ResponseGenerator() 17 - response = await generator.generate("Hello bot!", "test.user", "") 18 - 19 - # Should return one of the placeholder responses 20 - assert response in PLACEHOLDER_RESPONSES 21 - assert len(response) <= 300 22 - 23 - 24 - @pytest.mark.asyncio 25 - async def test_ai_response_generator(): 26 - """Test AI responses when Anthropic is configured""" 27 - with patch("bot.response_generator.settings") as mock_settings: 28 - mock_settings.anthropic_api_key = "test-key" 29 - 30 - # Mock the agent 31 - mock_agent = Mock() 32 - mock_agent.generate_response = AsyncMock( 33 - return_value="Hello! Nice to meet you!" 34 - ) 35 - 36 - with patch( 37 - "bot.agents.anthropic_agent.AnthropicAgent", return_value=mock_agent 38 - ): 39 - generator = ResponseGenerator() 40 - 41 - # Verify AI was enabled 42 - assert generator.agent is not None 43 - assert hasattr(generator.agent, "generate_response") 44 - 45 - # Test response 46 - response = await generator.generate("Hello!", "test.user", "") 47 - assert response == "Hello! Nice to meet you!" 
48 - 49 - # Verify the agent was called correctly 50 - mock_agent.generate_response.assert_called_once_with( 51 - "Hello!", "test.user", "" 52 - ) 53 - 54 - 55 - @pytest.mark.asyncio 56 - async def test_ai_initialization_failure(): 57 - """Test fallback to placeholder when AI initialization fails""" 58 - with patch("bot.response_generator.settings") as mock_settings: 59 - mock_settings.anthropic_api_key = "test-key" 60 - 61 - # Make the import fail 62 - with patch( 63 - "bot.agents.anthropic_agent.AnthropicAgent", 64 - side_effect=ImportError("API error"), 65 - ): 66 - generator = ResponseGenerator() 67 - 68 - # Should fall back to placeholder 69 - assert generator.agent is None 70 - 71 - response = await generator.generate("Hello!", "test.user", "") 72 - assert response in PLACEHOLDER_RESPONSES 73 - 74 - 75 - @pytest.mark.asyncio 76 - async def test_response_length_limit(): 77 - """Test that responses are always within Bluesky's 300 char limit""" 78 - with patch("bot.response_generator.settings") as mock_settings: 79 - mock_settings.anthropic_api_key = "test-key" 80 - 81 - # Mock agent that returns a properly truncated response 82 - # (In real implementation, truncation happens in AnthropicAgent) 83 - mock_agent = Mock() 84 - mock_agent.generate_response = AsyncMock( 85 - return_value="x" * 300 # Already truncated by agent 86 - ) 87 - 88 - with patch( 89 - "bot.agents.anthropic_agent.AnthropicAgent", return_value=mock_agent 90 - ): 91 - generator = ResponseGenerator() 92 - response = await generator.generate("Hello!", "test.user", "") 93 - 94 - # The anthropic agent should handle truncation, but let's verify 95 - assert len(response) <= 300 96 - 97 - 98 - def test_placeholder_responses_length(): 99 - """Verify all placeholder responses fit within limit""" 100 - for response in PLACEHOLDER_RESPONSES: 101 - assert len(response) <= 300, f"Placeholder too long: {response}"
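one more reviewer note: several methods in this PR (`_get_observations`, `build_user_context`, and the smoke tests above) lean on the same schema-migration pattern: try a query filtered on the new `kind` column, and degrade gracefully on namespaces created before that column existed. a standalone sketch of the pattern, where `query_fn` is a hypothetical stand-in for `user_ns.query` and the error string matches the checks in the diff:

```python
def query_with_kind_fallback(query_fn, kind: str):
    """try a kind-filtered query; fall back to unfiltered on pre-migration namespaces."""
    try:
        return query_fn(filters={"kind": ["Eq", kind]})
    except Exception as e:
        # the memory module detects a missing column by this error substring
        if "attribute not found" not in str(e):
            raise
        return query_fn(filters=None)


# stub query_fn simulating a namespace written before the kind column existed
def old_namespace_query(filters=None):
    if filters is not None:
        raise RuntimeError("attribute not found: kind")
    return ["row-1", "row-2"]


print(query_with_kind_fallback(old_namespace_query, "observation"))  # → ['row-1', 'row-2']
```

the upside of this shape is that no migration job is needed: old rows stay queryable, and new rows get the extra column via upsert.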