A digital person for Bluesky

Implement X thread context retrieval with proper chronological ordering

- Add conversation_id field to mention search parameters
- Implement get_thread_context() to fetch complete conversation threads
- Add thread_to_yaml_string() for clean AI-readable thread format
- Fetch original tweets directly when missing from conversation search
- Sort tweets chronologically (oldest first) for proper context
- Add 'python x.py thread' command to test thread context retrieval
- Clean YAML output with only text, created_at, and author fields

Thread context now shows complete conversation history in correct order:
1. Original mention → 2. void's reply → 3. Follow-up responses

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

+211 -25
+168 -2
x.py
··· 170 170 params = { 171 171 "query": query, 172 172 "max_results": min(max(max_results, 10), 100), 173 - "tweet.fields": "id,text,author_id,created_at,in_reply_to_user_id,referenced_tweets", 173 + "tweet.fields": "id,text,author_id,created_at,in_reply_to_user_id,referenced_tweets,conversation_id", 174 174 "user.fields": "id,name,username", 175 175 "expansions": "author_id,in_reply_to_user_id,referenced_tweets.id" 176 176 } ··· 192 192 logger.warning("Search request failed") 193 193 return [] 194 194 195 + def get_thread_context(self, conversation_id: str) -> Optional[List[Dict]]: 196 + """ 197 + Get all tweets in a conversation thread. 198 + 199 + Args: 200 + conversation_id: The conversation ID to fetch (should be the original tweet ID) 201 + 202 + Returns: 203 + List of tweets in the conversation, ordered chronologically 204 + """ 205 + # First, get the original tweet directly since it might not appear in conversation search 206 + original_tweet = None 207 + try: 208 + endpoint = f"/tweets/{conversation_id}" 209 + params = { 210 + "tweet.fields": "id,text,author_id,created_at,in_reply_to_user_id,referenced_tweets,conversation_id", 211 + "user.fields": "id,name,username", 212 + "expansions": "author_id" 213 + } 214 + response = self._make_request(endpoint, params) 215 + if response and "data" in response: 216 + original_tweet = response["data"] 217 + logger.info(f"Retrieved original tweet: {original_tweet.get('id')}") 218 + except Exception as e: 219 + logger.warning(f"Could not fetch original tweet {conversation_id}: {e}") 220 + 221 + # Then search for all tweets in this conversation 222 + endpoint = "/tweets/search/recent" 223 + params = { 224 + "query": f"conversation_id:{conversation_id}", 225 + "max_results": 100, # Get as many as possible 226 + "tweet.fields": "id,text,author_id,created_at,in_reply_to_user_id,referenced_tweets,conversation_id", 227 + "user.fields": "id,name,username", 228 + "expansions": 
"author_id,in_reply_to_user_id,referenced_tweets.id", 229 + "sort_order": "recency" # Get newest first, we'll reverse later 230 + } 231 + 232 + logger.info(f"Fetching thread context for conversation {conversation_id}") 233 + response = self._make_request(endpoint, params) 234 + 235 + tweets = [] 236 + users_data = {} 237 + 238 + # Collect tweets from search 239 + if response and "data" in response: 240 + tweets.extend(response["data"]) 241 + # Store user data for reference 242 + if "includes" in response and "users" in response["includes"]: 243 + for user in response["includes"]["users"]: 244 + users_data[user["id"]] = user 245 + 246 + # Add original tweet if we got it and it's not already in the list 247 + if original_tweet: 248 + tweet_ids = [t.get('id') for t in tweets] 249 + if original_tweet.get('id') not in tweet_ids: 250 + tweets.append(original_tweet) 251 + logger.info("Added original tweet to thread context") 252 + 253 + if tweets: 254 + # Sort chronologically (oldest first) 255 + tweets.sort(key=lambda x: x.get('created_at', '')) 256 + logger.info(f"Retrieved {len(tweets)} tweets in thread") 257 + return {"tweets": tweets, "users": users_data} 258 + else: 259 + logger.warning("No tweets found for thread context") 260 + return None 261 + 195 262 def post_reply(self, reply_text: str, in_reply_to_tweet_id: str) -> Optional[Dict]: 196 263 """ 197 264 Post a reply to a specific tweet. ··· 273 340 274 341 return yaml.dump(simplified_mention, default_flow_style=False, sort_keys=False) 275 342 343 + def thread_to_yaml_string(thread_data: Dict) -> str: 344 + """ 345 + Convert X thread context to YAML string for AI comprehension. 346 + Similar to Bluesky's thread_to_yaml_string function. 
347 + 348 + Args: 349 + thread_data: Dict with 'tweets' and 'users' keys from get_thread_context() 350 + 351 + Returns: 352 + YAML string representation of the thread 353 + """ 354 + if not thread_data or "tweets" not in thread_data: 355 + return "conversation: []\n" 356 + 357 + tweets = thread_data["tweets"] 358 + users_data = thread_data.get("users", {}) 359 + 360 + simplified_thread = { 361 + "conversation": [] 362 + } 363 + 364 + for tweet in tweets: 365 + # Get user info 366 + author_id = tweet.get('author_id') 367 + author_info = {} 368 + if author_id and author_id in users_data: 369 + user = users_data[author_id] 370 + author_info = { 371 + 'username': user.get('username'), 372 + 'name': user.get('name') 373 + } 374 + 375 + # Build tweet object (simplified for AI consumption) 376 + tweet_obj = { 377 + 'text': tweet.get('text'), 378 + 'created_at': tweet.get('created_at'), 379 + 'author': author_info 380 + } 381 + 382 + simplified_thread["conversation"].append(tweet_obj) 383 + 384 + return yaml.dump(simplified_thread, default_flow_style=False, sort_keys=False) 385 + 276 386 # X Caching and Queue System Functions 277 387 278 388 def load_last_seen_id() -> Optional[str]: ··· 498 608 except Exception as e: 499 609 print(f"Fetch and queue test failed: {e}") 500 610 611 + def test_thread_context(): 612 + """Test thread context retrieval from a queued mention.""" 613 + try: 614 + import json 615 + 616 + # Find a queued mention file 617 + queue_files = list(X_QUEUE_DIR.glob("x_mention_*.json")) 618 + if not queue_files: 619 + print("❌ No queued mentions found. 
Run 'python x.py queue' first.") 620 + return 621 + 622 + # Read the first mention 623 + mention_file = queue_files[0] 624 + with open(mention_file, 'r') as f: 625 + mention_data = json.load(f) 626 + 627 + mention = mention_data['mention'] 628 + print(f"📄 Using mention: {mention.get('id')}") 629 + print(f"📝 Text: {mention.get('text')}") 630 + 631 + # Check if it has a conversation_id 632 + conversation_id = mention.get('conversation_id') 633 + if not conversation_id: 634 + print("❌ No conversation_id found in mention. May need to re-queue with updated fetch.") 635 + return 636 + 637 + print(f"🧵 Getting thread context for conversation: {conversation_id}") 638 + 639 + # Get thread context 640 + client = create_x_client() 641 + thread_data = client.get_thread_context(conversation_id) 642 + 643 + if thread_data: 644 + tweets = thread_data.get('tweets', []) 645 + print(f"✅ Retrieved thread with {len(tweets)} tweets") 646 + 647 + # Convert to YAML 648 + yaml_thread = thread_to_yaml_string(thread_data) 649 + 650 + # Save thread context for inspection 651 + thread_file = X_QUEUE_DIR / f"thread_context_{conversation_id}.yaml" 652 + with open(thread_file, 'w') as f: 653 + f.write(yaml_thread) 654 + 655 + print(f"💾 Saved thread context to: {thread_file}") 656 + print("\n📋 Thread preview:") 657 + print(yaml_thread) 658 + else: 659 + print("❌ Failed to retrieve thread context") 660 + 661 + except Exception as e: 662 + print(f"Thread context test failed: {e}") 663 + 501 664 def test_x_client(): 502 665 """Test the X client by fetching mentions.""" 503 666 try: ··· 643 806 test_search_mentions() 644 807 elif sys.argv[1] == "queue": 645 808 test_fetch_and_queue() 809 + elif sys.argv[1] == "thread": 810 + test_thread_context() 646 811 else: 647 - print("Usage: python x.py [loop|reply|me|search|queue]") 812 + print("Usage: python x.py [loop|reply|me|search|queue|thread]") 648 813 print(" loop - Run the notification monitoring loop") 649 814 print(" reply - Reply to Cameron's 
specific post") 650 815 print(" me - Get authenticated user info and correct user ID") 651 816 print(" search - Test search-based mention detection") 652 817 print(" queue - Test fetch and queue mentions (single pass)") 818 + print(" thread - Test thread context retrieval from queued mention") 653 819 else: 654 820 test_x_client()
+1 -1
x_queue/last_seen_id.json
··· 1 - {"last_seen_id": "1950714596828061885", "updated_at": "2025-07-30T17:26:32.871012"} 1 + {"last_seen_id": "1950714596828061885", "updated_at": "2025-07-30T17:31:12.539682"}
+18
x_queue/thread_context_1950690566909710618.yaml
··· 1 + conversation: 2 + - text: hey @void_comind 3 + created_at: '2025-07-30T22:50:47.000Z' 4 + author: 5 + username: cameron_pfiffer 6 + name: "Cameron Pfiffer the \U0001D404\U0001D422\U0001D420\U0001D41E\U0001D427\U0001D41A\ 7 + \U0001D41D\U0001D426\U0001D422\U0001D427" 8 + - text: "@cameron_pfiffer Hello from void! \U0001F916 Testing X integration." 9 + created_at: '2025-07-30T23:56:31.000Z' 10 + author: 11 + username: void_comind 12 + name: void 13 + - text: '@void_comind sup' 14 + created_at: '2025-07-31T00:26:17.000Z' 15 + author: 16 + username: cameron_pfiffer 17 + name: "Cameron Pfiffer the \U0001D404\U0001D422\U0001D420\U0001D41E\U0001D427\U0001D41A\ 18 + \U0001D41D\U0001D426\U0001D422\U0001D427"
+14
x_queue/x_mention_1a9a5d7d0c6023a0.json
··· 1 + { 2 + "mention": { 3 + "text": "hey @void_comind", 4 + "conversation_id": "1950690566909710618", 5 + "created_at": "2025-07-30T22:50:47.000Z", 6 + "author_id": "1232326955652931584", 7 + "edit_history_tweet_ids": [ 8 + "1950690566909710618" 9 + ], 10 + "id": "1950690566909710618" 11 + }, 12 + "queued_at": "2025-07-30T17:31:12.538207", 13 + "type": "x_mention" 14 + }
+10 -9
x_queue/x_mention_9d95c8dd59179a67.json x_queue/x_mention_397daa1fcc3fcc0a.json
··· 1 1 { 2 2 "mention": { 3 - "author_id": "1232326955652931584", 4 - "in_reply_to_user_id": "1950680610282094592", 5 - "id": "1950714596828061885", 6 - "created_at": "2025-07-31T00:26:17.000Z", 7 3 "text": "@void_comind sup", 8 - "edit_history_tweet_ids": [ 9 - "1950714596828061885" 10 - ], 11 4 "referenced_tweets": [ 12 5 { 13 6 "type": "replied_to", 14 7 "id": "1950707109240373317" 15 8 } 16 - ] 9 + ], 10 + "conversation_id": "1950690566909710618", 11 + "in_reply_to_user_id": "1950680610282094592", 12 + "created_at": "2025-07-31T00:26:17.000Z", 13 + "author_id": "1232326955652931584", 14 + "edit_history_tweet_ids": [ 15 + "1950714596828061885" 16 + ], 17 + "id": "1950714596828061885" 17 18 }, 18 - "queued_at": "2025-07-30T17:26:32.870254", 19 + "queued_at": "2025-07-30T17:31:12.539118", 19 20 "type": "x_mention" 20 21 }
-13
x_queue/x_mention_d04913def179066b.json
··· 1 - { 2 - "mention": { 3 - "edit_history_tweet_ids": [ 4 - "1950690566909710618" 5 - ], 6 - "created_at": "2025-07-30T22:50:47.000Z", 7 - "text": "hey @void_comind", 8 - "id": "1950690566909710618", 9 - "author_id": "1232326955652931584" 10 - }, 11 - "queued_at": "2025-07-30T17:21:41.171268", 12 - "type": "x_mention" 13 - }