···115115 return obj
def extract_links_from_facets(record_text: str, facets: list) -> list:
    """
    Extract link URLs from facets with their associated text.

    Facet indices are byte offsets into the UTF-8 encoding of the post text,
    so the text is encoded once and sliced as bytes.

    Args:
        record_text: The post text (needed to extract link text using byte offsets)
        facets: List of facet objects from post record

    Returns:
        List of dicts with 'url' and 'text' keys. 'text' falls back to the URL
        itself whenever the byte range is missing, negative, inverted, empty,
        or does not decode as UTF-8.
    """
    links = []
    text_bytes = (record_text or '').encode('utf-8')

    for facet in facets or []:
        index = getattr(facet, 'index', None)
        byte_start = getattr(index, 'byte_start', None)
        byte_end = getattr(index, 'byte_end', None)
        # Slicing bytes never raises IndexError, so bad offsets must be
        # rejected explicitly; a negative offset would otherwise silently
        # slice from the end of the text.
        offsets_valid = (
            isinstance(byte_start, int)
            and isinstance(byte_end, int)
            and 0 <= byte_start < byte_end
        )

        for feature in getattr(facet, 'features', None) or []:
            if not hasattr(feature, 'uri'):  # Only link features carry a uri
                continue

            link_text = ''
            if offsets_valid:
                try:
                    link_text = text_bytes[byte_start:byte_end].decode('utf-8')
                except UnicodeDecodeError:
                    # Range cuts through a multi-byte character
                    link_text = ''

            links.append({
                'url': feature.uri,
                'text': link_text or feature.uri  # Fallback to URL itself
            })
    return links
def _images_from_media(media, include_thumbnails, image_source=None,
                       link_source='external_link', video_source='video'):
    """Collect image dicts from a single images/external/video view object.

    Args:
        media: View object whose 'py_type' names an images, external or video embed
        include_thumbnails: Whether link-card and video thumbnails are collected
        image_source: Optional 'source' tag for real images (omitted when None)
        link_source: 'source' tag for link-card thumbnails
        video_source: 'source' tag for video thumbnails

    Returns:
        List of dicts with 'fullsize', 'thumb', 'alt' and optional 'source' keys
    """
    media_type = getattr(media, 'py_type', '') or ''
    collected = []

    if 'images' in media_type:
        # 'images' may be absent or None on a malformed view; treat as empty
        for img in getattr(media, 'images', None) or []:
            entry = {
                'fullsize': getattr(img, 'fullsize', None),
                'thumb': getattr(img, 'thumb', None),
                'alt': getattr(img, 'alt', '') or ''
            }
            if image_source:
                entry['source'] = image_source
            collected.append(entry)

    elif include_thumbnails and 'external' in media_type:
        external = getattr(media, 'external', None)
        thumb = getattr(external, 'thumb', None) if external else None
        if thumb:
            title = getattr(external, 'title', '') or ''
            collected.append({
                'fullsize': thumb,  # External links only have thumb, use as fullsize too
                'thumb': thumb,
                'alt': f"Link preview: {title}" if title else 'Link preview image',
                'source': link_source
            })

    elif include_thumbnails and 'video' in media_type:
        thumb = getattr(media, 'thumbnail', None)
        if thumb:
            collected.append({
                'fullsize': thumb,
                'thumb': thumb,
                'alt': getattr(media, 'alt', '') or 'Video thumbnail',
                'source': video_source
            })

    return collected


def extract_images_from_embed(embed, include_thumbnails: bool = True) -> list[dict]:
    """Extract image URLs and alt text from a post embed (View type).

    This function handles the View types returned by get_post_thread(),
    which contain CDN URLs for images (unlike raw record embeds which
    only have BlobRefs).

    Also extracts thumbnails from external links and videos when include_thumbnails=True.

    Args:
        embed: The embed object from post.embed (View type)
        include_thumbnails: Whether to include thumbnails from links/videos (default True)

    Returns:
        List of dicts with 'fullsize', 'thumb', 'alt', and optional 'source' keys.
        'source' is 'external_link' / 'video' for thumbnails on the post itself
        and 'quoted_post' / 'quoted_post_link' / 'quoted_post_video' for media
        found inside a quoted post.
    """
    if not embed:
        return []

    embed_type = getattr(embed, 'py_type', '') or ''

    # Quote post with media (app.bsky.embed.recordWithMedia#view): the media
    # attached to the quoting post lives under embed.media.
    if 'recordWithMedia' in embed_type:
        return _images_from_media(getattr(embed, 'media', None), include_thumbnails)

    # Plain quote post (app.bsky.embed.record#view): look at the quoted
    # post's own embeds and tag everything as coming from the quote.
    if 'record' in embed_type:
        record = getattr(embed, 'record', None)
        images = []
        for nested in getattr(record, 'embeds', None) or []:
            images.extend(_images_from_media(
                nested,
                include_thumbnails,
                image_source='quoted_post',
                link_source='quoted_post_link',
                video_source='quoted_post_video',
            ))
        return images

    # Direct images / external link / video embed
    return _images_from_media(embed, include_thumbnails)
def extract_images_from_thread(thread_data, max_images: int = 8) -> list[dict]:
    """Collect images from every post in a thread, capped at max_images.

    The thread is walked parent-first, then the node's own post, then its
    replies, so images come out in chronological order. Each image dict
    produced by extract_images_from_embed() is tagged with the handle of the
    post's author.

    Args:
        thread_data: The thread data from get_post_thread
        max_images: Maximum number of images to extract (default 8)

    Returns:
        List of image dicts with 'fullsize', 'thumb', 'alt', 'author_handle' keys
    """
    collected = []

    def is_full():
        return len(collected) >= max_images

    def walk(node):
        if not node or is_full():
            return

        # Ancestors first so older posts contribute their images earlier
        ancestor = getattr(node, 'parent', None)
        if ancestor:
            walk(ancestor)

        # Use post.embed (View type), not record.embed
        post = getattr(node, 'post', None)
        embed = getattr(post, 'embed', None) if post else None
        if embed:
            found = extract_images_from_embed(embed)
            if hasattr(post, 'author'):
                handle = getattr(post.author, 'handle', 'unknown')
            else:
                handle = 'unknown'
            for image in found:
                if is_full():
                    break
                image['author_handle'] = handle
                collected.append(image)

        # Then descend into replies
        for reply in getattr(node, 'replies', None) or ():
            if is_full():
                break
            walk(reply)

    if hasattr(thread_data, 'thread'):
        walk(thread_data.thread)

    return collected
328328+329329+330330+def extract_external_link_from_embed(embed) -> dict | None:
331331+ """Extract external link card data from a post embed (View type).
332332+333333+ External links are shown as "link cards" with URL, title, description,
334334+ and optional thumbnail.
335335+336336+ Args:
337337+ embed: The embed object from post.embed (View type)
338338+339339+ Returns:
340340+ Dict with 'url', 'title', 'description', 'thumbnail' keys, or None
341341+ """
342342+ if not embed:
343343+ return None
344344+345345+ embed_type = getattr(embed, 'py_type', '')
346346+347347+ # Direct external link embed (app.bsky.embed.external#view)
348348+ if 'external' in embed_type and hasattr(embed, 'external'):
349349+ external = embed.external
350350+ return {
351351+ 'url': getattr(external, 'uri', ''),
352352+ 'title': getattr(external, 'title', ''),
353353+ 'description': getattr(external, 'description', ''),
354354+ 'thumbnail': getattr(external, 'thumb', None)
355355+ }
356356+357357+ # RecordWithMedia with external link (app.bsky.embed.recordWithMedia#view)
358358+ if 'recordWithMedia' in embed_type and hasattr(embed, 'media'):
359359+ media_type = getattr(embed.media, 'py_type', '')
360360+ if 'external' in media_type and hasattr(embed.media, 'external'):
361361+ external = embed.media.external
362362+ return {
363363+ 'url': getattr(external, 'uri', ''),
364364+ 'title': getattr(external, 'title', ''),
365365+ 'description': getattr(external, 'description', ''),
366366+ 'thumbnail': getattr(external, 'thumb', None)
367367+ }
368368+369369+ return None
370370+371371+372372+def extract_quote_post_from_embed(embed) -> dict | None:
373373+ """Extract quoted post data from a record embed (View type).
374374+375375+ Quote posts embed another post, which can include the quoted text,
376376+ author, and any media attached to the quoted post.
377377+378378+ Args:
379379+ embed: The embed object from post.embed (View type)
380380+381381+ Returns:
382382+ Dict with quote post data, or None if not a quote or unavailable
383383+ """
384384+ if not embed:
385385+ return None
386386+387387+ embed_type = getattr(embed, 'py_type', '')
388388+389389+ # Get the record object (works for both record and recordWithMedia)
390390+ record = None
391391+ if 'recordWithMedia' in embed_type and hasattr(embed, 'record'):
392392+ # recordWithMedia has record.record for the actual quote
393393+ record = getattr(embed.record, 'record', None)
394394+ elif 'record' in embed_type and hasattr(embed, 'record'):
395395+ record = embed.record
396396+397397+ if not record:
398398+ return None
399399+400400+ record_type = getattr(record, 'py_type', '')
401401+402402+ # Handle different quote post states
403403+ if 'viewNotFound' in record_type:
404404+ return {
405405+ 'status': 'not_found',
406406+ 'uri': getattr(record, 'uri', ''),
407407+ 'message': 'Quoted post was deleted or not found'
408408+ }
409409+410410+ if 'viewBlocked' in record_type:
411411+ return {
412412+ 'status': 'blocked',
413413+ 'uri': getattr(record, 'uri', ''),
414414+ 'message': 'Quoted post is from a blocked account'
415415+ }
416416+417417+ if 'viewDetached' in record_type:
418418+ return {
419419+ 'status': 'detached',
420420+ 'uri': getattr(record, 'uri', ''),
421421+ 'message': 'Quoted post was detached'
422422+ }
423423+424424+ # Normal quote post (viewRecord)
425425+ if 'viewRecord' in record_type or hasattr(record, 'author'):
426426+ result = {
427427+ 'status': 'available',
428428+ 'uri': getattr(record, 'uri', ''),
429429+ }
430430+431431+ # Extract author info
432432+ if hasattr(record, 'author') and record.author:
433433+ author = record.author
434434+ result['author'] = {
435435+ 'handle': getattr(author, 'handle', 'unknown'),
436436+ 'display_name': getattr(author, 'display_name', '') or getattr(author, 'handle', 'unknown')
437437+ }
438438+439439+ # Extract the quoted post text from value
440440+ # The 'value' field contains the actual post record
441441+ if hasattr(record, 'value') and record.value:
442442+ value = record.value
443443+ # value can be a dict or an object
444444+ if isinstance(value, dict):
445445+ result['text'] = value.get('text', '')
446446+ elif hasattr(value, 'text'):
447447+ result['text'] = getattr(value, 'text', '')
448448+449449+ # Extract engagement metrics if present
450450+ metrics = {}
451451+ if hasattr(record, 'like_count') and record.like_count is not None:
452452+ metrics['likes'] = record.like_count
453453+ if hasattr(record, 'repost_count') and record.repost_count is not None:
454454+ metrics['reposts'] = record.repost_count
455455+ if hasattr(record, 'reply_count') and record.reply_count is not None:
456456+ metrics['replies'] = record.reply_count
457457+ if hasattr(record, 'quote_count') and record.quote_count is not None:
458458+ metrics['quotes'] = record.quote_count
459459+ if metrics:
460460+ result['metrics'] = metrics
461461+462462+ # Add thread context hints (for hybrid thread navigation)
463463+ thread_context = {}
464464+465465+ # Reply count indicates replies exist below this post
466466+ if metrics.get('replies'):
467467+ thread_context['reply_count'] = metrics['replies']
468468+469469+ # Check if quoted post is itself a reply (has parents above)
470470+ if hasattr(record, 'value') and record.value:
471471+ value = record.value
472472+ reply_ref = value.get('reply') if isinstance(value, dict) else getattr(value, 'reply', None)
473473+ if reply_ref:
474474+ thread_context['has_parents'] = True
475475+476476+ if thread_context:
477477+ result['thread_context'] = thread_context
478478+479479+ # Check for nested embeds in the quoted post
480480+ if hasattr(record, 'embeds') and record.embeds:
481481+ nested_embeds = []
482482+ for nested in record.embeds:
483483+ nested_type = getattr(nested, 'py_type', '')
484484+ if 'images' in nested_type:
485485+ nested_embeds.append({'type': 'images', 'count': len(getattr(nested, 'images', []))})
486486+ elif 'video' in nested_type:
487487+ nested_embeds.append({'type': 'video'})
488488+ elif 'external' in nested_type:
489489+ ext = getattr(nested, 'external', None)
490490+ if ext:
491491+ nested_embeds.append({
492492+ 'type': 'external_link',
493493+ 'url': getattr(ext, 'uri', ''),
494494+ 'title': getattr(ext, 'title', '')
495495+ })
496496+ if nested_embeds:
497497+ result['embeds'] = nested_embeds
498498+499499+ return result
500500+501501+ return None
502502+503503+504504+def extract_embed_data(embed) -> dict | None:
505505+ """Extract structured data from any embed type.
506506+507507+ This is the main entry point for embed extraction. It detects the embed
508508+ type and delegates to the appropriate extraction function.
509509+510510+ Args:
511511+ embed: The embed object from post.embed (View type)
512512+513513+ Returns:
514514+ Dict with embed type and extracted data, or None if no embed
515515+ """
516516+ if not embed:
517517+ return None
518518+519519+ embed_type = getattr(embed, 'py_type', '')
520520+521521+ # Images
522522+ if 'images' in embed_type and 'record' not in embed_type:
523523+ images = extract_images_from_embed(embed)
524524+ if images:
525525+ return {
526526+ 'type': 'images',
527527+ 'images': images
528528+ }
529529+530530+ # External link
531531+ if 'external' in embed_type and 'record' not in embed_type:
532532+ link = extract_external_link_from_embed(embed)
533533+ if link:
534534+ return {
535535+ 'type': 'external_link',
536536+ 'link': link
537537+ }
538538+539539+ # Quote post (record)
540540+ if embed_type == 'app.bsky.embed.record#view':
541541+ quote = extract_quote_post_from_embed(embed)
542542+ if quote:
543543+ return {
544544+ 'type': 'quote_post',
545545+ 'quote': quote
546546+ }
547547+548548+ # Quote post with media (recordWithMedia)
549549+ if 'recordWithMedia' in embed_type:
550550+ result = {'type': 'quote_with_media'}
551551+552552+ # Extract the quote
553553+ quote = extract_quote_post_from_embed(embed)
554554+ if quote:
555555+ result['quote'] = quote
556556+557557+ # Extract the media
558558+ if hasattr(embed, 'media'):
559559+ media_type = getattr(embed.media, 'py_type', '')
560560+ if 'images' in media_type:
561561+ images = extract_images_from_embed(embed)
562562+ if images:
563563+ result['media'] = {'type': 'images', 'images': images}
564564+ elif 'external' in media_type:
565565+ link = extract_external_link_from_embed(embed)
566566+ if link:
567567+ result['media'] = {'type': 'external_link', 'link': link}
568568+ elif 'video' in media_type:
569569+ # Basic video info
570570+ result['media'] = {
571571+ 'type': 'video',
572572+ 'thumbnail': getattr(embed.media, 'thumbnail', None),
573573+ 'alt': getattr(embed.media, 'alt', None)
574574+ }
575575+576576+ return result
577577+578578+ # Video (basic handling)
579579+ if 'video' in embed_type:
580580+ return {
581581+ 'type': 'video',
582582+ 'thumbnail': getattr(embed, 'thumbnail', None),
583583+ 'alt': getattr(embed, 'alt', None)
584584+ }
585585+586586+ return None
def flatten_thread_structure(thread_data):
    """
    Flatten a nested thread structure into a list while preserving all data.

    Each node contributes its ancestors first, then its own post, then its
    replies (depth-first). Post fields are read through attribute access
    because AT Protocol objects store data in properties, not __dict__.

    Args:
        thread_data: The thread data from get_post_thread

    Returns:
        Dict with 'posts' key containing a list of posts in chronological order.
        Each post dict may contain 'uri', 'cid', 'author', 'record' (with
        'text', 'createdAt' and optional 'links') and 'embed', and always
        contains 'parent_uri' (None for posts that are not replies).
    """
    posts = []

    def post_to_dict(post):
        """Build the plain-dict form of a single post view."""
        post_dict = {}

        # Extract basic fields
        if hasattr(post, 'uri'):
            post_dict['uri'] = post.uri
        if hasattr(post, 'cid'):
            post_dict['cid'] = post.cid

        # Extract author info
        author = getattr(post, 'author', None)
        if author:
            post_dict['author'] = {
                'handle': getattr(author, 'handle', 'unknown'),
                'display_name': getattr(author, 'display_name', 'unknown'),
                'did': getattr(author, 'did', 'unknown')
            }

        # Extract record info (text, created_at, links) and the parent
        # reference in a single pass over post.record
        parent_uri = None
        record = getattr(post, 'record', None)
        if record:
            text = getattr(record, 'text', '')
            record_dict = {
                'text': text,
                'createdAt': getattr(record, 'created_at', 'unknown')
            }

            # Extract links from facets if present
            facets = getattr(record, 'facets', None)
            if facets:
                links = extract_links_from_facets(text, facets)
                if links:
                    record_dict['links'] = links

            post_dict['record'] = record_dict

            # parent_uri is used for tree visualization
            reply_ref = getattr(record, 'reply', None)
            reply_parent = getattr(reply_ref, 'parent', None) if reply_ref else None
            if reply_parent and hasattr(reply_parent, 'uri'):
                parent_uri = reply_parent.uri

        # Extract embed data from post.embed (View type with CDN URLs).
        # This is different from record.embed which only has raw BlobRefs.
        embed = getattr(post, 'embed', None)
        if embed:
            embed_data = extract_embed_data(embed)
            if embed_data:
                post_dict['embed'] = embed_data

        post_dict['parent_uri'] = parent_uri
        return post_dict

    def traverse_thread(node):
        """Recursively traverse the thread structure to collect posts."""
        if not node:
            return

        # If this node has a parent, traverse it first (to maintain chronological order)
        if hasattr(node, 'parent') and node.parent:
            traverse_thread(node.parent)

        # Then add this node's post
        if hasattr(node, 'post') and node.post:
            posts.append(post_to_dict(node.post))

        # Then traverse any replies (going DOWN the thread)
        if hasattr(node, 'replies') and node.replies:
            for reply in node.replies:
                traverse_thread(reply)

    # Start from the main thread node. A 'thread' entry in the instance
    # __dict__ is already reachable through normal attribute access, so a
    # single getattr covers both cases.
    traverse_thread(getattr(thread_data, 'thread', None))

    # Return a simple structure with posts list
    return {'posts': posts}
160687···173700 return len(flattened.get('posts', []))
174701175702176176-def thread_to_yaml_string(thread, strip_metadata=True):
def compute_tree_prefixes(posts: List[Dict]) -> Dict[str, str]:
    """
    Compute tree-style prefixes based on parent relationships.

    A post is a root when it has no 'parent_uri' or its parent is not in
    `posts`. With a single root, the root gets an empty prefix and its
    children start at the first branch level; with several roots, the roots
    themselves are drawn as sibling branches. Every prefix segment is three
    characters wide so nested levels line up under their branch markers.

    Args:
        posts: List of post dicts, each with 'uri' and 'parent_uri' keys

    Returns:
        Dict mapping uri -> prefix string (e.g., "├─ ", "│  └─ ").
        Posts without a 'uri', and posts only reachable through a parent
        cycle, get no entry.
    """
    if not posts:
        return {}

    known_uris = {p.get('uri') for p in posts if p.get('uri')}
    children_map: Dict[str, List[str]] = {}  # parent_uri -> [child_uris]
    root_uris: List[str] = []
    seen: set = set()

    for post in posts:
        uri = post.get('uri')
        # A repeated URI would otherwise be registered as a second root/child
        if not uri or uri in seen:
            continue
        seen.add(uri)
        parent_uri = post.get('parent_uri')
        if not parent_uri or parent_uri not in known_uris:
            root_uris.append(uri)
        else:
            children_map.setdefault(parent_uri, []).append(uri)

    prefixes: Dict[str, str] = {}
    visited: set = set()

    def compute_recursive(uri: str, ancestors_last: List[bool]):
        # visited guards against parent cycles in malformed input
        if uri in visited:
            return
        visited.add(uri)

        # One continuation segment per ancestor level, then this node's marker
        prefix_parts = ["   " if is_last else "│  " for is_last in ancestors_last[:-1]]
        if ancestors_last:
            prefix_parts.append("└─ " if ancestors_last[-1] else "├─ ")
        prefixes[uri] = "".join(prefix_parts)

        children = children_map.get(uri, [])
        for i, child_uri in enumerate(children):
            compute_recursive(child_uri, ancestors_last + [i == len(children) - 1])

    if len(root_uris) == 1:
        # An empty ancestor list yields "" for the root and puts its
        # children on the first branch level.
        compute_recursive(root_uris[0], [])
    else:
        for i, root_uri in enumerate(root_uris):
            compute_recursive(root_uri, [i == len(root_uris) - 1])

    return prefixes
def build_tree_view(posts: List[Dict]) -> str:
    """
    Render a thread as one text line per post, indented with tree prefixes.

    Prefixes come from compute_tree_prefixes(); newlines inside a post's
    text are replaced so every post stays on a single line.

    Args:
        posts: List of post dicts with uri, parent_uri, author, record fields

    Returns:
        Multi-line string showing thread structure with tree prefixes,
        or "(empty thread)" when there are no posts
    """
    if not posts:
        return "(empty thread)"

    prefix_by_uri = compute_tree_prefixes(posts)

    def render(entry):
        branch = prefix_by_uri.get(entry.get('uri', ''), '')
        handle = entry.get('author', {}).get('handle', 'unknown')
        body = entry.get('record', {}).get('text', '')
        flattened = body.replace('\n', ' | ')
        return f"{branch}@{handle}: {flattened}"

    return "\n".join([render(entry) for entry in posts])
789789+790790+791791+def thread_to_yaml_string(thread, strip_metadata=True, include_tree_view=True):
177792 """
178793 Convert thread data to a YAML-formatted string for LLM parsing.
179794180795 Args:
181796 thread: The thread data from get_post_thread
182797 strip_metadata: Whether to strip metadata fields for cleaner output
798798+ include_tree_view: Whether to prepend a tree visualization of the thread
183799184800 Returns:
185185- YAML-formatted string representation of the thread
801801+ String representation of the thread with optional tree view and YAML data
186802 """
187803 # First flatten the thread structure to avoid deep nesting
188804 flattened = flatten_thread_structure(thread)
805805+ posts = flattened.get('posts', [])
806806+807807+ output_parts = []
808808+809809+ # Build tree visualization if requested
810810+ if include_tree_view and posts:
811811+ tree_view = build_tree_view(posts)
812812+ output_parts.append("THREAD STRUCTURE:")
813813+ output_parts.append(tree_view)
814814+ output_parts.append("")
815815+ output_parts.append("FULL POST DATA:")
189816190817 # Convert complex objects to basic types
191818 basic_thread = convert_to_basic_types(flattened)
···196823 else:
197824 cleaned_thread = basic_thread
198825199199- return yaml.dump(cleaned_thread, indent=2, allow_unicode=True, default_flow_style=False)
826826+ yaml_output = yaml.dump(cleaned_thread, indent=2, allow_unicode=True, default_flow_style=False)
827827+ output_parts.append(yaml_output)
828828+829829+ return "\n".join(output_parts)
200830201831202832···5121142 except Exception as e:
5131143 logger.error(f"Error fetching post thread: {e}")
5141144 return None
def find_last_consecutive_post_in_chain(thread_node, author_handle: str):
    """
    Follow an author's self-reply chain downward and report where it ends.

    Beginning at thread_node, each step moves to the first reply written by
    author_handle (other branches are ignored) and stops as soon as a node
    has no reply by that author.

    Args:
        thread_node: The thread node to start from (usually the mention post's thread node)
        author_handle: The handle of the author to match (e.g., "user.bsky.social")

    Returns:
        Tuple of (uri, cid, text) for the last post reached, or None when the
        node or its post is missing, the starting post is not by
        author_handle, or no usable uri/cid was found

    Example:
        If the thread structure is:
        - Post A by @alice (mention) -> thread_node starts here
          - Post B by @alice (consecutive)
            - Post C by @alice (consecutive)
              - Post D by @bob (different author, stop here)

        Returns (uri_C, cid_C, text_C)
    """
    def handle_of(post):
        # None when the post has no author or the author has no handle
        if hasattr(post, 'author') and hasattr(post.author, 'handle'):
            return post.author.handle
        return None

    if not thread_node:
        return None

    start_post = getattr(thread_node, 'post', None)
    if not start_post:
        return None

    # The chain only exists if the starting post itself is by the author
    if handle_of(start_post) != author_handle:
        return None

    # Seed the result with the starting post
    last_uri = getattr(start_post, 'uri', None)
    last_cid = getattr(start_post, 'cid', None)
    last_text = ""
    if hasattr(start_post, 'record') and hasattr(start_post.record, 'text'):
        last_text = start_post.record.text

    node = thread_node
    while getattr(node, 'replies', None):
        # First reply by the same author continues the chain
        follow_up = None
        for candidate in node.replies:
            candidate_post = getattr(candidate, 'post', None)
            if candidate_post and handle_of(candidate_post) == author_handle:
                follow_up = candidate
                break

        if not follow_up:
            break

        node = follow_up
        post = node.post

        # Fields missing on this post keep the value from the previous one
        if hasattr(post, 'uri'):
            last_uri = post.uri
        if hasattr(post, 'cid'):
            last_cid = post.cid
        if hasattr(post, 'record') and hasattr(post.record, 'text'):
            last_text = post.record.text

    # Only report a result backed by both a URI and a CID
    if not (last_uri and last_cid):
        return None
    return (last_uri, last_cid, last_text)
def find_consecutive_parent_posts_by_author(thread_node, author_handle: str) -> List[Dict]:
    """
    Collect the unbroken run of the author's posts directly above a node.

    Walks UP the parent chain from thread_node and gathers each parent post
    for as long as it was written by author_handle; the walk ends at the
    first parent that is missing, has no post, or has a different author.
    The starting node's own post is never included.

    This mirrors find_last_consecutive_post_in_chain, which walks DOWN.

    Args:
        thread_node: The thread node to start from (the notification post's thread node)
        author_handle: The handle of the author to match (e.g., "user.bsky.social")

    Returns:
        List of post dicts ('uri', 'cid', 'author', 'record') in chronological
        order (oldest first). Empty list if no parent posts by the same author.

    Example:
        If the thread structure is:
        - Post A by @alice (first part)
          - Post B by @alice (consecutive) <- start from here (notification)

        Returns [Post A dict] (not including Post B since that's the current node)
    """
    nearest_first = []

    if not thread_node:
        return nearest_first

    node = thread_node
    while True:
        ancestor = getattr(node, 'parent', None)
        if not ancestor:
            break

        post = getattr(ancestor, 'post', None)
        if not post:
            break

        has_author = hasattr(post, 'author')
        if has_author and hasattr(post.author, 'handle'):
            handle = post.author.handle
        else:
            handle = None

        # The run ends at the first parent written by someone else
        if handle != author_handle:
            break

        has_record = hasattr(post, 'record')
        author_info = {
            'handle': handle,
            'display_name': getattr(post.author, 'display_name', '') if has_author else '',
            'did': getattr(post.author, 'did', '') if has_author else ''
        }
        record_info = {
            'text': getattr(post.record, 'text', '') if has_record else '',
            'createdAt': getattr(post.record, 'created_at', '') if has_record else ''
        }
        nearest_first.append({
            'uri': getattr(post, 'uri', ''),
            'cid': getattr(post, 'cid', ''),
            'author': author_info,
            'record': record_info
        })

        node = ancestor

    # Collected nearest-parent first; callers expect oldest first
    return nearest_first[::-1]
515131651613175171318def reply_to_notification(client: Client, notification: Any, reply_text: str, lang: str = "en-US", correlation_id: Optional[str] = None) -> Optional[Dict[str, Any]]: