···11-# PLC Bundle V1 Reference Implementation in TypeScript
11+# PLC Bundle V1 Reference Implementations
2233-This script ([plcbundle.ts](plcbundle.ts)) is a compact, readable reference implementation for creating [PLC Bundle](https://github.com/atscan/plcbundle) v1 archives. It fetches operations from the PLC directory and generates a complete, verifiable repository of data bundles.
33+These scripts are compact, readable reference implementations for creating [PLC Bundle](https://github.com/atscan/plcbundle) v1 archives. Each one fetches operations from the PLC directory and generates a complete, verifiable repository of data bundles.
4455It is fully compliant with the [PLC Bundle v1 Specification](https://github.com/atscan/plcbundle/blob/main/SPECIFICATION.md).
6677## Features
8899- **Spec Compliant:** Correctly implements hashing, chaining, serialization, and boundary de-duplication.
1010-- **Reproducible:** Generates byte-for-byte identical bundles to the official Go implementation.
1111-- **Efficient:** Uses a memory-efficient method to handle duplicates between bundle boundaries.
1010+- **Reproducible:** Generates byte-for-byte identical bundles to the other implementations.
1211- **Standalone:** Single-file script with clear dependencies.
13121414-## Usage
1313+## Implementations
1414+1515+| Language | File |
1616+| --- | --- |
1717+| [TypeScript](#typescript) | [`typescript/plcbundle.ts`](typescript/plcbundle.ts) |
1818+| [Python](#python) | [`python/plcbundle.py`](python/plcbundle.py) |
1919+2020+## TypeScript
2121+2222+File: [`typescript/plcbundle.ts`](typescript/plcbundle.ts)
2323+2424+### Usage
15251626This script should run well with **[Bun](https://bun.com/) (recommended)**, [Deno](https://deno.com/), or [Node.js](https://nodejs.org/en).
17271828The script accepts one optional argument: the path to the output directory where bundles will be stored. If omitted, it defaults to `./plc_bundles`.
19292020-### Bun (Recommended)
3030+#### Bun (Recommended)
21312232Bun is the fastest and easiest way to run this script, as it handles TypeScript and dependencies automatically.
2333···2939bun run plcbundle.ts ./my_plc_bundles
3040```
31413232-### Deno
4242+#### Deno
33433444Deno can also run the script directly. You will need to provide permissions for network access and file system I/O.
3545···3848deno run --allow-net --allow-read --allow-write plcbundle.ts ./my_plc_bundles
3949```
40504141-### Node.js (with TypeScript)
5151+#### Node.js (with TypeScript)
42524353If using Node.js, you must first install dependencies and compile the TypeScript file to JavaScript.
4454···5363node dist/plcbundle.js ./my_plc_bundles
5464```
55656666+## Python
6767+6868+TODO
package.json
typescript/package.json
plcbundle.ts
typescript/plcbundle.ts
+264
python/plcbundle.py
···11+#!/usr/bin/env python3
22+33+"""
44+plcbundle.py - A compact, readable reference implementation for creating
55+plcbundle V1 compliant archives. This script demonstrates all critical spec
66+requirements, including hashing, serialization, ordering, and boundary handling.
77+88+PLC Bundle v1 Specification:
99+ https://github.com/atscan/plcbundle/blob/main/SPECIFICATION.md
1010+"""
1111+1212+import asyncio
1313+import hashlib
1414+import json
1515+import sys
1616+from datetime import datetime, timezone
1717+from pathlib import Path
1818+from typing import TypedDict, Self
1919+2020+import httpx
2121+import zstd
# --- Configuration ---
# Number of operations packed into each bundle file.
BUNDLE_SIZE = 10_000
# Name of the JSON index file kept inside the bundle directory.
INDEX_FILE = 'plc_bundles.json'
# Output directory used when no path is given on the command line.
DEFAULT_DIR = './plc_bundles_py'
# Base URL of the PLC directory the operations are exported from.
PLC_URL = 'https://plc.directory'
2828+2929+# --- Types (as per spec) ---
3030+class PLCOperation(TypedDict):
3131+ did: str
3232+ cid: str
3333+ createdAt: str
3434+ operation: dict
3535+ nullified: bool | str | None
3636+ _raw: str # Holds the original raw JSON string for reproducibility
class BundleMetadata(TypedDict):
    """Index entry describing a single saved bundle."""

    bundle_number: int
    start_time: str         # createdAt of the first operation in the bundle
    end_time: str           # createdAt of the last operation in the bundle
    operation_count: int
    did_count: int          # number of distinct DIDs in the bundle
    hash: str               # The chain hash
    content_hash: str       # SHA-256 of the uncompressed JSONL data
    parent: str             # chain hash of the previous bundle ('' for the first)
    compressed_hash: str    # SHA-256 of the compressed file contents
    compressed_size: int
    uncompressed_size: int
    cursor: str             # end_time of the previous bundle ('' for the first)
    created_at: str
class Index(TypedDict):
    """Top-level structure of the index file that lists every saved bundle."""

    version: str
    last_bundle: int            # number of the most recently saved bundle (0 if none)
    updated_at: str
    total_size_bytes: int       # running sum of the bundles' compressed sizes
    bundles: list[BundleMetadata]
class PlcBundleManager:
    """
    Manages the state and process of fetching, validating, and creating PLC bundles.

    Mutable state kept per instance:

    * ``_index``     - in-memory copy of the index file (``INDEX_FILE``).
    * ``_mempool``   - operations accepted but not yet written to a bundle.
    * ``_seen_cids`` - CIDs used to de-duplicate operations, both from the
      previous bundle's boundary and within new batches; it is pruned after
      every bundle to stay memory-efficient.

    Build instances with the async ``create()`` factory, then call ``run()``.
    """
    _index: Index
    _mempool: list[PLCOperation]
    _seen_cids: set[str]

    def __init__(self, bundle_dir: str):
        """Store the target directory and set up the HTTP client and empty state."""
        self._bundle_dir = Path(bundle_dir)
        self._http_client = httpx.AsyncClient(timeout=30)
        # Created per instance on purpose: class-level `[]` / `set()` defaults
        # would be shared (and mutated) by every manager in the process.
        self._mempool = []
        self._seen_cids = set()

    @classmethod
    async def create(cls, bundle_dir: str) -> Self:
        """Factory to create and asynchronously initialize a PlcBundleManager instance."""
        manager = cls(bundle_dir)
        await manager._init()
        return manager

    async def _init(self):
        """
        Initializes the manager by loading the index and seeding the `seen_cids`
        set with the CIDs from the last saved bundle's boundary.
        """
        # parents=True so a nested output path (e.g. ./data/plc/bundles) works.
        self._bundle_dir.mkdir(parents=True, exist_ok=True)
        self._index = await self._load_index()
        print(f"plcbundle Reference Implementation\nDirectory: {self._bundle_dir}\n")

        last_bundle = self._index['bundles'][-1] if self._index['bundles'] else None
        if last_bundle:
            print(f"Resuming from bundle {last_bundle['bundle_number'] + 1}. Last op time: {last_bundle['end_time']}")
            try:
                prev_ops = await self._load_bundle_ops(last_bundle['bundle_number'])
                self._seen_cids = self._get_boundary_cids(prev_ops)
                print(f" Seeded de-duplication set with {len(self._seen_cids)} boundary CIDs.")
            except FileNotFoundError:
                # Best effort: the index lists the bundle but its file is gone.
                print(" Warning: Could not load previous bundle file. Boundary deduplication may be incomplete.")
        else:
            print('Starting from the beginning (genesis bundle).')

    async def run(self):
        """
        The main execution loop. It continuously fetches operations, validates and
        de-duplicates them, fills the mempool, and creates bundles when ready.

        When fetching stops (no more data, or an error that is printed), the
        index is saved and a summary is printed. The HTTP client is closed
        before returning, so a manager is meant for a single `run()`.
        """
        last_bundle = self._index['bundles'][-1] if self._index['bundles'] else None
        cursor = last_bundle['end_time'] if last_bundle else None

        try:
            await self._fetch_loop(cursor)

            await self._save_index()
            print("\n---\nProcess complete.")
            print(f"Total bundles in index: {len(self._index['bundles'])}")
            print(f"Operations in mempool: {len(self._mempool)}")
            total_mb = self._index['total_size_bytes'] / 1024 / 1024
            print(f"Total size: {total_mb:.2f} MB")
        finally:
            # Release the connection pool even if saving the index fails.
            await self._http_client.aclose()

    # --- Private Helper Methods ---

    async def _fetch_loop(self, cursor: str | None):
        """
        Fetch pages starting after `cursor` until the directory returns nothing
        or an error occurs. Errors are printed and end the loop; they are not
        re-raised, so the caller can still persist the index.
        """
        while True:
            try:
                print(f"\nFetching operations from cursor: {cursor or 'start'}...")
                fetched_ops = await self._fetch_operations(cursor)
                if not fetched_ops:
                    print('No more operations available from PLC directory.')
                    break

                self._process_and_validate_ops(fetched_ops)
                cursor = fetched_ops[-1]['createdAt']

                while len(self._mempool) >= BUNDLE_SIZE:
                    await self._create_and_save_bundle()

                await asyncio.sleep(0.2)  # Be nice to the server
            except httpx.HTTPStatusError as e:
                print(f"\nError: HTTP {e.response.status_code} - {e.response.text}")
                break
            except Exception as e:
                print(f"\nAn unexpected error occurred: {e}")
                break

    async def _fetch_operations(self, after: str | None) -> list[PLCOperation]:
        """
        Request up to 1000 operations from `{PLC_URL}/export`, optionally after
        the given cursor, and return them parsed. Returns [] for an empty body.

        Raises httpx.HTTPStatusError on a non-success response.
        """
        params: dict[str, str | int] = {'count': 1000}
        if after:
            params['after'] = after

        response = await self._http_client.get(f"{PLC_URL}/export", params=params)
        response.raise_for_status()

        # Split on '\n' only: str.splitlines() would also break on characters
        # such as U+2028, which may legally appear inside a JSON string.
        lines = [line for line in response.text.strip().split('\n') if line]
        if not lines:
            return []

        # Important: The `_raw` key is added here to preserve the original JSON string,
        # ensuring byte-for-byte reproducibility as required by Spec 4.2.
        return [{**json.loads(line), '_raw': line} for line in lines]

    def _process_and_validate_ops(self, ops: list[PLCOperation]):
        """
        Append the not-yet-seen operations from `ops` to the mempool.

        Operations whose CID is already in `_seen_cids` are skipped. Every
        accepted operation must not be older than the previously accepted one
        (or, with an empty mempool, than the last bundle's end time).

        Raises ValueError when an operation violates chronological order;
        operations accepted before the offending one remain in the mempool.
        """
        last_op = self._mempool[-1] if self._mempool else None
        last_bundle = self._index['bundles'][-1] if self._index['bundles'] else None
        last_timestamp = last_op['createdAt'] if last_op else (last_bundle['end_time'] if last_bundle else '')

        new_ops_count = 0
        for op in ops:
            if op['cid'] in self._seen_cids:
                continue

            # Timestamps are compared as strings, exactly as they were received.
            if op['createdAt'] < last_timestamp:
                raise ValueError(f"Chronological validation failed: op {op['cid']} at {op['createdAt']} is older than last op at {last_timestamp}")

            self._mempool.append(op)
            self._seen_cids.add(op['cid'])
            last_timestamp = op['createdAt']
            new_ops_count += 1
        print(f" Added {new_ops_count} new operations to mempool.")

    async def _create_and_save_bundle(self):
        """
        Take the first BUNDLE_SIZE operations off the mempool, write them as a
        compressed bundle file, record the bundle in the index, prune the
        de-duplication set and save the index.
        """
        bundle_ops = self._mempool[:BUNDLE_SIZE]
        self._mempool = self._mempool[BUNDLE_SIZE:]

        last_bundle = self._index['bundles'][-1] if self._index['bundles'] else None
        parent_hash = last_bundle['hash'] if last_bundle else ''

        bundle_number = self._index['last_bundle'] + 1
        filename = f"{bundle_number:06d}.jsonl.zst"
        print(f"\nCreating bundle {filename}...")

        # Spec 4.2 & 6.3: Hashing and serialization must be exact.
        # The raw lines are used verbatim, each terminated by a single '\n'.
        jsonl_data = "".join(op['_raw'] + '\n' for op in bundle_ops).encode('utf-8')
        content_hash = hashlib.sha256(jsonl_data).hexdigest()
        chain_hash = self._calculate_chain_hash(parent_hash, content_hash)
        compressed_data = zstd.compress(jsonl_data, 3)

        (self._bundle_dir / filename).write_bytes(compressed_data)

        self._index['bundles'].append({
            'bundle_number': bundle_number,
            'start_time': bundle_ops[0]['createdAt'],
            'end_time': bundle_ops[-1]['createdAt'],
            'operation_count': len(bundle_ops),
            'did_count': len({op['did'] for op in bundle_ops}),
            'hash': chain_hash, 'content_hash': content_hash, 'parent': parent_hash,
            'compressed_hash': hashlib.sha256(compressed_data).hexdigest(),
            'compressed_size': len(compressed_data),
            'uncompressed_size': len(jsonl_data),
            'cursor': last_bundle['end_time'] if last_bundle else '',
            'created_at': datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'),
        })
        self._index['last_bundle'] = bundle_number
        self._index['total_size_bytes'] += len(compressed_data)

        # Prune `seen_cids` to keep it memory-efficient: only the new bundle's
        # boundary CIDs and the CIDs still waiting in the mempool are kept.
        new_boundary_cids = self._get_boundary_cids(bundle_ops)
        mempool_cids = {op['cid'] for op in self._mempool}
        self._seen_cids = new_boundary_cids.union(mempool_cids)

        await self._save_index()
        print(f" ✓ Saved. Hash: {chain_hash[:16]}...")
        print(f" Pruned de-duplication set to {len(self._seen_cids)} CIDs.")

    async def _load_index(self) -> Index:
        """Read the index file, or return a fresh empty index if it does not exist."""
        try:
            return json.loads((self._bundle_dir / INDEX_FILE).read_text(encoding='utf-8'))
        except FileNotFoundError:
            return {'version': '1.0', 'last_bundle': 0, 'updated_at': '', 'total_size_bytes': 0, 'bundles': []}

    async def _save_index(self):
        """Stamp `updated_at` and write the index via a temporary file."""
        self._index['updated_at'] = datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
        temp_path = self._bundle_dir / f"{INDEX_FILE}.tmp"
        temp_path.write_text(json.dumps(self._index, indent=2), encoding='utf-8')
        # replace() overwrites an existing index on every platform, whereas
        # rename() raises on Windows when the destination already exists.
        temp_path.replace(self._bundle_dir / INDEX_FILE)

    async def _load_bundle_ops(self, bundle_number: int) -> list[PLCOperation]:
        """
        Read and decompress bundle `bundle_number` and return its operations,
        each carrying its original line in `_raw`. Blank lines are ignored, so
        an empty bundle yields [].

        Raises FileNotFoundError if the bundle file is missing.
        """
        filename = f"{bundle_number:06d}.jsonl.zst"
        compressed = (self._bundle_dir / filename).read_bytes()
        decompressed = zstd.decompress(compressed).decode('utf-8')
        return [
            {**json.loads(line), '_raw': line}
            for line in decompressed.strip().split('\n')
            if line
        ]

    # --- Static Utilities ---

    @staticmethod
    def _calculate_chain_hash(parent: str, content_hash: str) -> str:
        """
        Return the SHA-256 hex digest of "<parent>:<content_hash>", or of
        "plcbundle:genesis:<content_hash>" when `parent` is empty.
        """
        data = f"{parent}:{content_hash}" if parent else f"plcbundle:genesis:{content_hash}"
        return hashlib.sha256(data.encode('utf-8')).hexdigest()

    @staticmethod
    def _get_boundary_cids(ops: list[PLCOperation]) -> set[str]:
        """Return the CIDs of all operations sharing the `createdAt` of the last one."""
        if not ops:
            return set()
        last_time = ops[-1]['createdAt']
        return {op['cid'] for op in ops if op['createdAt'] == last_time}
async def main():
    """Entry point for the script.

    The optional first command-line argument is the bundle directory;
    DEFAULT_DIR is used when it is omitted.
    """
    cli_args = sys.argv[1:]
    target_dir = cli_args[0] if cli_args else DEFAULT_DIR
    bundle_manager = await PlcBundleManager.create(target_dir)
    await bundle_manager.run()
if __name__ == "__main__":
    # Run the async entry point. Ctrl-C only prints a notice; any other
    # error is reported on stderr and ends the process with exit status 1.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\nProcess interrupted by user.")
    except Exception as exc:
        print(f"\nFATAL ERROR: {exc}", file=sys.stderr)
        raise SystemExit(1)
264264+