···11+# Design Document
22+33+## Overview
44+55+The Tagged String library is a simple, zero-dependency TypeScript library that extracts tagged entities from strings. The design focuses on simplicity and minimal API surface while providing flexible configuration options.
66+77+**Example Usage:**
88+```typescript
99+import chalk from 'chalk'; // Example formatter (not a dependency of parser)
1010+1111+// Define schema for known entity types with formatters
1212+const schema = {
1313+ operation: { type: 'string', format: chalk.yellow },
1414+ stack: { type: 'string', format: chalk.blue },
1515+ changes: { type: 'number', format: (n) => chalk.green(n.toString()) },
1616+ create: 'number', // Can use shorthand without formatter
1717+ update: 'number',
1818+ destroy: 'number'
1919+};
2020+2121+const parser = new TaggedStringParser({ schema });
2222+const result = parser.parse('[operation:OP-123] started with [changes:5] to [stack:ST-456]');
2323+2424+// Access parsed entities
2525+// result.entities: [
2626+// { type: 'operation', value: 'OP-123', parsedValue: 'OP-123', formattedValue: '\x1b[33mOP-123\x1b[39m', inferredType: 'string', position: 0 },
2727+// { type: 'changes', value: '5', parsedValue: 5, formattedValue: '\x1b[32m5\x1b[39m', inferredType: 'number', position: 32 },
2828+// { type: 'stack', value: 'ST-456', parsedValue: 'ST-456', formattedValue: '\x1b[34mST-456\x1b[39m', inferredType: 'string', position: 47 }
2929+// ]
3030+3131+// Get formatted message with all entities formatted
3232+console.log(result.format());
3333+// Output: "\x1b[33mOP-123\x1b[39m started with \x1b[32m5\x1b[39m to \x1b[34mST-456\x1b[39m"
3434+3535+// Unknown entity types are automatically inferred (no formatting)
3636+const result2 = parser.parse('[count:42] [enabled:true] [name:test]');
3737+// result2.entities: [
3838+// { type: 'count', value: '42', parsedValue: 42, formattedValue: '42', inferredType: 'number', position: 0 },
3939+// { type: 'enabled', value: 'true', parsedValue: true, formattedValue: 'true', inferredType: 'boolean', position: 11 },
4040+// { type: 'name', value: 'test', parsedValue: 'test', formattedValue: 'test', inferredType: 'string', position: 26 }
4141+// ]
4242+4343+// Filter by type
4444+result.getEntitiesByType('operation'); // [{ type: 'operation', value: 'OP-123', ... }]
4545+4646+// Get all types
4747+result.getAllTypes(); // ['operation', 'changes', 'stack']
4848+```
4949+5050+## Architecture
5151+5252+The parser follows a simple single-pass scanning architecture:
5353+5454+1. **Input**: Raw string + optional configuration
5555+2. **Scanning**: Character-by-character traversal identifying tag boundaries
5656+3. **Extraction**: Parse entity type and value from tag content
5757+4. **Output**: Collection of parsed entities with original message preserved
5858+5959+The design uses a state machine approach during scanning to track whether the parser is inside or outside a tag.
6060+6161+## Components and Interfaces
6262+6363+### Entity Interface
6464+```typescript
6565+interface Entity {
6666+ type: string; // Entity type name (e.g., 'operation', 'count')
6767+ value: string; // Raw string value from tag
6868+ parsedValue: string | number | boolean; // Typed value based on schema or inference
6969+ formattedValue: string; // Formatted string (via formatter or toString of parsedValue)
7070+ inferredType: 'string' | 'number' | 'boolean'; // The determined type
7171+ position: number; // Character position in original string
7272+}
7373+```
7474+7575+### EntitySchema Type
7676+```typescript
7777+type PrimitiveType = 'string' | 'number' | 'boolean';
7878+7979+interface EntityDefinition {
8080+ type: PrimitiveType;
8181+ format?: (value: any) => string; // Optional formatter function
8282+}
8383+8484+type EntitySchema = Record<string, PrimitiveType | EntityDefinition>;
8585+```
8686+8787+### ParserConfig Interface
8888+```typescript
8989+interface ParserConfig {
9090+ openDelimiter?: string; // Default: '['
9191+ closeDelimiter?: string; // Default: ']'
9292+ typeSeparator?: string; // Default: ':'
9393+ schema?: EntitySchema; // Optional schema for known entity types
9494+}
9595+```
9696+9797+### ParseResult Interface
9898+```typescript
9999+interface ParseResult {
100100+ originalMessage: string;
101101+ entities: Entity[];
102102+103103+ // Utility methods
104104+ getEntitiesByType(type: string): Entity[];
105105+ getAllTypes(): string[];
106106+ format(): string; // Returns message with all entities replaced by their formattedValue
107107+}
108108+```
109109+110110+### TaggedStringParser Class
111111+```typescript
112112+class TaggedStringParser {
113113+ constructor(config?: ParserConfig);
114114+ parse(message: string): ParseResult;
115115+116116+ // Internal helper methods
117117+ private parseValue(type: string, rawValue: string): {
118118+ parsedValue: string | number | boolean,
119119+ formattedValue: string,
120120+ inferredType: 'string' | 'number' | 'boolean'
121121+ };
122122+ private inferType(value: string): 'string' | 'number' | 'boolean';
123123+ private applyFormatter(type: string, parsedValue: any): string;
124124+}
125125+```
126126+127127+## Data Models
128128+129129+### Internal Scanner State
130130+The parser maintains minimal state during scanning:
131131+- Current position in string
132132+- Whether currently inside a tag
133133+- Current tag content buffer
134134+- Accumulated entities array
135135+136136+### Entity Representation
137137+Entities are plain objects with six properties:
138138+- `type`: The classification/name of the entity (e.g., 'operation', 'count')
139139+- `value`: The raw string value extracted from the tag
140140+- `parsedValue`: The typed value (string, number, or boolean) based on schema or inference
141141+- `formattedValue`: The string representation after applying formatter (or toString of parsedValue)
142142+- `inferredType`: The determined primitive type
143143+- `position`: Where in the original string the tag started (useful for debugging)
144144+145145+### Schema, Type Inference, and Formatting
146146+The parser supports three layers of entity processing:
147147+148148+1. **Schema-based Type Parsing (Known Entities)**: When a schema is provided and an entity type matches a schema key, the parser uses the schema's specified type to parse the value.
149149+150150+2. **Inference-based Type Parsing (Unknown Entities)**: When no schema is provided or an entity type is not in the schema, the parser automatically infers the type:
151151+ - Numbers: Values matching `/^-?\d+(\.\d+)?$/` are parsed as numbers
152152+ - Booleans: Values matching `true` or `false` (case-insensitive) are parsed as booleans
153153+ - Strings: Everything else defaults to string type
154154+155155+3. **Formatting**: After parsing, entities can be formatted:
156156+ - If a formatter function is provided in the schema, it's applied to the parsedValue
157157+ - If no formatter is provided, the parsedValue is converted to string
158158+ - The formattedValue is used when calling `result.format()` to reconstruct the message
159159+160160+This three-layer approach allows users to:
161161+- Define expected entity types explicitly
162162+- Handle ad-hoc entities automatically
163163+- Apply custom formatting (colors, trimming, etc.) per entity type
164164+165165+## Error Handling
166166+167167+The parser follows a lenient error handling strategy:
168168+169169+1. **Malformed Tags**: Skip and continue parsing
170170+ - Unclosed tags: Ignore the incomplete tag
171171+ - Missing type separator: Treat entire content as value with empty type
172172+ - Empty tags: Skip entirely
173173+174174+2. **Invalid Configuration**: Throw errors during construction
175175+ - Empty delimiters
176176+ - Delimiter conflicts (open === close)
177177+ - Multi-character delimiters that could cause ambiguity
178178+179179+3. **Edge Cases**:
180180+ - Empty strings: Return empty entity array
181181+ - Nested tags: Not supported, inner delimiters treated as literal characters
182182+ - Escaped delimiters: Not supported in v1 (future enhancement)
183183+184184+## Testing Strategy
185185+186186+### Unit Tests
187187+Focus on core parsing logic:
188188+- Single entity extraction
189189+- Multiple entities in one message
190190+- Messages without entities
191191+- Malformed tag handling
192192+- Custom delimiter configuration
193193+- Entity ordering preservation
194194+- Type-based filtering
195195+- Schema-based type parsing
196196+- Automatic type inference for unknown entities
197197+- Number, boolean, and string type detection
198198+199199+### Test Structure
200200+```typescript
201201+describe('TaggedStringParser', () => {
202202+ describe('parse', () => {
203203+ it('should extract single entity');
204204+ it('should extract multiple entities');
205205+ it('should handle messages without entities');
206206+ it('should skip malformed tags');
207207+ it('should preserve entity order');
208208+ });
209209+210210+ describe('schema and type inference', () => {
211211+ it('should parse known entities using schema');
212212+ it('should infer number type for numeric values');
213213+ it('should infer boolean type for true/false values');
214214+ it('should default to string type for other values');
215215+ it('should handle mixed known and unknown entities');
216216+ });
217217+218218+ describe('configuration', () => {
219219+ it('should use custom delimiters');
220220+ it('should throw on invalid config');
221221+ });
222222+223223+ describe('ParseResult', () => {
224224+ it('should filter entities by type');
225225+ it('should return all entity types');
226226+ });
227227+});
228228+```
229229+230230+## Real-World Examples
231231+232232+Based on IaC system logging patterns:
233233+234234+```typescript
235235+// Operation lifecycle
236236+parser.parse('[operation:OP-123] started with [changes:5] to [stack:ST-456]');
237237+parser.parse('[operation:OP-123] completed [changes:5] to [stack:ST-456]');
238238+parser.parse('[operation:OP-123] failed: [reason:"Error message"]');
239239+240240+// Planning
241241+parser.parse('[blueprint:BP-123] planning for [stack:ST-456]');
242242+parser.parse('[blueprint:BP-123] plan complete with [create:2] [update:3] [destroy:1] for [stack:ST-456]');
243243+244244+// Resource commands
245245+parser.parse('[action:create] executing for [resource:RS-123] [resourceName:"my-function"] [type:function]');
246246+parser.parse('[action:create] completed for [resource:RS-123] [externalId:EXT-789]');
247247+parser.parse('[action:create] failed for [resource:RS-123]: [error:"Error message"]');
248248+249249+// Resource type-specific
250250+parser.parse('[resourceType:function] creating [resourceName:"my-function"]');
251251+parser.parse('[resourceType:database] updating [resourceName:"user-db"]');
252252+```
253253+254254+### Handling Quoted Values
255255+Note that values may contain quotes (e.g., `[resourceName:"my-function"]`). The parser treats everything between the type separator and closing delimiter as the value, including quotes. This keeps the implementation simple while preserving the original data format.
256256+257257+## Runtime Environment
258258+259259+### Node.js v24 Native TypeScript Support
260260+The parser is designed to run directly with Node.js v24's native TypeScript execution:
261261+262262+```bash
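263263+# Run directly without compilation (explicit opt-in flag, for Node versions that do not strip types by default)
264264+node --experimental-strip-types parser.ts
265265+266266+# Or with no flag at all (type stripping is enabled by default in Node v24+)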
267267+node parser.ts
268268+```
269269+270270+No build step or compilation to JavaScript is required. The parser can be developed and executed as pure TypeScript files.
271271+272272+## Implementation Notes
273273+274274+### Performance Considerations
275275+- Single-pass parsing: O(n) time complexity
276276+- Type inference is a cheap per-value check: one anchored regex (`/^-?\d+(\.\d+)?$/`) for numbers and a case-insensitive comparison against `true`/`false` for booleans
277277+- Minimal memory allocation (reuse buffers where possible)
278278+- Schema lookup is O(1) using object property access
279279+280280+### Design Decisions
281281+282282+**Why character-by-character scanning?**
283283+- Simpler to understand and maintain
284284+- No regex complexity
285285+- Easier to handle edge cases
286286+- Predictable performance
287287+288288+**Why lenient error handling?**
289289+- Malformed input strings should never break parsing
290290+- Partial data is better than no data
291291+- Aligns with robustness principle: "Be liberal in what you accept"
292292+293293+**Why include position in Entity?**
294294+- Useful for debugging
295295+- Minimal overhead
296296+- Enables future features (e.g., highlighting in UI)
297297+298298+**Why no nested tag support?**
299299+- Keeps implementation simple
300300+- Rare use case for strings
301301+- Can be added later if needed
302302+303303+**Why both schema and inference?**
304304+- Schema provides explicit control for important entity types
305305+- Inference handles ad-hoc entities without configuration
306306+- Consumers get typed values for better formatting control
307307+- Balances flexibility with type safety
308308+309309+**Why Node v24 without compilation?**
310310+- Faster development iteration (no build step)
311311+- Simpler project setup
312312+- Native TypeScript execution (type stripping) is enabled by default in Node v24, so no flag or loader is needed
313313+- Reduces tooling complexity
+89
.kiro/specs/tagged-string/requirements.md
···11+# Requirements Document
22+33+## Introduction
44+55+The Tagged String library is a lightweight TypeScript string parsing library with zero dependencies. It extracts structured entity information from strings using a tag-based syntax. The parser identifies and extracts tagged entities (such as data models, counts, identifiers, and other structured data) from strings, making them programmatically accessible while maintaining the readability of the original message.
66+77+## Glossary
88+99+- **Parser**: The system component that processes input strings and extracts tagged entities
1010+- **Entity**: A structured piece of information embedded in a string using tag syntax
1111+- **Tag**: A syntactic marker that identifies the start and type of an entity within a string
1212+- **String**: The human-readable input text passed to the Parser, which may contain zero or more tagged entities
1313+- **Entity Type**: A classification label for an entity (e.g., model, count, identifier)
1414+- **Schema**: A user-defined specification that maps entity type names to their expected data types
1515+- **Known Entity**: An entity whose type is defined in the parser's schema
1616+- **Unknown Entity**: An entity whose type is not defined in the schema, requiring automatic type inference
1717+- **Primitive Type**: A basic data type (string, number, boolean) inferred from entity values
1818+1919+## Requirements
2020+2121+### Requirement 1
2222+2323+**User Story:** As a developer, I want to parse tagged entities from strings, so that I can extract structured data while keeping strings human-readable
2424+2525+#### Acceptance Criteria
2626+2727+1. WHEN the Parser receives a string containing tagged entities, THE Parser SHALL extract all entities with their type and value
2828+2. WHEN the Parser receives a string without any tagged entities, THE Parser SHALL return an empty entity collection
2929+3. THE Parser SHALL preserve the original string text during parsing
3030+4. THE Parser SHALL support multiple entities within a single string
3131+5. WHEN the Parser encounters malformed tag syntax, THE Parser SHALL skip the malformed tag and continue parsing
3232+3333+### Requirement 2
3434+3535+**User Story:** As a developer, I want to define custom tag syntax, so that I can adapt the parser to different conventions
3636+3737+#### Acceptance Criteria
3838+3939+1. THE Parser SHALL accept configuration for tag opening delimiter
4040+2. THE Parser SHALL accept configuration for tag closing delimiter
4141+3. THE Parser SHALL accept configuration for type-value separator syntax
4242+4. WHEN no configuration is provided, THE Parser SHALL use default tag syntax
4343+5. THE Parser SHALL validate configuration parameters before parsing
4444+4545+### Requirement 3
4646+4747+**User Story:** As a developer, I want to access parsed entities by type, so that I can easily retrieve specific kinds of information from strings
4848+4949+#### Acceptance Criteria
5050+5151+1. THE Parser SHALL provide a method to retrieve all entities of a specific type
5252+2. THE Parser SHALL provide a method to retrieve all parsed entities
5353+3. THE Parser SHALL return entities in the order they appear in the string
5454+4. WHEN no entities of a requested type exist, THE Parser SHALL return an empty collection
5555+5656+### Requirement 4
5757+5858+**User Story:** As a developer, I want to define a schema for known entity types, so that the parser can provide typed values for entities I care about
5959+6060+#### Acceptance Criteria
6161+6262+1. THE Parser SHALL accept an optional schema mapping entity type names to expected data types
6363+2. WHEN the Parser encounters a Known Entity, THE Parser SHALL parse the value according to the schema type
6464+3. WHEN the Parser encounters an Unknown Entity, THE Parser SHALL infer the primitive type from the value
6565+4. THE Parser SHALL support string, number, and boolean primitive types for Unknown Entities
6666+5. THE Parser SHALL expose typed values to consumers for programmatic formatting
6767+6868+### Requirement 5
6969+7070+**User Story:** As a developer, I want to apply custom formatters to entity values, so that I can control how entities are displayed in output
7171+7272+#### Acceptance Criteria
7373+7474+1. THE Parser SHALL accept optional formatter functions in the schema for each entity type
7575+2. WHEN a formatter is provided for an entity type, THE Parser SHALL apply the formatter to the parsed value
7676+3. WHEN no formatter is provided, THE Parser SHALL convert the parsed value to string
7777+4. THE Parser SHALL store the formatted result in the Entity formattedValue property
7878+5. THE Parser SHALL provide a format method on ParseResult that reconstructs the message with formatted entities
7979+8080+### Requirement 6
8181+8282+**User Story:** As a developer, I want the parser to have zero runtime dependencies and run directly with Node.js, so that I can use it without compilation overhead
8383+8484+#### Acceptance Criteria
8585+8686+1. THE Parser SHALL be implemented using only TypeScript standard library features
8787+2. THE Parser SHALL not require any third-party runtime dependencies
8888+3. THE Parser SHALL be executable directly with Node.js v24 native TypeScript support
8989+4. THE Parser SHALL not require compilation to JavaScript for execution
+121
.kiro/specs/tagged-string/tasks.md
···11+# Implementation Plan
22+33+- [x] 1. Set up project structure and type definitions
44+ - Create TypeScript configuration file (tsconfig.json) for Node v24 native execution
55+ - Define Entity interface with type, value, parsedValue, formattedValue, inferredType, and position properties
66+ - Define PrimitiveType and EntityDefinition types for schema with optional formatters
77+ - Define EntitySchema type for mapping entity types to primitive types or definitions with formatters
88+ - Define ParserConfig interface with delimiter, separator, and schema options
99+ - Define ParseResult interface with utility methods including format()
1010+ - _Requirements: 6.1, 6.2, 6.3, 6.4_
1111+1212+- [x] 2. Implement ParseResult class
1313+ - [x] 2.1 Create ParseResult class with constructor accepting original message and entities array
1414+ - Store original message and entities as properties
1515+ - _Requirements: 1.3_
1616+1717+ - [x] 2.2 Implement getEntitiesByType method
1818+ - Filter entities array by type parameter
1919+ - Return filtered array in original order
2020+ - Return empty array when no matches found
2121+ - _Requirements: 3.1, 3.2, 3.3, 3.4_
2222+2323+ - [x] 2.3 Implement getAllTypes method
2424+ - Extract unique entity types from entities array
2525+ - Return array of type strings
2626+ - _Requirements: 3.2_
2727+2828+ - [x] 2.4 Implement format method
2929+ - Reconstruct original message replacing tags with formattedValue from entities
3030+ - Use entity position to correctly place formatted values
3131+ - Return formatted string
3232+ - _Requirements: 5.5_
3333+3434+- [x] 3. Implement TaggedStringParser class
3535+ - [x] 3.1 Create parser class with configuration support
3636+ - Accept optional ParserConfig in constructor (including schema)
3737+ - Set default delimiters: '[' and ']'
3838+ - Set default type separator: ':'
3939+ - Store schema for entity type lookup
4040+ - Validate configuration (no empty delimiters, no delimiter conflicts)
4141+ - _Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 4.1, 5.1_
4242+4343+ - [x] 3.2 Implement type inference and parsing helpers
4444+ - Create inferType method to detect number, boolean, or string from raw value
4545+ - Create parseValue method that uses schema (if available) or falls back to inference
4646+ - Handle number parsing (including decimals and negatives)
4747+ - Handle boolean parsing (case-insensitive true/false)
4848+ - Default to string for all other values
4949+ - _Requirements: 4.2, 4.3, 4.4, 4.5_
5050+5151+ - [x] 3.3 Implement formatter application
5252+ - Create applyFormatter method that checks schema for formatter function
5353+ - If formatter exists, apply it to parsedValue and return result
5454+ - If no formatter, convert parsedValue to string
5555+ - Store result as formattedValue in entity
5656+ - _Requirements: 5.1, 5.2, 5.3, 5.4_
5757+5858+ - [x] 3.4 Implement core parsing logic in parse method
5959+ - Create character-by-character scanner
6060+ - Track parser state (inside/outside tag)
6161+ - Accumulate tag content when inside tag boundaries
6262+ - Extract entity type and raw value from tag content using separator
6363+ - Call parseValue to get typed parsedValue and inferredType
6464+ - Call applyFormatter to get formattedValue
6565+ - Record entity position in original string
6666+ - Handle malformed tags by skipping and continuing
6767+ - Return ParseResult with original message and extracted entities
6868+ - _Requirements: 1.1, 1.2, 1.3, 1.4, 1.5, 3.3, 4.2, 4.5, 5.4_
6969+7070+ - [x] 3.5 Handle edge cases in parsing
7171+ - Return empty entities array for empty input strings
7272+ - Skip unclosed tags at end of string
7373+ - Handle tags without type separator (treat as value with empty type)
7474+ - Skip empty tags
7575+ - Preserve quoted values in entity values
7676+ - _Requirements: 1.2, 1.5_
7777+7878+- [x] 4. Write unit tests for ParseResult
7979+ - Test getEntitiesByType with matching and non-matching types
8080+ - Test getAllTypes with multiple and zero entities
8181+ - Test entity order preservation
8282+ - Test format method reconstructs message with formatted entities
8383+ - Test format method with entities that have custom formatters
8484+ - _Requirements: 3.1, 3.2, 3.3, 3.4, 5.5_
8585+8686+- [x] 5. Write unit tests for TaggedStringParser
8787+ - Test single entity extraction
8888+ - Test multiple entities in one message
8989+ - Test messages without entities
9090+ - Test custom delimiter configuration
9191+ - Test configuration validation (invalid delimiters)
9292+ - Test malformed tag handling (unclosed, missing separator, empty)
9393+ - Test entity position tracking
9494+ - Test schema-based type parsing for known entities
9595+ - Test automatic type inference for unknown entities (numbers, booleans, strings)
9696+ - Test mixed known and unknown entities in same message
9797+ - Test formatter functions applied to entity values
9898+ - Test entities without formatters default to string conversion
9999+ - Test shorthand schema syntax (just type) vs full EntityDefinition with formatter
100100+ - Test real-world IaC log examples from design document
101101+ - _Requirements: 1.1, 1.2, 1.4, 1.5, 2.1, 2.2, 2.3, 2.4, 2.5, 4.2, 4.3, 4.4, 4.5, 5.1, 5.2, 5.3, 5.4_
102102+103103+- [x] 6. Create example usage file
104104+ - Create examples.ts demonstrating basic usage with schema
105105+ - Show schema definition for known entity types with formatters
106106+ - Include example with chalk or simple string formatters (String.trim, toUpperCase, etc.)
107107+ - Include IaC logging examples from design document
108108+ - Show custom configuration usage
109109+ - Show entity filtering by type
110110+ - Demonstrate accessing parsedValue, formattedValue, and inferredType properties
111111+ - Show examples of automatic type inference for unknown entities
112112+ - Demonstrate format() method to get fully formatted message
113113+ - _Requirements: 1.1, 2.1, 3.1, 4.2, 4.5, 5.1, 5.5_
114114+115115+- [x] 7. Create package.json and Node v24 configuration
116116+ - Set up package.json with TypeScript as dev dependency only
117117+ - Configure for Node v24 native TypeScript execution
118118+ - Add script to run examples directly without compilation
119119+ - Set module type and entry points
120120+ - Verify zero runtime dependencies
121121+ - _Requirements: 6.1, 6.2, 6.3, 6.4_
+15
.kiro/steering/product.md
···11+# Product Overview
22+33+Tagged String is a lightweight, zero-dependency TypeScript library for extracting structured entity information from strings using tag-based syntax (e.g., `[operation:OP-123]`).
44+55+## Core Purpose
66+77+Parse tagged entities from strings while maintaining readability, enabling programmatic access to structured data embedded in text. Designed for IaC systems and other applications that need to extract typed information from string output.
88+99+## Key Features
1010+1111+- Tag-based entity extraction with configurable delimiters
1212+- Schema-based type parsing for known entities with optional formatters
1313+- Automatic type inference (string, number, boolean) for unknown entities
1414+- Entity filtering and message reconstruction with formatted values
1515+- Zero runtime dependencies, runs natively on Node.js v24
+43
.kiro/steering/structure.md
···11+# Project Structure
22+33+## Directory Layout
44+55+```
66+.
77+├── .kiro/
88+│ ├── specs/ # Feature specifications (requirements, design, tasks)
99+│ └── steering/ # AI assistant guidance documents
1010+├── src/
1111+│ └── types.ts # Core type definitions and interfaces
1212+├── dist/ # Compiled output (generated)
1313+├── TODO.md # Project task list (keep updated)
1414+└── tsconfig.json # TypeScript configuration
1515+```
1616+1717+## Code Organization
1818+1919+### Type Definitions (`src/types.ts`)
2020+2121+Central location for all TypeScript interfaces and types:
2222+- `PrimitiveType`: Supported primitive types (string, number, boolean)
2323+- `EntityDefinition`: Entity schema with optional formatter
2424+- `EntitySchema`: Schema mapping for entity types
2525+- `Entity`: Parsed entity structure with type, value, parsedValue, formattedValue, inferredType, position
2626+- `ParserConfig`: Parser configuration options
2727+- `ParseResult`: Parse result with utility methods
2828+2929+### Implementation Files (to be created)
3030+3131+- Parser class implementation
3232+- ParseResult class implementation
3333+- Helper functions for type inference and formatting
3434+3535+## Conventions
3636+3737+- All interfaces and types defined before implementation
3838+- Comprehensive JSDoc comments on public interfaces
3939+- Single-pass parsing architecture
4040+- Lenient error handling while parsing (skip malformed input instead of throwing; only invalid configuration throws, at construction time)
4141+- Position tracking for all extracted entities
4242+- Keep TODO.md updated with current tasks and progress
4343+- Add issues, bugs, and important features to TODO.md when discovered
+29
.kiro/steering/tech.md
···11+# Technology Stack
22+33+## Runtime
44+55+- **Node.js**: v24+ with native TypeScript support
66+- **Dependencies**: Zero runtime dependencies (core principle)
77+88+## TS Configuration
99+1010+- **Target**: ES2022
1111+- **Module System**: ESNext with Node resolution
1212+- **Strict Mode**: Enabled for type safety
1313+1414+## Common Commands
1515+1616+```bash
1717+# Run tests; no flags required
1818+npm t
1919+2020+# Run TypeScript directly (Node v24+)
2121+node src/parser.ts
2222+```
2323+2424+## Development Principles
2525+2626+- No third-party runtime dependencies
2727+- Direct TypeScript execution without build step during development
2828+- TypeScript standard library only
2929+- Simple, maintainable implementations over complex optimizations
+177
README.md
···11+# Tagged String
22+33+Extract structured data from strings using tag-based syntax. Zero dependencies, runs natively on Node.js v24+.
44+55+```typescript
66+import { TaggedStringParser } from 'tagged-string';
77+88+const parser = new TaggedStringParser();
99+const result = parser.parse('[operation:deploy] started with [changes:5] to [stack:prod-stack]');
1010+1111+console.log(result.entities);
1212+// [
1313+// { type: 'operation', value: 'deploy', parsedValue: 'deploy', inferredType: 'string', ... },
1414+// { type: 'changes', value: '5', parsedValue: 5, inferredType: 'number', ... },
1515+// { type: 'stack', value: 'prod-stack', parsedValue: 'prod-stack', inferredType: 'string', ... }
1616+// ]
1717+```
1818+1919+## Installation
2020+2121+```bash
2222+npm install tagged-string
2323+```
2424+2525+Requires Node.js v24 or later for native TypeScript support.
2626+2727+## Usage
2828+2929+### Basic Parsing
3030+3131+The parser extracts `[type:value]` tags from strings and automatically infers types:
3232+3333+```typescript
3434+const parser = new TaggedStringParser();
3535+const result = parser.parse('[count:42] items processed, [enabled:true] flag set');
3636+3737+result.entities.forEach(entity => {
3838+ console.log(entity.type, entity.parsedValue, entity.inferredType);
3939+});
4040+// count 42 number
4141+// enabled true boolean
4242+```
4343+4444+### Schema-Based Parsing
4545+4646+Define a schema to enforce types and add formatters:
4747+4848+```typescript
4949+const parser = new TaggedStringParser({
5050+ schema: {
5151+ operation: { type: 'string', format: (v) => v.toUpperCase() },
5252+ changes: { type: 'number', format: (n) => `${n} changes` },
5353+ stack: 'string', // shorthand without formatter
5454+ }
5555+});
5656+5757+const result = parser.parse('[operation:deploy] started with [changes:5] to [stack:prod-stack]');
5858+console.log(result.format());
5959+// "DEPLOY started with 5 changes to prod-stack"
6060+```
6161+6262+### Filtering Entities
6363+6464+```typescript
6565+const result = parser.parse('[action:create] [resource:function] with [count:3] instances');
6666+6767+result.getEntitiesByType('action'); // [{ type: 'action', parsedValue: 'create', ... }]
6868+result.getAllTypes(); // ['action', 'resource', 'count']
6969+```
7070+7171+### Custom Delimiters
7272+7373+```typescript
7474+const parser = new TaggedStringParser({
7575+ openDelimiter: '{{',
7676+ closeDelimiter: '}}',
7777+ typeSeparator: '=',
7878+ schema: {
7979+ user: { type: 'string', format: (v) => `@${v}` }
8080+ }
8181+});
8282+8383+const result = parser.parse('User {{user=john}} performed {{count=10}} actions');
8484+console.log(result.format());
8585+// "User @john performed 10 actions"
8686+```
8787+8888+## API
8989+9090+### `TaggedStringParser`
9191+9292+```typescript
9393+constructor(config?: ParserConfig)
9494+```
9595+9696+**Config options:**
9797+- `openDelimiter` (default: `'['`) - Opening tag delimiter
9898+- `closeDelimiter` (default: `']'`) - Closing tag delimiter
9999+- `typeSeparator` (default: `':'`) - Separator between type and value
100100+- `schema` - Entity type definitions with optional formatters
101101+102102+```typescript
103103+parse(message: string): ParseResult
104104+```
105105+106106+Extracts all tagged entities from the message.
107107+108108+### `ParseResult`
109109+110110+**Properties:**
111111+- `originalMessage: string` - The input message
112112+- `entities: Entity[]` - Extracted entities in order
113113+114114+**Methods:**
115115+- `getEntitiesByType(type: string): Entity[]` - Filter entities by type
116116+- `getAllTypes(): string[]` - Get unique entity types
117117+- `format(): string` - Reconstruct message with formatted values
118118+119119+### `Entity`
120120+121121+```typescript
122122+interface Entity {
123123+ type: string; // Entity type name
124124+ value: string; // Raw string value
125125+ parsedValue: string | number | boolean; // Typed value
126126+ formattedValue: string; // Formatted display value
127127+ inferredType: 'string' | 'number' | 'boolean';
128128+ position: number; // Start position in message
129129+ endPosition: number; // End position in message
130130+}
131131+```
132132+133133+### `EntitySchema`
134134+135135+```typescript
// PrimitiveType is the shorthand form: 'string' | 'number' | 'boolean'
type EntitySchema = Record<string, PrimitiveType | EntityDefinition>;
137137+138138+interface EntityDefinition {
139139+ type: 'string' | 'number' | 'boolean';
140140+ format?: (value: any) => string;
141141+}
142142+```
143143+144144+## Type Inference
145145+146146+Without a schema, the parser infers types automatically:
147147+148148+- **number**: Matches `/^-?\d+(\.\d+)?$/` (integers and decimals)
149149+- **boolean**: `'true'` or `'false'` (case-insensitive)
150150+- **string**: Everything else
151151+152152+## Error Handling
153153+154154+The parser is lenient by design:
155155+- Malformed tags are skipped
156156+- Unclosed tags at end of string are ignored
- Empty or whitespace-only tag content is skipped
158158+- Invalid config throws on construction
159159+160160+## Examples
161161+162162+Run the included examples:
163163+164164+```bash
165165+node src/examples.ts
166166+```
167167+168168+## Development
169169+170170+```bash
171171+npm test # Run tests
172172+node src/examples.ts # Run examples
173173+```
174174+175175+## License
176176+177177+MIT
+74
TODO.md
···11+# TODO / Known Issues
22+33+## Known Limitations
44+55+(No known limitations at this time)
66+77+---
88+99+## Recently Resolved
1010+1111+### ✅ ParseResult.format() doesn't support custom delimiters
1212+**Status:** RESOLVED
1313+**Resolved:** November 11, 2025
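**Solution:** Added an `endPosition` field to the Entity interface and updated `format()` to use the stored positions, searching for the configured closing delimiter only as a fallback when an entity has no `endPosition`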
1515+1616+The `ParseResult.format()` method now correctly handles custom delimiters by storing the tag end position during parsing. The Entity interface was extended with an `endPosition` field, and the format() method uses this stored position instead of searching for hardcoded delimiters.
1717+1818+**Implementation:**
1919+- Added `endPosition: number` to Entity interface
2020+- Modified TaggedStringParser to calculate and store tag end positions
2121+- Updated ParseResult.format() to use entity.endPosition
2222+- Added comprehensive tests for custom delimiter formatting
2323+- Updated examples to demonstrate custom delimiter usage
2424+2525+---
2626+2727+## Future Enhancements
2828+2929+### Nested Delimiter Support
3030+**Current behavior:** `[outer:[inner:value]]` extracts `[inner:value` as the value (stops at first closing delimiter)
3131+**Consideration:** Add support for properly parsing nested tags, either by:
3232+- Escaping inner delimiters
3333+- Counting delimiter depth
3434+- Supporting a different syntax for nested structures
3535+3636+**Use case:** Complex structured data like `[config:{host:localhost,port:8080}]`
3737+3838+### Empty Type Handling
3939+**Current behavior:** `[:value]` works but creates entity with empty string type
4040+**Consideration:** Decide on explicit behavior:
4141+- Allow empty types as valid (current)
4242+- Treat as error/skip
4343+- Use a default type name like `"_default"` or `"value"`
4444+4545+**Use case:** Quick tagging without type classification: `[:important]` or `[:TODO]`
4646+4747+### Multiple Separator Behavior
4848+**Current behavior:** `[type:value:extra]` splits on first `:` only, value becomes `value:extra`
4949+**Consideration:** Document this behavior explicitly or add options:
5050+- Split on first separator only (current, implicit)
5151+- Support escaped separators: `[type:value\:with\:colons]`
5252+- Allow configuration for multi-part values
5353+5454+**Use case:** Values containing the separator character like URLs or timestamps
5555+5656+### NaN Handling for Invalid Numbers
5757+**Current behavior:** `[count:abc]` with `number` schema produces `NaN` as parsedValue
5858+**Consideration:** Add validation/error handling:
5959+- Throw error on invalid number
6060+- Fall back to string type
6161+- Add a `parseError` field to Entity
6262+- Provide a validation callback in schema
6363+6464+**Use case:** Catching malformed numeric data early
6565+6666+### Formatter Error Handling
6767+**Current behavior:** If a custom formatter throws an error, parsing crashes
6868+**Consideration:** Make parsing more resilient:
6969+- Catch formatter errors and fall back to `String(value)`
7070+- Add error callback to config
7171+- Add `formatterError` field to Entity
7272+- Skip entities with formatter errors
7373+7474+**Use case:** Robust parsing even with buggy custom formatters
···11+import type { Entity, ParseResult as IParseResult } from './types.ts'
/**
 * Concrete ParseResult: holds the original message and the entities extracted
 * from it, plus helpers to filter entities and to rebuild the message with
 * formatted values.
 */
export class ParseResult implements IParseResult {
  public readonly originalMessage: string
  public readonly entities: Entity[]
  private readonly closeDelimiter?: string

  /**
   * @param originalMessage - The unmodified input string
   * @param entities - Entities extracted from the message
   * @param closeDelimiter - Closing delimiter used while parsing; format() only
   *   consults it for entities that carry no endPosition (defaults to ']')
   */
  constructor(
    originalMessage: string,
    entities: Entity[],
    closeDelimiter?: string,
  ) {
    this.originalMessage = originalMessage
    this.entities = entities
    this.closeDelimiter = closeDelimiter
  }

  /**
   * Get all entities of a specific type
   * @param type - The entity type to filter by
   * @returns Array of entities matching the type, in original order
   */
  getEntitiesByType(type: string): Entity[] {
    return this.entities.filter((entity) => entity.type === type)
  }

  /**
   * Get all unique entity types found in the message
   * @returns Array of unique type strings, in order of first appearance
   */
  getAllTypes(): string[] {
    // A Set keeps insertion order, so the first occurrence of each type wins
    return Array.from(new Set(this.entities.map((entity) => entity.type)))
  }

  /**
   * Reconstruct the message with formatted entity values.
   * Each tag span is replaced by its entity's formattedValue; text between
   * tags is copied verbatim.
   * @returns Formatted string with all entities replaced
   */
  format(): string {
    if (this.entities.length === 0) {
      return this.originalMessage
    }

    // Work on a copy so the public entities array keeps its original order
    const ordered = [...this.entities].sort((a, b) => a.position - b.position)

    let result = ''
    let cursor = 0

    for (const entity of ordered) {
      // An entity starting before the cursor overlaps text that has already
      // been replaced (e.g. a duplicated entity). substring() swaps its
      // arguments when start > end, so processing it would re-emit the raw
      // tag text; skip it instead.
      if (entity.position < cursor) {
        continue
      }

      // Literal text between the previous tag and this one
      result += this.originalMessage.substring(cursor, entity.position)

      // The formatted value stands in for the whole tag
      result += entity.formattedValue

      cursor = this.resolveTagEnd(entity)
    }

    // Trailing text after the last tag
    result += this.originalMessage.substring(cursor)

    return result
  }

  /**
   * Find the index just past the end of an entity's tag.
   * Prefers the stored endPosition; otherwise searches for the closing
   * delimiter starting at the entity's position. If no delimiter is found the
   * entity's own position is returned, so the raw tag text is left in place.
   */
  private resolveTagEnd(entity: Entity): number {
    if (entity.endPosition !== undefined) {
      return entity.endPosition
    }

    const delimiter = this.closeDelimiter ?? ']'
    const closingIndex = this.originalMessage.indexOf(
      delimiter,
      entity.position,
    )
    return closingIndex !== -1
      ? closingIndex + delimiter.length
      : entity.position
  }
}
+593
src/TaggedStringParser.test.ts
···11+import assert from 'node:assert'
22+import { describe, test } from 'node:test'
33+import { TaggedStringParser } from './TaggedStringParser.ts'
44+import type { EntitySchema } from './types.ts'
55+66+describe('TaggedStringParser', () => {
77+ describe('basic parsing', () => {
88+ test('should extract single entity', () => {
99+ const parser = new TaggedStringParser()
1010+ const result = parser.parse('[operation:OP-123]')
1111+1212+ assert.strictEqual(result.entities.length, 1)
1313+ assert.strictEqual(result.entities[0].type, 'operation')
1414+ assert.strictEqual(result.entities[0].value, 'OP-123')
1515+ assert.strictEqual(result.entities[0].position, 0)
1616+ })
1717+1818+ test('should extract multiple entities in one message', () => {
1919+ const parser = new TaggedStringParser()
2020+ const result = parser.parse(
2121+ '[operation:OP-123] started with [changes:5] to [stack:ST-456]',
2222+ )
2323+2424+ assert.strictEqual(result.entities.length, 3)
2525+ assert.strictEqual(result.entities[0].type, 'operation')
2626+ assert.strictEqual(result.entities[0].value, 'OP-123')
2727+ assert.strictEqual(result.entities[1].type, 'changes')
2828+ assert.strictEqual(result.entities[1].value, '5')
2929+ assert.strictEqual(result.entities[2].type, 'stack')
3030+ assert.strictEqual(result.entities[2].value, 'ST-456')
3131+ })
3232+3333+ test('should handle messages without entities', () => {
3434+ const parser = new TaggedStringParser()
3535+ const result = parser.parse('This is a plain log message')
3636+3737+ assert.strictEqual(result.entities.length, 0)
3838+ assert.strictEqual(result.originalMessage, 'This is a plain log message')
3939+ })
4040+4141+ test('should handle empty string', () => {
4242+ const parser = new TaggedStringParser()
4343+ const result = parser.parse('')
4444+4545+ assert.strictEqual(result.entities.length, 0)
4646+ assert.strictEqual(result.originalMessage, '')
4747+ })
4848+ })
4949+5050+ describe('custom delimiter configuration', () => {
5151+ test('should use custom delimiters', () => {
5252+ const parser = new TaggedStringParser({
5353+ openDelimiter: '{',
5454+ closeDelimiter: '}',
5555+ })
5656+ const result = parser.parse('{operation:OP-123} started')
5757+5858+ assert.strictEqual(result.entities.length, 1)
5959+ assert.strictEqual(result.entities[0].type, 'operation')
6060+ assert.strictEqual(result.entities[0].value, 'OP-123')
6161+ })
6262+6363+ test('should use custom type separator', () => {
6464+ const parser = new TaggedStringParser({
6565+ typeSeparator: '=',
6666+ })
6767+ const result = parser.parse('[operation=OP-123]')
6868+6969+ assert.strictEqual(result.entities.length, 1)
7070+ assert.strictEqual(result.entities[0].type, 'operation')
7171+ assert.strictEqual(result.entities[0].value, 'OP-123')
7272+ })
7373+7474+ test('should use multiple custom configurations together', () => {
7575+ const parser = new TaggedStringParser({
7676+ openDelimiter: '<',
7777+ closeDelimiter: '>',
7878+ typeSeparator: '|',
7979+ })
8080+ const result = parser.parse('<operation|OP-123> started')
8181+8282+ assert.strictEqual(result.entities.length, 1)
8383+ assert.strictEqual(result.entities[0].type, 'operation')
8484+ assert.strictEqual(result.entities[0].value, 'OP-123')
8585+ })
8686+ })
8787+8888+ describe('configuration validation', () => {
8989+ test('should throw error for empty open delimiter', () => {
9090+ assert.throws(
9191+ () => new TaggedStringParser({ openDelimiter: '' }),
9292+ /Open delimiter cannot be empty/,
9393+ )
9494+ })
9595+9696+ test('should throw error for empty close delimiter', () => {
9797+ assert.throws(
9898+ () => new TaggedStringParser({ closeDelimiter: '' }),
9999+ /Close delimiter cannot be empty/,
100100+ )
101101+ })
102102+103103+ test('should throw error when delimiters are the same', () => {
104104+ assert.throws(
105105+ () =>
106106+ new TaggedStringParser({ openDelimiter: '|', closeDelimiter: '|' }),
107107+ /Open and close delimiters cannot be the same/,
108108+ )
109109+ })
110110+ })
111111+112112+ describe('malformed tag handling', () => {
113113+ test('should skip unclosed tag at end of string', () => {
114114+ const parser = new TaggedStringParser()
115115+ const result = parser.parse('[operation:OP-123] started [incomplete')
116116+117117+ assert.strictEqual(result.entities.length, 1)
118118+ assert.strictEqual(result.entities[0].type, 'operation')
119119+ assert.strictEqual(result.entities[0].value, 'OP-123')
120120+ })
121121+122122+ test('should handle tag without type separator', () => {
123123+ const parser = new TaggedStringParser()
124124+ const result = parser.parse('[justvalue]')
125125+126126+ assert.strictEqual(result.entities.length, 1)
127127+ assert.strictEqual(result.entities[0].type, '')
128128+ assert.strictEqual(result.entities[0].value, 'justvalue')
129129+ })
130130+131131+ test('should skip empty tags', () => {
132132+ const parser = new TaggedStringParser()
133133+ const result = parser.parse('[operation:OP-123] [] [stack:ST-456]')
134134+135135+ assert.strictEqual(result.entities.length, 2)
136136+ assert.strictEqual(result.entities[0].type, 'operation')
137137+ assert.strictEqual(result.entities[1].type, 'stack')
138138+ })
139139+140140+ test('should skip tags with only whitespace', () => {
141141+ const parser = new TaggedStringParser()
142142+ const result = parser.parse('[operation:OP-123] [ ] [stack:ST-456]')
143143+144144+ assert.strictEqual(result.entities.length, 2)
145145+ assert.strictEqual(result.entities[0].type, 'operation')
146146+ assert.strictEqual(result.entities[1].type, 'stack')
147147+ })
148148+ })
149149+150150+ describe('entity position tracking', () => {
151151+ test('should track position of single entity', () => {
152152+ const parser = new TaggedStringParser()
153153+ const result = parser.parse('[operation:OP-123]')
154154+155155+ assert.strictEqual(result.entities[0].position, 0)
156156+ })
157157+158158+ test('should track positions of multiple entities', () => {
159159+ const parser = new TaggedStringParser()
160160+ const result = parser.parse('[operation:OP-123] started with [changes:5]')
161161+162162+ assert.strictEqual(result.entities[0].position, 0)
163163+ assert.strictEqual(result.entities[1].position, 32)
164164+ })
165165+166166+ test('should track position with text before entity', () => {
167167+ const parser = new TaggedStringParser()
168168+ const result = parser.parse('Starting [operation:OP-123] now')
169169+170170+ assert.strictEqual(result.entities[0].position, 9)
171171+ })
172172+ })
173173+174174+ describe('schema-based type parsing', () => {
175175+ test('should parse known entity using schema type', () => {
176176+ const schema: EntitySchema = {
177177+ count: 'number',
178178+ enabled: 'boolean',
179179+ name: 'string',
180180+ }
181181+ const parser = new TaggedStringParser({ schema })
182182+ const result = parser.parse('[count:42] [enabled:true] [name:test]')
183183+184184+ assert.strictEqual(result.entities[0].parsedValue, 42)
185185+ assert.strictEqual(result.entities[0].inferredType, 'number')
186186+ assert.strictEqual(result.entities[1].parsedValue, true)
187187+ assert.strictEqual(result.entities[1].inferredType, 'boolean')
188188+ assert.strictEqual(result.entities[2].parsedValue, 'test')
189189+ assert.strictEqual(result.entities[2].inferredType, 'string')
190190+ })
191191+192192+ test('should parse number from schema even if value looks like string', () => {
193193+ const schema: EntitySchema = {
194194+ id: 'number',
195195+ }
196196+ const parser = new TaggedStringParser({ schema })
197197+ const result = parser.parse('[id:123]')
198198+199199+ assert.strictEqual(result.entities[0].parsedValue, 123)
200200+ assert.strictEqual(typeof result.entities[0].parsedValue, 'number')
201201+ })
202202+203203+ test('should parse boolean from schema case-insensitively', () => {
204204+ const schema: EntitySchema = {
205205+ flag: 'boolean',
206206+ }
207207+ const parser = new TaggedStringParser({ schema })
208208+ const result = parser.parse('[flag:TRUE] [flag:False]')
209209+210210+ assert.strictEqual(result.entities[0].parsedValue, true)
211211+ assert.strictEqual(result.entities[1].parsedValue, false)
212212+ })
213213+ })
214214+215215+ describe('automatic type inference', () => {
216216+ test('should infer number type for numeric values', () => {
217217+ const parser = new TaggedStringParser()
218218+ const result = parser.parse('[count:42] [price:19.99] [temp:-5]')
219219+220220+ assert.strictEqual(result.entities[0].parsedValue, 42)
221221+ assert.strictEqual(result.entities[0].inferredType, 'number')
222222+ assert.strictEqual(result.entities[1].parsedValue, 19.99)
223223+ assert.strictEqual(result.entities[1].inferredType, 'number')
224224+ assert.strictEqual(result.entities[2].parsedValue, -5)
225225+ assert.strictEqual(result.entities[2].inferredType, 'number')
226226+ })
227227+228228+ test('should infer boolean type for true/false values', () => {
229229+ const parser = new TaggedStringParser()
230230+ const result = parser.parse(
231231+ '[enabled:true] [disabled:false] [active:TRUE]',
232232+ )
233233+234234+ assert.strictEqual(result.entities[0].parsedValue, true)
235235+ assert.strictEqual(result.entities[0].inferredType, 'boolean')
236236+ assert.strictEqual(result.entities[1].parsedValue, false)
237237+ assert.strictEqual(result.entities[1].inferredType, 'boolean')
238238+ assert.strictEqual(result.entities[2].parsedValue, true)
239239+ assert.strictEqual(result.entities[2].inferredType, 'boolean')
240240+ })
241241+242242+ test('should infer string type for other values', () => {
243243+ const parser = new TaggedStringParser()
244244+ const result = parser.parse(
245245+ '[name:test] [id:abc123] [message:hello world]',
246246+ )
247247+248248+ assert.strictEqual(result.entities[0].parsedValue, 'test')
249249+ assert.strictEqual(result.entities[0].inferredType, 'string')
250250+ assert.strictEqual(result.entities[1].parsedValue, 'abc123')
251251+ assert.strictEqual(result.entities[1].inferredType, 'string')
252252+ assert.strictEqual(result.entities[2].parsedValue, 'hello world')
253253+ assert.strictEqual(result.entities[2].inferredType, 'string')
254254+ })
255255+ })
256256+257257+ describe('mixed known and unknown entities', () => {
258258+ test('should handle both schema-defined and inferred entities', () => {
259259+ const schema: EntitySchema = {
260260+ operation: 'string',
261261+ changes: 'number',
262262+ }
263263+ const parser = new TaggedStringParser({ schema })
264264+ const result = parser.parse(
265265+ '[operation:OP-123] with [changes:5] and [unknown:42]',
266266+ )
267267+268268+ // Schema-defined entities
269269+ assert.strictEqual(result.entities[0].type, 'operation')
270270+ assert.strictEqual(result.entities[0].parsedValue, 'OP-123')
271271+ assert.strictEqual(result.entities[0].inferredType, 'string')
272272+273273+ assert.strictEqual(result.entities[1].type, 'changes')
274274+ assert.strictEqual(result.entities[1].parsedValue, 5)
275275+ assert.strictEqual(result.entities[1].inferredType, 'number')
276276+277277+ // Unknown entity with inference
278278+ assert.strictEqual(result.entities[2].type, 'unknown')
279279+ assert.strictEqual(result.entities[2].parsedValue, 42)
280280+ assert.strictEqual(result.entities[2].inferredType, 'number')
281281+ })
282282+ })
283283+284284+ describe('formatter functions', () => {
285285+ test('should apply formatter to entity values', () => {
286286+ const schema: EntitySchema = {
287287+ operation: {
288288+ type: 'string',
289289+ format: (val) => `**${val}**`,
290290+ },
291291+ count: {
292292+ type: 'number',
293293+ format: (val) => `[${val} items]`,
294294+ },
295295+ }
296296+ const parser = new TaggedStringParser({ schema })
297297+ const result = parser.parse('[operation:OP-123] has [count:5]')
298298+299299+ assert.strictEqual(result.entities[0].formattedValue, '**OP-123**')
300300+ assert.strictEqual(result.entities[1].formattedValue, '[5 items]')
301301+ })
302302+303303+ test('should apply formatter with uppercase transformation', () => {
304304+ const schema: EntitySchema = {
305305+ name: {
306306+ type: 'string',
307307+ format: (val) => val.toUpperCase(),
308308+ },
309309+ }
310310+ const parser = new TaggedStringParser({ schema })
311311+ const result = parser.parse('[name:alice]')
312312+313313+ assert.strictEqual(result.entities[0].formattedValue, 'ALICE')
314314+ })
315315+ })
316316+317317+ describe('entities without formatters', () => {
318318+ test('should default to string conversion when no formatter provided', () => {
319319+ const schema: EntitySchema = {
320320+ count: 'number',
321321+ enabled: 'boolean',
322322+ }
323323+ const parser = new TaggedStringParser({ schema })
324324+ const result = parser.parse('[count:42] [enabled:true]')
325325+326326+ assert.strictEqual(result.entities[0].formattedValue, '42')
327327+ assert.strictEqual(result.entities[1].formattedValue, 'true')
328328+ })
329329+330330+ test('should convert unknown entities to string', () => {
331331+ const parser = new TaggedStringParser()
332332+ const result = parser.parse('[count:42] [enabled:true] [name:test]')
333333+334334+ assert.strictEqual(result.entities[0].formattedValue, '42')
335335+ assert.strictEqual(result.entities[1].formattedValue, 'true')
336336+ assert.strictEqual(result.entities[2].formattedValue, 'test')
337337+ })
338338+ })
339339+340340+ describe('shorthand vs full EntityDefinition', () => {
341341+ test('should support shorthand schema syntax', () => {
342342+ const schema: EntitySchema = {
343343+ count: 'number',
344344+ name: 'string',
345345+ }
346346+ const parser = new TaggedStringParser({ schema })
347347+ const result = parser.parse('[count:42] [name:test]')
348348+349349+ assert.strictEqual(result.entities[0].parsedValue, 42)
350350+ assert.strictEqual(result.entities[0].inferredType, 'number')
351351+ assert.strictEqual(result.entities[1].parsedValue, 'test')
352352+ assert.strictEqual(result.entities[1].inferredType, 'string')
353353+ })
354354+355355+ test('should support full EntityDefinition with formatter', () => {
356356+ const schema: EntitySchema = {
357357+ count: {
358358+ type: 'number',
359359+ format: (val) => `Count: ${val}`,
360360+ },
361361+ }
362362+ const parser = new TaggedStringParser({ schema })
363363+ const result = parser.parse('[count:42]')
364364+365365+ assert.strictEqual(result.entities[0].parsedValue, 42)
366366+ assert.strictEqual(result.entities[0].formattedValue, 'Count: 42')
367367+ })
368368+369369+ test('should mix shorthand and full definitions in same schema', () => {
370370+ const schema: EntitySchema = {
371371+ count: 'number',
372372+ name: {
373373+ type: 'string',
374374+ format: (val) => val.toUpperCase(),
375375+ },
376376+ }
377377+ const parser = new TaggedStringParser({ schema })
378378+ const result = parser.parse('[count:42] [name:alice]')
379379+380380+ assert.strictEqual(result.entities[0].parsedValue, 42)
381381+ assert.strictEqual(result.entities[0].formattedValue, '42')
382382+ assert.strictEqual(result.entities[1].parsedValue, 'alice')
383383+ assert.strictEqual(result.entities[1].formattedValue, 'ALICE')
384384+ })
385385+ })
386386+387387+ describe('endPosition calculation', () => {
388388+ test('should calculate endPosition with single-character delimiters (default)', () => {
389389+ const parser = new TaggedStringParser()
390390+ const result = parser.parse('[operation:OP-123]')
391391+392392+ assert.strictEqual(result.entities[0].position, 0)
393393+ assert.strictEqual(result.entities[0].endPosition, 18)
394394+ })
395395+396396+ test('should calculate endPosition with multi-character delimiters', () => {
397397+ const parser = new TaggedStringParser({
398398+ openDelimiter: '{{',
399399+ closeDelimiter: '}}',
400400+ })
401401+ const result = parser.parse('{{operation:OP-123}}')
402402+403403+ assert.strictEqual(result.entities[0].position, 0)
404404+ assert.strictEqual(result.entities[0].endPosition, 20)
405405+ })
406406+407407+ test('should calculate endPosition for multiple entities in one message', () => {
408408+ const parser = new TaggedStringParser()
409409+ const result = parser.parse(
410410+ '[operation:OP-123] started with [changes:5] to [stack:ST-456]',
411411+ )
412412+413413+ assert.strictEqual(result.entities.length, 3)
414414+415415+ // First entity
416416+ assert.strictEqual(result.entities[0].position, 0)
417417+ assert.strictEqual(result.entities[0].endPosition, 18)
418418+419419+ // Second entity
420420+ assert.strictEqual(result.entities[1].position, 32)
421421+ assert.strictEqual(result.entities[1].endPosition, 43)
422422+423423+ // Third entity
424424+ assert.strictEqual(result.entities[2].position, 47)
425425+ assert.strictEqual(result.entities[2].endPosition, 61)
426426+ })
427427+428428+ test('should calculate endPosition for entity at start of message', () => {
429429+ const parser = new TaggedStringParser()
430430+ const result = parser.parse('[operation:OP-123] started')
431431+432432+ assert.strictEqual(result.entities[0].position, 0)
433433+ assert.strictEqual(result.entities[0].endPosition, 18)
434434+ })
435435+436436+ test('should calculate endPosition for entity in middle of message', () => {
437437+ const parser = new TaggedStringParser()
438438+ const result = parser.parse('Starting [operation:OP-123] now')
439439+440440+ assert.strictEqual(result.entities[0].position, 9)
441441+ assert.strictEqual(result.entities[0].endPosition, 27)
442442+ })
443443+444444+ test('should calculate endPosition for entity at end of message', () => {
445445+ const parser = new TaggedStringParser()
446446+ const result = parser.parse('Completed [operation:OP-123]')
447447+448448+ assert.strictEqual(result.entities[0].position, 10)
449449+ assert.strictEqual(result.entities[0].endPosition, 28)
450450+ })
451451+452452+ test('should calculate endPosition correctly with custom single-character delimiters', () => {
453453+ const parser = new TaggedStringParser({
454454+ openDelimiter: '<',
455455+ closeDelimiter: '>',
456456+ })
457457+ const result = parser.parse('<operation:OP-123>')
458458+459459+ assert.strictEqual(result.entities[0].position, 0)
460460+ assert.strictEqual(result.entities[0].endPosition, 18)
461461+ })
462462+463463+ test('should calculate endPosition correctly with longer multi-character delimiters', () => {
464464+ const parser = new TaggedStringParser({
465465+ openDelimiter: '<<<',
466466+ closeDelimiter: '>>>',
467467+ })
468468+ const result = parser.parse('<<<operation:OP-123>>>')
469469+470470+ assert.strictEqual(result.entities[0].position, 0)
471471+ assert.strictEqual(result.entities[0].endPosition, 22)
472472+ })
473473+474474+ test('should calculate endPosition for multiple entities with custom delimiters', () => {
475475+ const parser = new TaggedStringParser({
476476+ openDelimiter: '{{',
477477+ closeDelimiter: '}}',
478478+ })
479479+ const result = parser.parse(
480480+ 'User {{user:john}} performed {{count:10}} actions',
481481+ )
482482+483483+ assert.strictEqual(result.entities.length, 2)
484484+485485+ // First entity
486486+ assert.strictEqual(result.entities[0].position, 5)
487487+ assert.strictEqual(result.entities[0].endPosition, 18)
488488+489489+ // Second entity
490490+ assert.strictEqual(result.entities[1].position, 29)
491491+ assert.strictEqual(result.entities[1].endPosition, 41)
492492+ })
493493+ })
494494+495495+ describe('real-world IaC log examples', () => {
496496+ test('should parse operation lifecycle logs', () => {
497497+ const schema: EntitySchema = {
498498+ operation: 'string',
499499+ changes: 'number',
500500+ stack: 'string',
501501+ }
502502+ const parser = new TaggedStringParser({ schema })
503503+504504+ const result1 = parser.parse(
505505+ '[operation:OP-123] started with [changes:5] to [stack:ST-456]',
506506+ )
507507+ assert.strictEqual(result1.entities.length, 3)
508508+ assert.strictEqual(result1.entities[0].parsedValue, 'OP-123')
509509+ assert.strictEqual(result1.entities[1].parsedValue, 5)
510510+ assert.strictEqual(result1.entities[2].parsedValue, 'ST-456')
511511+512512+ const result2 = parser.parse(
513513+ '[operation:OP-123] completed [changes:5] to [stack:ST-456]',
514514+ )
515515+ assert.strictEqual(result2.entities.length, 3)
516516+ })
517517+518518+ test('should parse planning logs', () => {
519519+ const schema: EntitySchema = {
520520+ blueprint: 'string',
521521+ stack: 'string',
522522+ create: 'number',
523523+ update: 'number',
524524+ destroy: 'number',
525525+ }
526526+ const parser = new TaggedStringParser({ schema })
527527+528528+ const result = parser.parse(
529529+ '[blueprint:BP-123] plan complete with [create:2] [update:3] [destroy:1] for [stack:ST-456]',
530530+ )
531531+532532+ assert.strictEqual(result.entities.length, 5)
533533+ assert.strictEqual(result.entities[0].parsedValue, 'BP-123')
534534+ assert.strictEqual(result.entities[1].parsedValue, 2)
535535+ assert.strictEqual(result.entities[2].parsedValue, 3)
536536+ assert.strictEqual(result.entities[3].parsedValue, 1)
537537+ assert.strictEqual(result.entities[4].parsedValue, 'ST-456')
538538+ })
539539+540540+ test('should parse resource command logs', () => {
541541+ const schema: EntitySchema = {
542542+ action: 'string',
543543+ resource: 'string',
544544+ resourceName: 'string',
545545+ type: 'string',
546546+ externalId: 'string',
547547+ }
548548+ const parser = new TaggedStringParser({ schema })
549549+550550+ const result1 = parser.parse(
551551+ '[action:create] executing for [resource:RS-123] [resourceName:"my-function"] [type:function]',
552552+ )
553553+ assert.strictEqual(result1.entities.length, 4)
554554+ assert.strictEqual(result1.entities[0].parsedValue, 'create')
555555+ assert.strictEqual(result1.entities[2].parsedValue, '"my-function"')
556556+557557+ const result2 = parser.parse(
558558+ '[action:create] completed for [resource:RS-123] [externalId:EXT-789]',
559559+ )
560560+ assert.strictEqual(result2.entities.length, 3)
561561+ })
562562+563563+ test('should preserve quoted values in entity values', () => {
564564+ const parser = new TaggedStringParser()
565565+ const result = parser.parse(
566566+ '[resourceName:"my-function"] [error:"Error message"]',
567567+ )
568568+569569+ assert.strictEqual(result.entities[0].value, '"my-function"')
570570+ assert.strictEqual(result.entities[1].value, '"Error message"')
571571+ })
572572+573573+ test('should parse resource type-specific logs', () => {
574574+ const schema: EntitySchema = {
575575+ resourceType: 'string',
576576+ resourceName: 'string',
577577+ }
578578+ const parser = new TaggedStringParser({ schema })
579579+580580+ const result1 = parser.parse(
581581+ '[resourceType:function] creating [resourceName:"my-function"]',
582582+ )
583583+ assert.strictEqual(result1.entities[0].parsedValue, 'function')
584584+ assert.strictEqual(result1.entities[1].parsedValue, '"my-function"')
585585+586586+ const result2 = parser.parse(
587587+ '[resourceType:database] updating [resourceName:"user-db"]',
588588+ )
589589+ assert.strictEqual(result2.entities[0].parsedValue, 'database')
590590+ assert.strictEqual(result2.entities[1].parsedValue, '"user-db"')
591591+ })
592592+ })
593593+})
+232
src/TaggedStringParser.ts
···11+import { ParseResult } from './ParseResult.ts'
22+import type {
33+ Entity,
44+ EntitySchema,
55+ ParserConfig,
66+ PrimitiveType,
77+} from './types.ts'
/**
 * TaggedStringParser extracts tagged entities from strings.
 *
 * A tag is `<openDelimiter><type><typeSeparator><value><closeDelimiter>`,
 * e.g. `[changes:5]` with the default configuration. Values are converted
 * according to the schema entry for their type, or by inference when the
 * type is not listed in the schema.
 */
export class TaggedStringParser {
  private readonly openDelimiter: string
  private readonly closeDelimiter: string
  private readonly typeSeparator: string
  private readonly schema?: EntitySchema
  /**
   * Global pattern matching one complete tag. The delimiters never change
   * after construction, so it is compiled once rather than on every parse.
   */
  private readonly tagPattern: RegExp

  /**
   * Create a new TaggedStringParser with optional configuration.
   * @param config - Parser configuration options; missing fields default to
   *   `[`, `]` and `:`.
   * @throws Error if a delimiter or the type separator is empty, or if the
   *   open and close delimiters are identical
   */
  constructor(config?: ParserConfig) {
    this.openDelimiter = config?.openDelimiter ?? '['
    this.closeDelimiter = config?.closeDelimiter ?? ']'
    this.typeSeparator = config?.typeSeparator ?? ':'
    this.schema = config?.schema

    // Validate first so the pattern is never built from empty delimiters.
    this.validateConfig()
    this.tagPattern = this.buildTagPattern()
  }

  /**
   * Validate parser configuration.
   * @throws Error if configuration is invalid
   */
  private validateConfig(): void {
    if (this.openDelimiter === '') {
      throw new Error('Open delimiter cannot be empty')
    }
    if (this.closeDelimiter === '') {
      throw new Error('Close delimiter cannot be empty')
    }
    if (this.openDelimiter === this.closeDelimiter) {
      throw new Error('Open and close delimiters cannot be the same')
    }
    // indexOf('') is always 0, so an empty separator would silently drop the
    // first character of every tag; reject it up front instead.
    if (this.typeSeparator === '') {
      throw new Error('Type separator cannot be empty')
    }
  }

  /**
   * Build the tag-matching regular expression from the configured delimiters.
   * @returns A global RegExp whose first capture group is the tag content
   */
  private buildTagPattern(): RegExp {
    // Escape characters that are special in regular expressions.
    const escapeRegex = (str: string) =>
      str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
    const open = escapeRegex(this.openDelimiter)
    const close = escapeRegex(this.closeDelimiter)

    // Content is "any character not starting the close delimiter". A negated
    // character class only works for one-character delimiters: for `-->` it
    // would become the range `[^-->]` and reject digits and `:`.
    return new RegExp(`${open}((?:(?!${close})[\\s\\S])+?)${close}`, 'g')
  }

  /**
   * Parse a string and extract all tagged entities.
   * Tags whose content is empty after trimming are skipped.
   * @param message - The string to parse
   * @returns ParseResult containing original message and extracted entities
   */
  parse(message: string): ParseResult {
    if (message === '') {
      return new ParseResult(message, [])
    }

    const entities: Entity[] = []

    // matchAll works on a clone of the pattern, so the shared instance keeps
    // lastIndex at 0 across calls.
    for (const match of message.matchAll(this.tagPattern)) {
      const tagContent = (match[1] ?? '').trim()
      if (tagContent === '' || match.index === undefined) {
        continue
      }
      entities.push(
        this.processTag(
          tagContent,
          match.index,
          match.index + match[0].length,
        ),
      )
    }

    return new ParseResult(message, entities, this.closeDelimiter)
  }

  /**
   * Process a tag's content and create an Entity.
   * The content is split at the first type separator; later occurrences stay
   * in the value. Content without a separator yields an empty type.
   * @param tagContent - The trimmed content between delimiters
   * @param position - The position of the tag in the original message
   * @param endPosition - The position after the closing delimiter
   * @returns The entity described by the tag
   */
  private processTag(
    tagContent: string,
    position: number,
    endPosition: number,
  ): Entity {
    const separatorIndex = tagContent.indexOf(this.typeSeparator)

    let type = ''
    let value = tagContent
    if (separatorIndex !== -1) {
      type = tagContent.substring(0, separatorIndex)
      // Skip the whole separator, which may be longer than one character.
      value = tagContent.substring(separatorIndex + this.typeSeparator.length)
    }

    const { parsedValue, inferredType } = this.parseValue(type, value)
    const formattedValue = this.applyFormatter(type, parsedValue)

    return {
      type,
      value,
      parsedValue,
      formattedValue,
      inferredType,
      position,
      endPosition,
    }
  }

  /**
   * Look up the schema entry declared for an entity type.
   * Only own properties count, so names inherited from Object.prototype
   * (`constructor`, `toString`, ...) are not mistaken for schema entries.
   * @param type - The entity type
   * @returns The schema entry, or undefined when the type is not declared
   */
  private getSchemaEntry(type: string): EntitySchema[string] | undefined {
    const schema = this.schema
    if (schema && Object.prototype.hasOwnProperty.call(schema, type)) {
      return schema[type]
    }
    return undefined
  }

  /**
   * Infer the primitive type from a raw string value.
   * @param value - The raw string value
   * @returns 'number' for plain decimal literals (optional leading minus,
   *   optional fraction), 'boolean' for true/false in any letter case,
   *   otherwise 'string'
   */
  private inferType(value: string): PrimitiveType {
    if (/^-?\d+(\.\d+)?$/.test(value)) {
      return 'number'
    }

    const lowerValue = value.toLowerCase()
    if (lowerValue === 'true' || lowerValue === 'false') {
      return 'boolean'
    }

    return 'string'
  }

  /**
   * Parse a value using schema (if available) or type inference.
   * @param type - The entity type
   * @param rawValue - The raw string value
   * @returns Object with parsedValue and inferredType
   */
  private parseValue(
    type: string,
    rawValue: string,
  ): {
    parsedValue: string | number | boolean
    inferredType: PrimitiveType
  } {
    const schemaEntry = this.getSchemaEntry(type)

    let targetType: PrimitiveType
    if (schemaEntry === undefined) {
      // Unknown type: infer from the raw text.
      targetType = this.inferType(rawValue)
    } else {
      // Handle both shorthand (just the type) and full definition.
      targetType =
        typeof schemaEntry === 'string' ? schemaEntry : schemaEntry.type
    }

    let parsedValue: string | number | boolean
    switch (targetType) {
      case 'number':
        // A schema-declared number with non-numeric text parses to NaN.
        parsedValue = parseFloat(rawValue)
        break
      case 'boolean':
        // Anything other than "true" (any letter case) is false.
        parsedValue = rawValue.toLowerCase() === 'true'
        break
      case 'string':
      default:
        // The default arm also covers schema values that bypass the type
        // checker at runtime.
        parsedValue = rawValue
        break
    }

    return {
      parsedValue,
      inferredType: targetType,
    }
  }

  /**
   * Apply formatter function to a parsed value.
   * @param type - The entity type
   * @param parsedValue - The parsed value
   * @returns The formatter's output, or String(parsedValue) when the schema
   *   declares no formatter for this type
   */
  private applyFormatter(
    type: string,
    parsedValue: string | number | boolean,
  ): string {
    const schemaEntry = this.getSchemaEntry(type)

    // Only a full definition (not the shorthand string) can carry a formatter.
    if (
      schemaEntry !== undefined &&
      typeof schemaEntry !== 'string' &&
      schemaEntry.format
    ) {
      return schemaEntry.format(parsedValue)
    }

    return String(parsedValue)
  }
}
/**
 * Names of the value types the parser can produce: a schema entry declares
 * one of these, and inference picks one for types missing from the schema.
 */
export type PrimitiveType = 'string' | 'number' | 'boolean'
/**
 * Entity definition with optional formatter function.
 */
export interface EntityDefinition {
  /** Primitive type the raw tag value is converted to. */
  type: PrimitiveType
  /**
   * Optional formatter producing the display string for a parsed value.
   * The parameter is typed as the union of values the parser can pass, so
   * callbacks can use it without narrowing from `unknown`; formatters that
   * accept `unknown` remain assignable.
   */
  format?: (value: string | number | boolean) => string
}
/**
 * Schema mapping entity type names to their definitions.
 * Each entry is either the shorthand (just the primitive type name) or a
 * full definition that may also carry a formatter.
 */
export type EntitySchema = Record<string, PrimitiveType | EntityDefinition>
/**
 * Parsed entity extracted from a string.
 */
export interface Entity {
  /** Text before the type separator; empty when the tag has no separator. */
  type: string
  /** Raw text after the type separator, exactly as written in the tag. */
  value: string
  /** `value` converted to the schema-declared or inferred primitive type. */
  parsedValue: string | number | boolean
  /** Display form: the schema formatter's output, or the stringified parsed value. */
  formattedValue: string
  /** Primitive type used for the conversion (from the schema or by inference). */
  inferredType: PrimitiveType
  /** The starting position of the tag in the original message */
  position: number
  /** The ending position of the tag in the original message (after the closing delimiter) */
  endPosition: number
}
/**
 * Configuration options for the parser.
 * Every field is optional; TaggedStringParser substitutes `[`, `]` and `:`
 * for a missing open delimiter, close delimiter and type separator.
 */
export interface ParserConfig {
  /** Text that opens a tag. */
  openDelimiter?: string
  /** Text that closes a tag. */
  closeDelimiter?: string
  /** Text separating the entity type from its value inside a tag. */
  typeSeparator?: string
  /** Declared types (and optional formatters) for known entity types. */
  schema?: EntitySchema
}
/**
 * Result of parsing a string.
 */
export interface ParseResult {
  /** The message exactly as it was handed to the parser. */
  originalMessage: string
  /** Entities extracted from the message. */
  entities: Entity[]

  /**
   * Get all entities of a specific type
   */
  getEntitiesByType(type: string): Entity[]

  /**
   * Get all unique entity types found in the message
   */
  getAllTypes(): string[]

  /**
   * Reconstruct the message with formatted entity values
   */
  format(): string
}