//! python code generation from lexicon documents use std::collections::{HashMap, HashSet}; use std::fs; use std::io; use std::path::Path; use atrium_lex::lexicon::{LexObject, LexRecord, LexUserType}; use atrium_lex::LexiconDoc; use heck::ToSnakeCase; use crate::builtin::builtin_lexicons; use crate::types::{collect_external_refs, property_to_python, to_class_name, RefContext}; const HEADER: &str = r#"# auto-generated by pmgfal - do not edit from __future__ import annotations from typing import Any from pydantic import BaseModel, Field "#; /// python keywords that need escaping as field names const PYTHON_KEYWORDS: &[&str] = &[ "type", "class", "import", "from", "global", "lambda", "def", "return", "yield", "raise", "try", "except", "finally", "with", "as", "if", "elif", "else", "for", "while", "break", "continue", "pass", "and", "or", "not", "in", "is", "None", "True", "False", "async", "await", ]; /// generate pydantic models for all documents pub fn generate_models( docs: &[LexiconDoc], output_dir: &Path, namespace_prefix: Option<&str>, ) -> Result, io::Error> { let filtered: Vec<_> = docs .iter() .filter(|doc| { namespace_prefix .map(|p| doc.id.starts_with(p)) .unwrap_or(true) }) .collect(); if filtered.is_empty() { return Ok(vec![]); } // build lookup of all available lexicons (user + builtin) let mut all_docs: HashMap<&str, &LexiconDoc> = HashMap::new(); for doc in docs { all_docs.insert(&doc.id, doc); } for doc in builtin_lexicons() { all_docs.entry(&doc.id).or_insert(doc); } // collect external refs from user documents let mut external_refs: HashSet = HashSet::new(); for doc in &filtered { external_refs.extend(collect_external_refs(doc)); } // find which external refs we can resolve from builtins let mut resolved_externals: Vec<&LexiconDoc> = Vec::new(); for ref_nsid in &external_refs { if let Some(doc) = all_docs.get(ref_nsid.as_str()) { // only include if not already in user docs if !filtered.iter().any(|d| d.id == *ref_nsid) { resolved_externals.push(doc); } } } resolved_externals.sort_by(|a, b| a.id.cmp(&b.id)); fs::create_dir_all(output_dir)?; let mut output = String::from(HEADER); output.push('\n'); // generate external deps first (so they're defined before use) for doc in &resolved_externals { output.push_str(&format!("\n# {} (builtin)\n", doc.id)); output.push_str(&generate_document(doc)); } // generate user documents for doc in &filtered { output.push_str(&format!("\n# {}\n", doc.id)); output.push_str(&generate_document(doc)); } let output_file = match namespace_prefix { Some(prefix) => output_dir.join(format!("{}.py", prefix.replace('.', "_"))), None => output_dir.join("models.py"), }; fs::write(&output_file, &output)?; Ok(vec![output_file.to_string_lossy().to_string()]) } /// generate python code for a single lexicon document fn generate_document(doc: &LexiconDoc) -> String { let ctx = RefContext::new(&doc.id); let mut output = String::new(); for (def_name, def) in &doc.defs { let class_name = to_class_name(&doc.id, def_name); match def { LexUserType::Record(LexRecord { record, description, .. }) => { let atrium_lex::lexicon::LexRecordRecord::Object(obj) = record; let desc = description.as_deref().unwrap_or(&doc.id); output.push_str(&generate_class(&class_name, obj, Some(desc), &ctx)); output.push_str("\n\n"); } LexUserType::Object(obj) => { output.push_str(&generate_class( &class_name, obj, obj.description.as_deref(), &ctx, )); output.push_str("\n\n"); } LexUserType::Token(_) => { output.push_str(&format!( "# token: {}\n{} = \"{}#{}\"\n\n", class_name, class_name.to_uppercase(), doc.id, def_name )); } _ => {} } } output } /// generate a pydantic model class fn generate_class( class_name: &str, obj: &LexObject, description: Option<&str>, ctx: &RefContext, ) -> String { let mut lines = vec![format!("class {class_name}(BaseModel):")]; if let Some(desc) = description { lines.push(format!(" \"\"\"{desc}\"\"\"")); } if obj.properties.is_empty() { lines.push(" pass".into()); return lines.join("\n"); } let required: HashSet<_> = obj .required .as_ref() .map(|r| r.iter().map(String::as_str).collect()) .unwrap_or_default(); // generate required fields first, then optional let mut fields: Vec<_> = obj.properties.iter().collect(); fields.sort_by_key(|(name, _)| !required.contains(name.as_str())); for (name, prop) in fields { let field_name = to_field_name(name); let is_required = required.contains(name.as_str()); let mut py_type = property_to_python(prop, ctx); if !is_required { py_type = format!("{py_type} | None"); } let needs_alias = field_name != *name; let needs_default = !is_required; let field_def = match (needs_alias, needs_default) { (false, false) => format!(" {field_name}: {py_type}"), (true, false) => format!(" {field_name}: {py_type} = Field(alias=\"{name}\")"), (false, true) => format!(" {field_name}: {py_type} = Field(default=None)"), (true, true) => { format!(" {field_name}: {py_type} = Field(default=None, alias=\"{name}\")") } }; lines.push(field_def); } lines.join("\n") } /// convert property name to valid python field name fn to_field_name(name: &str) -> String { let snake = name.to_snake_case(); if PYTHON_KEYWORDS.contains(&snake.as_str()) { format!("{snake}_") } else { snake } }