···10101111[dependencies]
1212oxyroot = "0.1.25"
1313-pyo3 = { version = "0.26.0", features = ["abi3-py38"] }
1313+pyo3 = { version = "0.25.0", features = ["abi3-py38"] }
1414parking_lot = "0.12.3"
1515-numpy = "0.26.0"
1515+numpy = "0.25.0"
1616parquet = { version = "53.0.0", features = ["arrow"] }
1717arrow = "53.0.0"
1818+polars = "0.50.0"
1919+pyo3-polars = "0.23.1"
+8-6
README.md
···8899A fast, Rust-powered Python reader for CERN ROOT files.
10101111-This package provides a simple and Pythonic interface bindings to `oxyroot`, a rust package, to read data from `.root` files, inspired by libraries like `uproot`. It leverages the speed of Rust for high-performance data extraction and integrates with the scientific Python ecosystem by providing data as NumPy arrays.
1111+This Python package provides simple bindings to [`oxyroot`, a Rust package](https://github.com/m-dupont/oxyroot), to read data from `.root` files, inspired by libraries like `uproot`. It leverages the speed of Rust and integrates with the scientific Python ecosystem by providing data as NumPy arrays or Polars DataFrames.
12121313## Features
14141515-- **High-Performance**: Core logic is written in Rust for maximum speed.
1616-- **Parquet Conversion**: Convert TTrees directly to Apache Parquet files with a single command.
1717-- **NumPy Integration**: Get branch data directly as NumPy arrays.
1818-- **Simple, Pythonic API**: Easy to learn and use, and similar to `uproot`
1515+- Simple API similar to `uproot`
1616+- Core logic is written in Rust.
1717+- Get branch data directly as NumPy arrays or Polars DataFrames.
1818+- Parquet Conversion: Convert TTrees directly to Apache Parquet files with a single command.
19192020## Quick Start
2121···60606161## Performance
62626363-`oxyroot` is designed to be fast. Here is a simple benchmark comparing the time taken to read all branches of a TTree with `uproot` and `oxyroot`.
6363+`oxyroot` is intended to be fast. Here is a simple benchmark comparing the time taken to read all branches of a TTree with `uproot` and `oxyroot`.
64646565```python
6666import oxyroot
···8787end_time = time.time()
8888print(f"Oxyroot took: {end_time - start_time:.3f}s")
8989```
9090+9191+On a small file (~20 MB) with multiple data formats, oxyroot took half the time of uproot, and also read in the branch with strings!
90929193## License
9294
+69
src/lib.rs
···1414use parquet::arrow::ArrowWriter;
1515use parquet::basic::{BrotliLevel, Compression, GzipLevel, ZstdLevel};
1616use parquet::file::properties::WriterProperties;
1717+use polars::prelude::*;
1818+use pyo3_polars::PyDataFrame;
17191820#[pyclass(name = "RootFile")]
1921struct PyRootFile {
···9294 branches: branches.into_iter(),
9395 },
9496 )
9797+ }
9898+9999+ #[pyo3(signature = (columns = None))]
100100+ fn arrays(&self, columns: Option<Vec<String>>) -> PyResult<PyDataFrame> {
101101+ let mut file =
102102+ RootFile::open(&self.path).map_err(|e| PyValueError::new_err(e.to_string()))?;
103103+ let tree = file
104104+ .get_tree(&self.name)
105105+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
106106+107107+ let branches_to_save = if let Some(columns) = columns {
108108+ columns
109109+ } else {
110110+ tree.branches().map(|b| b.name().to_string()).collect()
111111+ };
112112+113113+ let mut series_vec = Vec::new();
114114+115115+ for branch_name in branches_to_save {
116116+ let branch = match tree.branch(&branch_name) {
117117+ Some(branch) => branch,
118118+ None => {
119119+ println!("Branch '{}' not found, skipping", branch_name);
120120+ continue;
121121+ }
122122+ };
123123+124124+ let series = match branch.item_type_name().as_str() {
125125+ "float" => {
126126+ let data = branch.as_iter::<f32>().unwrap().collect::<Vec<_>>();
127127+ Series::new((&branch_name).into(), data)
128128+ }
129129+ "double" => {
130130+ let data = branch.as_iter::<f64>().unwrap().collect::<Vec<_>>();
131131+ Series::new((&branch_name).into(), data)
132132+ }
133133+ "int32_t" => {
134134+ let data = branch.as_iter::<i32>().unwrap().collect::<Vec<_>>();
135135+ Series::new((&branch_name).into(), data)
136136+ }
137137+ "int64_t" => {
138138+ let data = branch.as_iter::<i64>().unwrap().collect::<Vec<_>>();
139139+ Series::new((&branch_name).into(), data)
140140+ }
141141+ "uint32_t" => {
142142+ let data = branch.as_iter::<u32>().unwrap().collect::<Vec<_>>();
143143+ Series::new((&branch_name).into(), data)
144144+ }
145145+ "uint64_t" => {
146146+ let data = branch.as_iter::<u64>().unwrap().collect::<Vec<_>>();
147147+ Series::new((&branch_name).into(), data)
148148+ }
149149+ "string" => {
150150+ let data = branch.as_iter::<String>().unwrap().collect::<Vec<_>>();
151151+ Series::new((&branch_name).into(), data)
152152+ }
153153+ other => {
154154+ println!("Unsupported branch type: {}, skipping", other);
155155+ continue;
156156+ }
157157+ };
158158+ series_vec.push(series);
159159+ }
160160+161161+ let df = DataFrame::new(series_vec.into_iter().map(|s| s.into()).collect())
162162+ .map_err(|e| PyValueError::new_err(e.to_string()))?;
163163+ Ok(PyDataFrame(df))
95164 }
9616597166 #[pyo3(signature = (output_file, overwrite = false, compression = "snappy", columns = None))]