Python bindings to oxyroot. Makes reading .root files blazing fast 🚀

Keep the API similar to uproot.

+211 -101
+2 -1
Cargo.toml
··· 9 9 crate-type = ["cdylib"] 10 10 11 11 [dependencies] 12 - numpy = "0.26.0" 13 12 oxyroot = "0.1.25" 14 13 pyo3 = { version = "0.26.0", features = ["abi3-py38"] } 14 + parking_lot = "0.12.3" 15 + numpy = "0.26.0"
+12 -11
python/tests/read_ntuples.py
··· 10 10 11 11 up_start_time = time.time() 12 12 up_tree = uproot.open(file_name)[tree_name] 13 - for branch in up_tree.keys(): 14 - # print(branch, up_tree[branch].typename) 15 - if up_tree[branch].typename != "std::string": 16 - up_values = up_tree[branch].array(library="np") 17 - print(f"Uproot read {branch} into a {type(up_values)} and it has a mean of {np.nanmean(up_values):.2f}") 13 + for branch in up_tree: 14 + # print(branch, branch.typename) 15 + if branch.typename != "std::string": # Uproot cannot read strings? 16 + up_values = branch.array(library="np") 17 + print(f"Uproot read {branch.name} into a {type(up_values)} and it has a mean of {np.nanmean(up_values):.2f}") 18 18 up_end_time = time.time() 19 19 20 20 print("\n") 21 21 22 22 oxy_start_time = time.time() 23 - oxy_branches = oxyroot.read_root(file_name, tree_name=tree_name) 24 - for branch in oxy_branches: 25 - oxyroot.read_root(file_name, tree_name=tree_name, branch=branch) 26 - oxy_values = oxyroot.read_root(file_name, tree_name=tree_name, branch=branch) 23 + oxy_tree = oxyroot.open(file_name)[tree_name] 24 + for branch in oxy_tree: 25 + # print(branch, branch.typename) 26 + # if branch.typename != "string": 27 + oxy_values = branch.array() 27 28 if type(oxy_values) is np.ndarray: 28 - print(f"Oxyroot read {branch} into a {type(oxy_values)} and it has a mean of {np.nanmean(oxy_values):.2f}") 29 + print(f"Oxyroot read {branch.name} into a {type(oxy_values)} and it has a mean of {np.nanmean(oxy_values):.2f}") 29 30 else: 30 - print(f"Oxyroot read {branch} into a {type(oxy_values)} and it has a length of {len(oxy_values)}") 31 + print(f"Oxyroot read {branch.name} into a {type(oxy_values)} and it has a length of {len(oxy_values)}") 31 32 oxy_end_time = time.time() 32 33 33 34 print("\n Total time")
+3 -2
python/tests/test_read_from_uproot.py
··· 1 - import pytest 2 1 import oxyroot 3 2 import uproot 4 3 import numpy as np 5 4 import os 5 + 6 + print(oxyroot.__version__) 6 7 7 8 def test_read_from_uproot(): 8 9 # Create a dummy ROOT file for testing ··· 15 16 f["tree1"].extend({"branch1": input}) 16 17 17 18 18 - output = oxyroot.read_root(file_name, tree_name="tree1", branch="branch1") 19 + output = oxyroot.open(file_name)["tree1"]["branch1"].array() 19 20 assert(type(output) is np.ndarray) 20 21 assert(np.array_equal(input, output)) 21 22
+194 -87
src/lib.rs
··· 1 1 use ::oxyroot::{Named, RootFile}; 2 - use numpy::ToPyArray; 3 - use pyo3::{exceptions::PyValueError, prelude::*, types::PyModule, IntoPyObjectExt}; 2 + use numpy::IntoPyArray; 3 + use pyo3::{exceptions::PyValueError, prelude::*, IntoPyObjectExt}; 4 + 5 + #[pyclass(name = "RootFile")] 6 + struct PyRootFile { 7 + #[pyo3(get)] 8 + path: String, 9 + } 10 + 11 + #[pyclass(name = "Tree")] 12 + struct PyTree { 13 + #[pyo3(get)] 14 + path: String, 15 + #[pyo3(get)] 16 + name: String, 17 + } 18 + 19 + #[pyclass(name = "Branch")] 20 + struct PyBranch { 21 + #[pyo3(get)] 22 + path: String, 23 + #[pyo3(get)] 24 + tree_name: String, 25 + #[pyo3(get)] 26 + name: String, 27 + } 28 + 29 + #[pymethods] 30 + impl PyRootFile { 31 + #[new] 32 + fn new(path: String) -> Self { 33 + PyRootFile { path } 34 + } 35 + 36 + fn keys(&self) -> PyResult<Vec<String>> { 37 + let file = RootFile::open(&self.path).map_err(|e| PyValueError::new_err(e.to_string()))?; 38 + Ok(file 39 + .keys() 40 + .into_iter() 41 + .map(|k| k.name().to_string()) 42 + .collect()) 43 + } 44 + 45 + fn __getitem__(&self, name: &str) -> PyResult<PyTree> { 46 + Ok(PyTree { 47 + path: self.path.clone(), 48 + name: name.to_string(), 49 + }) 50 + } 51 + } 52 + 53 + #[pymethods] 54 + impl PyTree { 55 + fn branches(&self) -> PyResult<Vec<String>> { 56 + let mut file = 57 + RootFile::open(&self.path).map_err(|e| PyValueError::new_err(e.to_string()))?; 58 + let tree = file 59 + .get_tree(&self.name) 60 + .map_err(|e| PyValueError::new_err(e.to_string()))?; 61 + Ok(tree.branches().map(|b| b.name().to_string()).collect()) 62 + } 63 + 64 + fn __getitem__(&self, name: &str) -> PyResult<PyBranch> { 65 + Ok(PyBranch { 66 + path: self.path.clone(), 67 + tree_name: self.name.clone(), 68 + name: name.to_string(), 69 + }) 70 + } 4 71 5 - #[pyfunction] 6 - fn version() -> PyResult<String> { 7 - Ok(env!("CARGO_PKG_VERSION").to_string()) 72 + fn __iter__(slf: PyRef<Self>) -> PyResult<Py<PyBranchIterator>> { 73 + let branches = 
slf.branches()?; 74 + Py::new( 75 + slf.py(), 76 + PyBranchIterator { 77 + path: slf.path.clone(), 78 + tree_name: slf.name.clone(), 79 + branches: branches.into_iter(), 80 + }, 81 + ) 82 + } 8 83 } 9 84 10 - /// Read a ROOT file and return the list of trees, the branches of a tree, or the values of a branch. 11 - #[pyfunction] 12 - #[pyo3(signature = (path, tree_name = None, branch = None))] 13 - fn read_root( 85 + #[pyclass] 86 + struct PyBranchIterator { 14 87 path: String, 15 - tree_name: Option<String>, 16 - branch: Option<String>, 17 - ) -> PyResult<Py<PyAny>> { 18 - let mut file = RootFile::open(&path).unwrap(); 19 - let keys: Vec<String> = file 20 - .keys() 21 - .into_iter() 22 - .map(|k| k.name().to_string()) 23 - .collect(); 88 + tree_name: String, 89 + branches: std::vec::IntoIter<String>, 90 + } 24 91 25 - Python::attach(|py| -> PyResult<Py<PyAny>> { 26 - match tree_name { 27 - Some(name) => { 28 - if let Ok(tree) = file.get_tree(&name) { 29 - let branches_available: Vec<String> = 30 - tree.branches().map(|b| b.name().to_string()).collect(); 92 + #[pymethods] 93 + impl PyBranchIterator { 94 + fn __iter__(slf: PyRef<Self>) -> PyRef<Self> { 95 + slf 96 + } 97 + 98 + fn __next__(&mut self) -> Option<PyBranch> { 99 + self.branches.next().map(|name| PyBranch { 100 + path: self.path.clone(), 101 + tree_name: self.tree_name.clone(), 102 + name, 103 + }) 104 + } 105 + } 106 + 107 + #[pymethods] 108 + impl PyBranch { 109 + fn array(&self, py: Python) -> PyResult<Py<PyAny>> { 110 + let mut file = 111 + RootFile::open(&self.path).map_err(|e| PyValueError::new_err(e.to_string()))?; 112 + let tree = file 113 + .get_tree(&self.tree_name) 114 + .map_err(|e| PyValueError::new_err(e.to_string()))?; 115 + let branch = tree 116 + .branch(&self.name) 117 + .ok_or_else(|| PyValueError::new_err("Branch not found"))?; 31 118 32 - match branch { 33 - Some(bs) => { 34 - if let Some(branch) = tree.branch(&bs) { 35 - match branch.item_type_name().as_str() { 36 - "f32" => { 37 - 
let data = 38 - branch.as_iter::<f32>().unwrap().collect::<Vec<_>>(); 39 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 40 - } 41 - "double" => { 42 - let data = 43 - branch.as_iter::<f64>().unwrap().collect::<Vec<_>>(); 44 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 45 - } 46 - "int32_t" => { 47 - let data = 48 - branch.as_iter::<i32>().unwrap().collect::<Vec<_>>(); 49 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 50 - } 51 - "int64_t" => { 52 - let data = 53 - branch.as_iter::<i64>().unwrap().collect::<Vec<_>>(); 54 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 55 - } 56 - "uint32_t" => { 57 - let data = 58 - branch.as_iter::<u32>().unwrap().collect::<Vec<_>>(); 59 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 60 - } 61 - "uint64_t" => { 62 - let data = 63 - branch.as_iter::<u64>().unwrap().collect::<Vec<_>>(); 64 - Ok(data.to_pyarray(py).into_py_any(py).unwrap()) 65 - } 66 - "string" => { 67 - let data = 68 - branch.as_iter::<String>().unwrap().collect::<Vec<_>>(); 69 - Ok(data.into_py_any(py).unwrap()) 70 - } 71 - other => Err(PyValueError::new_err(format!( 72 - "Unsupported branch type: {}", 73 - other 74 - ))), 75 - } 76 - } else { 77 - Err(PyValueError::new_err(format!( 78 - "Branch '{}' not found. Available branches are: {:?}", 79 - bs, branches_available 80 - ))) 81 - } 82 - } 83 - None => Ok(branches_available.into_py_any(py).unwrap()), 84 - } 85 - } else { 86 - Err(PyValueError::new_err(format!( 87 - "Tree '{}' not found. Available trees are: {:?}", 88 - name, keys 89 - ))) 90 - } 119 + match branch.item_type_name().as_str() { 120 + "float" => { 121 + let data = branch 122 + .as_iter::<f32>() 123 + .map_err(|e| PyValueError::new_err(e.to_string()))? 124 + .collect::<Vec<_>>(); 125 + Ok(data.into_pyarray(py).into()) 126 + } 127 + "double" => { 128 + let data = branch 129 + .as_iter::<f64>() 130 + .map_err(|e| PyValueError::new_err(e.to_string()))? 
131 + .collect::<Vec<_>>(); 132 + Ok(data.into_pyarray(py).into()) 91 133 } 92 - None => Ok(keys.into_py_any(py).unwrap()), 134 + "int32_t" => { 135 + let data = branch 136 + .as_iter::<i32>() 137 + .map_err(|e| PyValueError::new_err(e.to_string()))? 138 + .collect::<Vec<_>>(); 139 + Ok(data.into_pyarray(py).into()) 140 + } 141 + "int64_t" => { 142 + let data = branch 143 + .as_iter::<i64>() 144 + .map_err(|e| PyValueError::new_err(e.to_string()))? 145 + .collect::<Vec<_>>(); 146 + Ok(data.into_pyarray(py).into()) 147 + } 148 + "uint32_t" => { 149 + let data = branch 150 + .as_iter::<u32>() 151 + .map_err(|e| PyValueError::new_err(e.to_string()))? 152 + .collect::<Vec<_>>(); 153 + Ok(data.into_pyarray(py).into()) 154 + } 155 + "uint64_t" => { 156 + let data = branch 157 + .as_iter::<u64>() 158 + .map_err(|e| PyValueError::new_err(e.to_string()))? 159 + .collect::<Vec<_>>(); 160 + Ok(data.into_pyarray(py).into()) 161 + } 162 + "string" => { 163 + let data = branch 164 + .as_iter::<String>() 165 + .map_err(|e| PyValueError::new_err(e.to_string()))? 
166 + .collect::<Vec<_>>(); 167 + Ok(data.into_py_any(py).unwrap()) 168 + } 169 + other => Err(PyValueError::new_err(format!( 170 + "Unsupported branch type: {}", 171 + other 172 + ))), 93 173 } 94 - }) 174 + } 175 + 176 + #[getter] 177 + fn typename(&self) -> PyResult<String> { 178 + let mut file = 179 + RootFile::open(&self.path).map_err(|e| PyValueError::new_err(e.to_string()))?; 180 + let tree = file 181 + .get_tree(&self.tree_name) 182 + .map_err(|e| PyValueError::new_err(e.to_string()))?; 183 + let branch = tree 184 + .branch(&self.name) 185 + .ok_or_else(|| PyValueError::new_err("Branch not found"))?; 186 + Ok(branch.item_type_name()) 187 + } 188 + } 189 + 190 + #[pyfunction] 191 + fn open(path: String) -> PyResult<PyRootFile> { 192 + Ok(PyRootFile::new(path)) 193 + } 194 + 195 + #[pyfunction] 196 + fn version() -> PyResult<String> { 197 + Ok(env!("CARGO_PKG_VERSION").to_string()) 95 198 } 96 199 97 - /// A Python module to read root files implemented in Rust. 200 + /// A Python module to read root files, implemented in Rust. 98 201 #[pymodule] 99 202 fn oxyroot(m: &Bound<'_, PyModule>) -> PyResult<()> { 100 203 m.add_function(wrap_pyfunction!(version, m)?)?; 101 - m.add_function(wrap_pyfunction!(read_root, m)?)?; 204 + m.add_function(wrap_pyfunction!(open, m)?)?; 205 + m.add_class::<PyRootFile>()?; 206 + m.add_class::<PyTree>()?; 207 + m.add_class::<PyBranch>()?; 208 + m.add_class::<PyBranchIterator>()?; 102 209 Ok(()) 103 210 }