pasturepy is a Python tool for generating JSON feed definitions for use with Graze. Use it to programmatically create and customize feeds for Graze.

feat: regex node from line-delimited files

+64
+64
pasturepy/nodes/text.py
# pasturepy/nodes/text.py -- reconstructed from a collapsed diff view.
# Regions the view elided ("···") are not reproduced; they are marked below.
from os import PathLike
from pathlib import Path

from pasturepy.constants.fields import OPTION_FIELDS, TEXT_FIELDS
from pasturepy.constants.graze_json import REGEX_METHODS, WORD_METHODS

# ··· lines elided by the diff view ···


class TextNode:
    # NOTE(review): the class header and its earlier members are not visible
    # in the diff; the name is inferred from the `TextNode._validate_field`
    # calls -- confirm against the real file. The view shows only the tail of
    # the preceding method:
    #
    #         TextNode._validate_field(field)
    #
    #         return filter_group.add_filter({method: [field, term, ignore_case]})

    @staticmethod
    def regex_from_files(
        filter_group,
        method: str,
        field: str,
        file_paths: str | PathLike | list[str | PathLike],
        optimize: bool = False,
        ignore_case: bool = True,
    ):
        """Add a regex filter whose pattern is built from line-delimited files.

        Every non-blank line of every file is taken as one regex term; the
        terms are joined into a single alternation and added to
        ``filter_group`` as ``{method: [field, pattern, ignore_case]}``.

        Args:
            filter_group: Object exposing ``add_filter``; receives the filter.
            method: Filter method name; must be a member of ``REGEX_METHODS``.
            field: Field to match against; checked by
                ``TextNode._validate_field``.
            file_paths: One path, or a list of paths, to term files. A bare
                ``str`` or ``os.PathLike`` is treated as a single path.
            optimize: If true, build the pattern with ``_optimal_join``
                instead of ``_simple_join``.
            ignore_case: Passed through as the third filter argument.

        Returns:
            Whatever ``filter_group.add_filter`` returns.

        Raises:
            ValueError: If ``method`` is not in ``REGEX_METHODS`` or the
                files yield no terms at all.
            FileNotFoundError: If any of the term files does not exist.
        """
        if method not in REGEX_METHODS:
            raise ValueError(
                f"Invalid method '{method}'. Must be one of {REGEX_METHODS}"
            )
        TextNode._validate_field(field)

        # A single path may arrive as str or as a Path-like object; without
        # the PathLike check a lone Path would be iterated and fail.
        if isinstance(file_paths, (str, PathLike)):
            paths = [file_paths]
        else:
            paths = file_paths

        all_terms = [
            term for term_file in paths for term in _read_term_file(term_file)
        ]
        if not all_terms:
            raise ValueError("No patterns found in term files")

        join = _optimal_join if optimize else _simple_join
        final_regex = join(all_terms)

        return filter_group.add_filter({method: [field, final_regex, ignore_case]})


def _read_term_file(file_path: str | PathLike) -> list[str]:
    """Return the non-blank lines of ``file_path``, whitespace-stripped.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"{file_path} not found")

    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which would otherwise end up glued to the first term.
    with path.open("r", encoding="utf-8-sig") as handle:
        return [stripped for line in handle if (stripped := line.strip())]


def _simple_join(terms: list[str]) -> str:
    """Join ``terms`` into one alternation wrapped in a non-capturing group.

    A single term is returned unchanged, without the wrapping group.
    """
    if len(terms) == 1:
        return terms[0]
    return f"(?:{'|'.join(terms)})"


def _optimal_join(terms: list[str]) -> str:
    """Join ``terms`` like ``_simple_join`` after dropping duplicate terms.

    Only the first occurrence of each term is kept, in original order; a
    repeated alternative can never match where its earlier copy did not, so
    matching is unchanged while the pattern gets shorter.
    """
    # TODO: further optimization (e.g. factoring common prefixes).
    unique_terms = list(dict.fromkeys(terms))
    return _simple_join(unique_terms)