tangled
alpha
login
or
join now
byarielm.fyi
/
pasturepy
1
fork
atom
pasturepy is a Python tool for generating JSON feed definitions for use with Graze. Use it to programmatically create and customize feeds for Graze.
1
fork
atom
overview
issues
pulls
pipelines
feat: regex node from line-delimited files
byarielm.fyi
1 month ago
d7c77551
72c5ae58
+64
1 changed file
expand all
collapse all
unified
split
pasturepy
nodes
text.py
+64
pasturepy/nodes/text.py
···
0
0
1
from pasturepy.constants.fields import OPTION_FIELDS, TEXT_FIELDS
2
from pasturepy.constants.graze_json import REGEX_METHODS, WORD_METHODS
3
···
41
TextNode._validate_field(field)
42
43
return filter_group.add_filter({method: [field, term, ignore_case]})
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
···
1
+
from pathlib import Path
2
+
3
from pasturepy.constants.fields import OPTION_FIELDS, TEXT_FIELDS
4
from pasturepy.constants.graze_json import REGEX_METHODS, WORD_METHODS
5
···
43
TextNode._validate_field(field)
44
45
return filter_group.add_filter({method: [field, term, ignore_case]})
46
+
47
+
@staticmethod
def regex_from_files(
    filter_group,
    method: str,
    field: str,
    file_paths: str | Path | list[str | Path],
    optimize: bool = False,
    ignore_case: bool = True,
):
    """Add a regex filter whose pattern is built from line-delimited files.

    Every non-blank line of every file is taken as one regex pattern. The
    patterns from all files are collected in order and combined into a
    single pattern, which is then registered on ``filter_group``.

    Args:
        filter_group: Object exposing ``add_filter``; receives the filter.
        method: Regex method name; must be a member of ``REGEX_METHODS``.
        field: Field to match against; checked with
            ``TextNode._validate_field``.
        file_paths: A single path (``str`` or ``Path``) or a list of paths
            to the term files.
        optimize: When true, combine patterns with ``_optimal_join``;
            otherwise with ``_simple_join``.
        ignore_case: Passed through as the third filter argument.

    Returns:
        Whatever ``filter_group.add_filter`` returns.

    Raises:
        ValueError: If ``method`` is not in ``REGEX_METHODS``, or if the
            files contain no patterns at all.
        FileNotFoundError: If one of the term files does not exist.
    """
    if method not in REGEX_METHODS:
        raise ValueError(
            f"Invalid method '{method}'. Must be one of {REGEX_METHODS}"
        )
    TextNode._validate_field(field)

    # A lone str or Path names one file; it must not be iterated as if it
    # were a collection of paths.
    if isinstance(file_paths, (str, Path)):
        paths = [file_paths]
    else:
        paths = file_paths

    all_terms = []
    for term_file in paths:
        all_terms.extend(_read_term_file(term_file))

    if not all_terms:
        raise ValueError("No patterns found in term files")

    join_terms = _optimal_join if optimize else _simple_join
    final_regex = join_terms(all_terms)

    return filter_group.add_filter({method: [field, final_regex, ignore_case]})
78
+
79
+
80
+
def _read_term_file(file_path: str) -> list[str]:
    """Read a line-delimited term file and return its non-blank lines.

    The file is decoded as UTF-8. A leading byte-order mark, if present, is
    discarded so that it cannot leak into the first term.

    Args:
        file_path: Path of the term file to read.

    Returns:
        The file's lines in order, each stripped of surrounding whitespace,
        with blank lines dropped.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    path = Path(file_path)
    if not path.exists():
        raise FileNotFoundError(f"{file_path} not found")

    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
    # which str.strip() would not remove (U+FEFF is not whitespace).
    with path.open("r", encoding="utf-8-sig") as f:
        stripped_lines = (line.strip() for line in f)
        return [term for term in stripped_lines if term]
95
+
96
+
97
+
def _simple_join(terms: list[str]) -> str:
    """Combine regex terms into one pattern that matches any of them.

    A single term is returned unchanged. Otherwise the terms are joined
    with ``|`` and wrapped in a non-capturing group.
    """
    if len(terms) != 1:
        alternation = "|".join(terms)
        return f"(?:{alternation})"
    return terms[0]
101
+
102
+
103
+
def _optimal_join(terms: list[str]) -> str:
    """Join regex terms into a single pattern.

    No optimization is performed yet: a single term is returned unchanged,
    and several terms become a ``|`` alternation inside a non-capturing
    group.
    """
    # TODO: actually do optimization lol
    return terms[0] if len(terms) == 1 else "(?:" + "|".join(terms) + ")"