tangled
alpha
login
or
join now
dekker.one
/
Fourmi
0
fork
atom
A web scraper built to search for specific information about a given compound (and its pseudonyms)
0
fork
atom
overview
issues
pulls
pipelines
added ignore list
RTB
12 years ago
56ee6b1a
98f58ea4
+8
-2
1 changed file
expand all
collapse all
unified
split
FourmiCrawler
sources
NIST.py
+8
-2
FourmiCrawler/sources/NIST.py
···
5
5
from FourmiCrawler.items import Result
6
6
import re
7
7
8
8
+
# [TODO]: values can be '128.', perhaps remove the dot in that case?
9
9
+
8
10
class NIST(Source):
9
11
website = "http://webbook.nist.gov/*"
10
12
11
13
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
14
14
+
15
15
+
ignore_list = set()
12
16
13
17
def __init__(self):
14
18
Source.__init__(self)
···
235
239
return results
236
240
237
241
def new_compound_request(self, compound):
238
238
-
return Request(url=self.website[:-1] + self.search % compound,
239
239
-
callback=self.parse)
242
242
+
if compound not in self.ignore_list:
243
243
+
self.ignore_list.update(compound)
244
244
+
return Request(url=self.website[:-1] + self.search % compound,
245
245
+
callback=self.parse)