tangled
alpha
login
or
join now
dekker.one
/
Fourmi
0
fork
atom
A web scraper built to search for specific information about a given compound (and its pseudonyms)
0
fork
atom
overview
issues
pulls
pipelines
No two requests shall be the same!
dekker.one
11 years ago
eb727bd6
0c9862d8
+8
-6
1 changed file
expand all
collapse all
unified
split
FourmiCrawler
spider.py
+8
-6
FourmiCrawler/spider.py
···
10
10
"""
11
11
name = "FourmiSpider"
12
12
_sources = []
13
13
-
synonyms = []
13
13
+
synonyms = set()
14
14
15
15
def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
16
16
"""
···
19
19
:param selected_attributes: A list of regular expressions that the attributes should match.
20
20
"""
21
21
super(FourmiSpider, self).__init__(*args, **kwargs)
22
22
-
self.synonyms.append(compound)
22
22
+
self.synonyms.add(compound)
23
23
self.selected_attributes = selected_attributes
24
24
25
25
def parse(self, response):
···
42
42
:return: A list of Scrapy Request objects
43
43
"""
44
44
requests = []
45
45
-
for parser in self._sources:
46
46
-
parser_requests = parser.new_compound_request(compound)
47
47
-
if parser_requests is not None:
48
48
-
requests.append(parser_requests)
45
45
+
if compound not in self.synonyms:
46
46
+
self.synonyms.add(compound)
47
47
+
for parser in self._sources:
48
48
+
parser_requests = parser.new_compound_request(compound)
49
49
+
if parser_requests is not None:
50
50
+
requests.append(parser_requests)
49
51
return requests
50
52
51
53
def start_requests(self):