A web scraper built to search for specific information about a given compound (and its pseudonyms)

No two requests shall be the same!

+8 -6
+8 -6
FourmiCrawler/spider.py
··· 10 10 """ 11 11 name = "FourmiSpider" 12 12 _sources = [] 13 - synonyms = [] 13 + synonyms = set() 14 14 15 15 def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs): 16 16 """ ··· 19 19 :param selected_attributes: A list of regular expressions that the attributes should match. 20 20 """ 21 21 super(FourmiSpider, self).__init__(*args, **kwargs) 22 - self.synonyms.append(compound) 22 + self.synonyms.add(compound) 23 23 self.selected_attributes = selected_attributes 24 24 25 25 def parse(self, response): ··· 42 42 :return: A list of Scrapy Request objects 43 43 """ 44 44 requests = [] 45 - for parser in self._sources: 46 - parser_requests = parser.new_compound_request(compound) 47 - if parser_requests is not None: 48 - requests.append(parser_requests) 45 + if compound not in self.synonyms: 46 + self.synonyms.add(compound) 47 + for parser in self._sources: 48 + parser_requests = parser.new_compound_request(compound) 49 + if parser_requests is not None: 50 + requests.append(parser_requests) 49 51 return requests 50 52 51 53 def start_requests(self):