A web scraper built to search for specific information about a given compound (and its pseudonyms)

Basic structure to make sure the spider uses an argument

+8 -6
+4 -3
Fourmi/spiders/Chemspider.py
··· 3 3 class ChemspiderSpider(Spider): 4 4 name = "Chemspider" 5 5 allowed_domains = ["chemspider.com"] 6 - start_urls = ( 7 - 'http://www.chemspider.com/', 8 - ) 6 + 7 + def __init__(self, compound=None, *args, **kwargs): 8 + super(ChemspiderSpider, self).__init__(*args, **kwargs) 9 + self.start_urls = ["http://chemspiderapiurl/something/%s" % compound] #[TODO] - Give an logical start url. 9 10 10 11 def parse(self, response): 11 12 pass
+4 -3
Fourmi/spiders/Wikipedia.py
··· 3 3 class WikipediaSpider(Spider): 4 4 name = "Wikipedia" 5 5 allowed_domains = ["wikipedia.org"] 6 - start_urls = ( 7 - 'http://www.wikipedia.org/', 8 - ) 6 + 7 + def __init__(self, compound=None, *args, **kwargs): 8 + super(WikipediaSpider, self).__init__(*args, **kwargs) 9 + self.start_urls = ["http://wikipediaurl/something/%s" % compound] #[TODO] - Give an logical start url. 9 10 10 11 def parse(self, response): 11 12 pass