···11+import re
22+13from scrapy.spider import Spider
24from scrapy import log
33-import re
455667class FourmiSpider(Spider):
88+ """
99+ A spider written for the Fourmi Project which calls upon all available sources to request and scrape data.
1010+ """
711 name = "FourmiSpider"
812 __parsers = []
913 synonyms = []
def __init__(self, compound=None, selected_attributes=None, *args, **kwargs):
    """
    Initiation of the Spider
    :param compound: compound that will be searched; it is stored as the first synonym of this spider.
    :param selected_attributes: A list of regular expressions that the attributes should match.
        When omitted or None, a fresh [".*"] list is used.
    """
    super(FourmiSpider, self).__init__(*args, **kwargs)
    # Rebind to a per-instance copy first: appending straight to the class-level
    # list would leak this compound into every other FourmiSpider instance.
    self.synonyms = list(self.synonyms)
    self.synonyms.append(compound)
    # The default list is created per call so that instances never share
    # (and accidentally mutate) one default object.
    if selected_attributes is None:
        selected_attributes = [".*"]
    self.selected_attributes = selected_attributes
15241625 def parse(self, reponse):
2626+ """
2727+ The function that is called when a response to a request is available. This function distributes this to a
2828+ parser which should be able to handle parsing the data.
2929+ :param reponse: A Scrapy Response object that should be parsed
3030+ :return: A list of Result items and new Request to be handled by the scrapy core.
3131+ """
1732 for parser in self.__parsers:
1833 if re.match(parser.website, reponse.url):
1934 log.msg("Url: " + reponse.url + " -> Source: " + parser.website, level=log.DEBUG)
···2136 return None
22372338 def get_synonym_requests(self, compound):
3939+ """
4040+ A function that generates new Scrapy Request for each source given a new synonym of a compound.
4141+ :param compound: A compound name
4242+ :return: A list of Scrapy Request objects
4343+ """
2444 requests = []
2545 for parser in self.__parsers:
2646 parser_requests = parser.new_compound_request(compound)
···2949 return requests
def start_requests(self):
    """
    The function called by Scrapy for its first Requests.
    :return: A flat list holding, for every known synonym in order, the requests that
        get_synonym_requests produced for it.
    """
    return [
        request
        for known_name in self.synonyms
        for request in self.get_synonym_requests(known_name)
    ]
def add_parsers(self, parsers):
    """
    Register several Parser objects with this spider.
    :param parsers: A list of Parser objects; each one is passed to add_parser in the order given.
    """
    for source_parser in parsers:
        self.add_parser(source_parser)
def add_parser(self, parser):
    """
    Register a single Parser object with this spider.
    :param parser: A Parser object; it is stored in the list of available parsers and
        then given a reference to this spider through its set_spider method.
    """
    available = self.__parsers
    # Store first, then link back, so the parser is already registered when set_spider runs.
    available.append(parser)
    parser.set_spider(self)