A web scraper built to search specific information for a given compound (and its pseudonyms)

Optimized imports

+14 -6
+5 -2
FourmiCrawler/sources/ChemSpider.py
··· 1 - from source import Source 1 + import re 2 + 2 3 from scrapy import log 3 4 from scrapy.http import Request 4 5 from scrapy.selector import Selector 6 + 7 + from source import Source 5 8 from FourmiCrawler.items import Result 6 - import re 9 + 7 10 8 11 # [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception. 9 12
+5 -2
FourmiCrawler/sources/NIST.py
··· 1 - from source import Source 1 + import re 2 + 2 3 from scrapy import log 3 4 from scrapy.http import Request 4 5 from scrapy.selector import Selector 6 + 7 + from source import Source 5 8 from FourmiCrawler.items import Result 6 - import re 9 + 7 10 8 11 # [TODO]: values can be '128.', perhaps remove the dot in that case? 9 12 # [TODO]: properties have references and comments which do not exist in the
+4 -2
FourmiCrawler/sources/WikipediaParser.py
··· 1 + import re 2 + 1 3 from scrapy.http import Request 2 4 from scrapy import log 3 - from source import Source 4 5 from scrapy.selector import Selector 6 + 7 + from source import Source 5 8 from FourmiCrawler.items import Result 6 - import re 7 9 8 10 9 11 class WikipediaParser(Source):