A web scraper built to search for specific information about a given compound (and its pseudonyms)

Added tests for the pipeline

+49 -1
+1 -1
FourmiCrawler/pipelines.py
··· 35 35 """ 36 36 value = (item['attribute'], item['value'], item['conditions']) 37 37 if value in self.known_values: 38 - raise DropItem("Duplicate item found: %s" % item) # #[todo] append sources of first item. 38 + raise DropItem("Duplicate item found: %s" % item) #[todo] append sources of first item. 39 39 else: 40 40 self.known_values.add(value) 41 41 return item
+48
tests/test_pipeline.py
import copy
import unittest

from FourmiCrawler import pipelines, spider, items
from scrapy.exceptions import DropItem


class TestPipelines(unittest.TestCase):
    """Unit tests for the item pipelines defined in FourmiCrawler.pipelines."""

    def setUp(self):
        """Give every test a fresh, empty Result item to work with."""
        self.testItem = items.Result()

    def test_NonePipeline(self):
        """RemoveNonePipeline keeps a set value and leaves no key as None.

        Every key other than "value" is expected to come out as an empty
        string after processing.
        """
        self.testItem["value"] = "abc"
        pipe = pipelines.RemoveNonePipeline()
        processed = pipe.process_item(self.testItem, spider.FourmiSpider())

        self.assertEqual(processed["value"], "abc")

        for key in self.testItem:
            self.assertIsNotNone(processed[key])
            # Compare strings by value: identity (`is`) on string literals
            # depends on interpreter interning and is not a reliable check.
            if key != "value":
                self.assertEqual(processed[key], "")

    def test_DuplicatePipeline(self):
        """DuplicatePipeline passes new items and drops an exact repeat.

        An item differing only in its "value" field must be let through
        again after the original has been seen.
        """
        self.testItem["attribute"] = "test"
        self.testItem["value"] = "test"
        self.testItem["conditions"] = "test"

        pipe = pipelines.DuplicatePipeline()
        # First sighting is returned unchanged; the second one is dropped.
        self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
        self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())

        otherItem = copy.deepcopy(self.testItem)
        otherItem["value"] = "test1"
        self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)

    def test_AttributeSelection(self):
        """AttributeSelectionPipeline keeps "abd" and drops "abc" for "a.d".

        NOTE(review): presumably the selected attributes are treated as
        regular expressions (so "a.d" matches "abd") -- confirm against
        the pipeline implementation.
        """
        item1 = copy.deepcopy(self.testItem)
        item2 = copy.deepcopy(self.testItem)

        item1["attribute"] = "abd"
        item2["attribute"] = "abc"

        s = spider.FourmiSpider(selected_attributes=["a.d"])
        pipe = pipelines.AttributeSelectionPipeline()

        self.assertEqual(pipe.process_item(item1, s), item1)
        self.assertRaises(DropItem, pipe.process_item, item2, s)