A web scraper built to search for specific information about a given compound (and its pseudonyms)
at main 53 lines 1.9 kB view raw
import copy
import unittest

from scrapy.exceptions import DropItem

from FourmiCrawler import pipelines, spider, items


class TestPipelines(unittest.TestCase):
    """Unit tests for the item pipelines defined in ``FourmiCrawler.pipelines``."""

    def setUp(self):
        # Every test starts from a fresh, empty Result item.
        self.testItem = items.Result()

    def test_none_pipeline(self):
        """RemoveNonePipeline keeps real values and replaces ``None`` with ``""``."""
        self.testItem["value"] = "abc"
        self.testItem["source"] = None
        pipe = pipelines.RemoveNonePipeline()
        processed = pipe.process_item(self.testItem, spider.FourmiSpider())

        self.assertEqual(processed["value"], "abc")

        for key in self.testItem:
            self.assertIsNotNone(processed[key])
            # Compare strings by value: identity checks (`is` / `assertIs`)
            # against string literals only pass when the interpreter happens
            # to intern both operands.
            if key != "value":
                self.assertEqual(processed[key], "")

    def test_duplicate_pipeline(self):
        """DuplicatePipeline passes an item once and drops an identical repeat."""
        self.testItem["attribute"] = "test"
        self.testItem["value"] = "test"
        self.testItem["conditions"] = "test"

        pipe = pipelines.DuplicatePipeline()
        # First occurrence is returned unchanged ...
        self.assertEqual(
            pipe.process_item(self.testItem, spider.FourmiSpider()),
            self.testItem,
        )
        # ... the exact same item a second time is rejected.
        self.assertRaises(
            DropItem, pipe.process_item, self.testItem, spider.FourmiSpider()
        )

        # An item that differs in "value" is not treated as a duplicate.
        other_item = copy.deepcopy(self.testItem)
        other_item["value"] = "test1"
        self.assertEqual(
            pipe.process_item(other_item, spider.FourmiSpider()), other_item
        )

    def test_attribute_selection(self):
        """AttributeSelectionPipeline keeps selected attributes and drops others."""
        item1 = copy.deepcopy(self.testItem)
        item2 = copy.deepcopy(self.testItem)

        item1["attribute"] = "abd"
        item2["attribute"] = "abc"

        # NOTE(review): "a.d" is presumably interpreted as a regular
        # expression by the pipeline ("abd" matches, "abc" does not) --
        # confirm against AttributeSelectionPipeline.
        s = spider.FourmiSpider(selected_attributes=["a.d"])
        pipe = pipelines.AttributeSelectionPipeline()

        self.assertEqual(pipe.process_item(item1, s), item1)
        self.assertRaises(DropItem, pipe.process_item, item2, s)