"""A web scraper built to search for specific information on a given compound (and its pseudonyms)."""
1import copy
2import unittest
3
4from scrapy.exceptions import DropItem
5
6from FourmiCrawler import pipelines, spider, items
7
8
class TestPipelines(unittest.TestCase):
    """Unit tests for the item pipelines in FourmiCrawler.pipelines."""

    def setUp(self):
        # Fresh Result item for every test so state never leaks between tests.
        self.testItem = items.Result()

    def test_none_pipeline(self):
        # Testing the pipeline that replaces the None values in items.
        self.testItem["value"] = "abc"
        self.testItem["source"] = None
        pipe = pipelines.RemoveNonePipeline()
        processed = pipe.process_item(self.testItem, spider.FourmiSpider())

        self.assertEqual(processed["value"], "abc")

        for key in self.testItem:
            self.assertIsNotNone(processed[key])
            # BUGFIX: original used `key is not "value"` — identity comparison
            # with a string literal is interpreter-dependent; use != instead.
            if key != "value":
                # BUGFIX: original used assertIs(..., "") which relies on
                # string interning; equality is the intended check.
                self.assertEqual(processed[key], "")

    def test_duplicate_pipeline(self):
        # Testing the pipeline that removes duplicates.
        self.testItem["attribute"] = "test"
        self.testItem["value"] = "test"
        self.testItem["conditions"] = "test"

        pipe = pipelines.DuplicatePipeline()
        # First occurrence passes through unchanged...
        self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
        # ...an identical item is rejected with DropItem.
        self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())

        # An item differing in "value" is not a duplicate and passes through.
        other_item = copy.deepcopy(self.testItem)
        other_item["value"] = "test1"
        self.assertEqual(pipe.process_item(other_item, spider.FourmiSpider()), other_item)

    def test_attribute_selection(self):
        # Testing the pipeline that selects attributes.
        item1 = copy.deepcopy(self.testItem)
        item2 = copy.deepcopy(self.testItem)

        item1["attribute"] = "abd"
        item2["attribute"] = "abc"

        # Spider selects attributes matching the regex "a.d";
        # "abd" matches, "abc" does not and must be dropped.
        s = spider.FourmiSpider(selected_attributes=["a.d"])
        pipe = pipelines.AttributeSelectionPipeline()

        self.assertEqual(pipe.process_item(item1, s), item1)
        self.assertRaises(DropItem, pipe.process_item, item2, s)