A web scraper built to search for specific information about a given compound (and its pseudonyms)
1import copy
2import unittest
3
4from scrapy.exceptions import DropItem
5
6from FourmiCrawler import pipelines, spider, items
7
8
class TestPipelines(unittest.TestCase):
    """Unit tests for the item pipelines in ``FourmiCrawler.pipelines``.

    Each test starts from a fresh, empty ``items.Result`` (see ``setUp``)
    and pushes it through one pipeline's ``process_item`` method.
    """

    def setUp(self):
        """Create an empty ``Result`` item shared by the test methods."""
        self.testItem = items.Result()

    def test_none_pipeline(self):
        """Check the pipeline that replaces the None values in items.

        A value that was explicitly set must survive unchanged; every
        other key present on the item afterwards must hold an empty
        string rather than ``None``.
        """
        self.testItem["value"] = "abc"
        pipe = pipelines.RemoveNonePipeline()
        processed = pipe.process_item(self.testItem, spider.FourmiSpider())

        self.assertEqual(processed["value"], "abc")

        for key in self.testItem:
            self.assertIsNotNone(processed[key])
            # Compare by equality: identity checks against string literals
            # only pass by accident of interning.
            if key != "value":
                self.assertEqual(processed[key], "")

    def test_duplicate_pipeline(self):
        """Check the pipeline that removes duplicates.

        The first occurrence of an item passes through, an identical
        second occurrence raises ``DropItem``, and an item differing in
        ``value`` passes through again.
        """
        self.testItem["attribute"] = "test"
        self.testItem["value"] = "test"
        self.testItem["conditions"] = "test"

        pipe = pipelines.DuplicatePipeline()
        self.assertEqual(
            pipe.process_item(self.testItem, spider.FourmiSpider()),
            self.testItem,
        )
        self.assertRaises(
            DropItem, pipe.process_item, self.testItem, spider.FourmiSpider()
        )

        other_item = copy.deepcopy(self.testItem)
        other_item["value"] = "test1"
        self.assertEqual(
            pipe.process_item(other_item, spider.FourmiSpider()), other_item
        )

    def test_attribute_selection(self):
        """Check the pipeline that selects attributes.

        With ``selected_attributes=["a.d"]`` the item whose attribute is
        ``"abd"`` is returned and the one whose attribute is ``"abc"``
        raises ``DropItem``.
        """
        item1 = copy.deepcopy(self.testItem)
        item2 = copy.deepcopy(self.testItem)

        item1["attribute"] = "abd"
        item2["attribute"] = "abc"

        # NOTE(review): "a.d" is presumably treated as a regular expression
        # by the pipeline -- confirm against AttributeSelectionPipeline.
        s = spider.FourmiSpider(selected_attributes=["a.d"])
        pipe = pipelines.AttributeSelectionPipeline()

        self.assertEqual(pipe.process_item(item1, s), item1)
        self.assertRaises(DropItem, pipe.process_item, item2, s)