tangled
alpha
login
or
join now
dekker.one
/
Fourmi
0
fork
atom
A web scraper built to search for specific information about a given compound (and its pseudonyms)
0
fork
atom
overview
issues
pulls
pipelines
Added tests for the pipeline
dekker.one
11 years ago
75c0be1f
c48c4ec6
+49
-1
2 changed files
expand all
collapse all
unified
split
FourmiCrawler
pipelines.py
tests
test_pipeline.py
+1
-1
FourmiCrawler/pipelines.py
···
35
35
"""
36
36
value = (item['attribute'], item['value'], item['conditions'])
37
37
if value in self.known_values:
38
38
-
raise DropItem("Duplicate item found: %s" % item) # #[todo] append sources of first item.
38
38
+
raise DropItem("Duplicate item found: %s" % item) #[todo] append sources of first item.
39
39
else:
40
40
self.known_values.add(value)
41
41
return item
+48
tests/test_pipeline.py
···
1
1
+
import copy
2
2
+
import unittest
3
3
+
from FourmiCrawler import pipelines, spider, items
4
4
+
from scrapy.exceptions import DropItem
5
5
+
6
6
+
7
7
+
class TestPipelines(unittest.TestCase):
    """Unit tests for the item pipelines defined in ``FourmiCrawler.pipelines``."""

    def setUp(self):
        """Create a fresh ``items.Result`` for every test."""
        self.testItem = items.Result()

    def test_NonePipeline(self):
        """RemoveNonePipeline must keep a set value and leave no field as None.

        Every field other than ``value`` is expected to come back as an
        empty string.
        """
        self.testItem["value"] = "abc"
        pipe = pipelines.RemoveNonePipeline()
        processed = pipe.process_item(self.testItem, spider.FourmiSpider())

        # assertEqual reports both operands on failure, unlike assertTrue(a == b).
        self.assertEqual(processed["value"], "abc")

        for key in self.testItem:
            self.assertIsNotNone(processed[key])
            # Compare strings by value: identity checks (`is`) against a
            # literal depend on interpreter string interning and raise a
            # SyntaxWarning on Python 3.8+.
            if key != "value":
                self.assertEqual(processed[key], "")

    def test_DuplicatePipeline(self):
        """DuplicatePipeline must pass an item once and drop an identical repeat.

        An item that differs only in ``value`` must still be accepted.
        """
        self.testItem["attribute"] = "test"
        self.testItem["value"] = "test"
        self.testItem["conditions"] = "test"

        pipe = pipelines.DuplicatePipeline()
        # First occurrence is returned unchanged ...
        self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
        # ... the second, identical occurrence is rejected.
        self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())

        otherItem = copy.deepcopy(self.testItem)
        otherItem["value"] = "test1"
        self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)

    def test_AttributeSelection(self):
        """AttributeSelectionPipeline must keep selected attributes and drop others.

        With the selection ``"a.d"`` the attribute ``"abd"`` is expected to
        pass and ``"abc"`` to be dropped.
        NOTE(review): presumably the selection is matched as a regular
        expression -- confirm against the pipeline implementation.
        """
        item1 = copy.deepcopy(self.testItem)
        item2 = copy.deepcopy(self.testItem)

        item1["attribute"] = "abd"
        item2["attribute"] = "abc"

        s = spider.FourmiSpider(selected_attributes=["a.d"])
        pipe = pipelines.AttributeSelectionPipeline()

        self.assertEqual(pipe.process_item(item1, s), item1)
        self.assertRaises(DropItem, pipe.process_item, item2, s)