-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathtest_cpu_gpu.py
129 lines (118 loc) · 5.36 KB
/
test_cpu_gpu.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import unittest
from thinc.api import prefer_gpu, require_cpu
import holmes_extractor as holmes
class CpuGpuTest(unittest.TestCase):
    """Check that Holmes keeps working when the thinc device setting changes mid-session.

    Every test creates a ``holmes.Manager`` after calling one of thinc's
    ``require_cpu()`` / ``prefer_gpu()``, performs the first half of a workflow,
    calls the other function, and then performs the second half. The
    ``*_cpu_gpu`` tests start with ``require_cpu()``; the ``*_gpu_cpu`` tests
    start with ``prefer_gpu()``.
    """

    # Upper bound on training attempts in the supervised classification tests.
    _MAX_TRAINING_ATTEMPTS = 20

    @staticmethod
    def _predicted_label(classifier, text):
        """Return the first key of the mapping ``classifier`` produces for ``text``."""
        return list(classifier.parse_and_classify(text).keys())[0]

    def _train_animal_classifier(self, holmes_manager):
        """Train and return a classifier that labels "You are an animal." as "animal".

        Registers two one-phrase training documents on ``holmes_manager`` and
        retrains up to ``_MAX_TRAINING_ATTEMPTS`` times. The test fails here,
        before any device switch performed by the caller, if no attempt
        produces the expected label.
        """
        sttb = holmes_manager.get_supervised_topic_training_basis(one_hot=False)
        sttb.parse_and_register_training_document("An animal", "animal", "d4")
        sttb.parse_and_register_training_document("A computer", "computers", "d5")
        sttb.prepare()
        # With so little training data, the NN does not consistently learn correctly,
        # so training is retried until one run classifies the probe sentence correctly.
        for _ in range(self._MAX_TRAINING_ATTEMPTS):
            trainer = sttb.train(
                minimum_occurrences=0,
                cv_threshold=0,
                max_epochs=1000,
                learning_rate=0.0001,
                convergence_threshold=0,
            )
            stc = trainer.classifier()
            if self._predicted_label(stc, "You are an animal.") == "animal":
                return stc
        # Reaching this point means every attempt failed. Failing now keeps a
        # training problem distinguishable from a device-switching problem.
        self.fail(
            "classifier did not label 'You are an animal.' as 'animal' "
            f"after {self._MAX_TRAINING_ATTEMPTS} training attempts"
        )

    def _assert_classifier_works(self, stc):
        """Assert the expected results for one matching and one unrelated sentence."""
        self.assertEqual(self._predicted_label(stc, "You are an animal."), "animal")
        self.assertIsNone(
            stc.parse_and_classify("My name is Charles and I like sewing.")
        )

    def test_document_based_structural_matching_cpu_gpu(self):
        """Register a document after require_cpu(), then a search phrase after prefer_gpu()."""
        require_cpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        prefer_gpu()
        holmes_manager.register_search_phrase("A dog chases a cat")
        self.assertEqual(len(holmes_manager.match()), 1)

    def test_document_based_structural_matching_gpu_cpu(self):
        """Register a document after prefer_gpu(), then a search phrase after require_cpu()."""
        prefer_gpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        require_cpu()
        holmes_manager.register_search_phrase("A dog chases a cat")
        self.assertEqual(len(holmes_manager.match()), 1)

    def test_search_phrase_based_structural_matching_cpu_gpu(self):
        """Register a search phrase after require_cpu(), then a document after prefer_gpu()."""
        require_cpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.register_search_phrase("A dog chases a cat")
        prefer_gpu()
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        self.assertEqual(len(holmes_manager.match()), 1)

    def test_search_phrase_based_structural_matching_gpu_cpu(self):
        """Register a search phrase after prefer_gpu(), then a document after require_cpu()."""
        prefer_gpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.register_search_phrase("A dog chases a cat")
        require_cpu()
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        self.assertEqual(len(holmes_manager.match()), 1)

    def test_topic_matching_cpu_gpu(self):
        """Register a document after require_cpu(), then topic-match after prefer_gpu()."""
        require_cpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        prefer_gpu()
        topic_matches = holmes_manager.topic_match_documents_against("A dog chases a cat")
        self.assertEqual(len(topic_matches), 1)

    def test_topic_matching_gpu_cpu(self):
        """Register a document after prefer_gpu(), then topic-match after require_cpu()."""
        prefer_gpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        holmes_manager.parse_and_register_document(
            document_text="The dog chased the cat.", label='pets')
        require_cpu()
        topic_matches = holmes_manager.topic_match_documents_against("A dog chases a cat")
        self.assertEqual(len(topic_matches), 1)

    def test_supervised_document_classification_cpu_gpu(self):
        """Train a classifier after require_cpu(), then classify after prefer_gpu()."""
        require_cpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        stc = self._train_animal_classifier(holmes_manager)
        prefer_gpu()
        self._assert_classifier_works(stc)

    def test_supervised_document_classification_gpu_cpu(self):
        """Train a classifier after prefer_gpu(), then classify after require_cpu()."""
        prefer_gpu()
        holmes_manager = holmes.Manager('en_core_web_sm', number_of_workers=2)
        stc = self._train_animal_classifier(holmes_manager)
        require_cpu()
        self._assert_classifier_works(stc)