@@ -26,23 +26,23 @@ def data_sem(self):
26
26
def mean_values_per_point (self ):
27
27
return np .mean ([x .n for x in self ._data .values ()])
28
28
29
- def get_seed (self , point ):
29
+ def _next_seed (self , point ):
30
30
_data = self ._data .get (point , {})
31
31
pending_seeds = self .pending_points .get (point , set ())
32
32
seed = len (_data ) + len (pending_seeds )
33
33
if seed in _data or seed in pending_seeds :
34
- # means that the seed already exists, for example
34
+ # Means that the seed already exists, for example
35
35
# when '_data[point].keys() | pending_points[point] == {0, 2}'.
36
+ # Only happens when starting the learner after cancelling/loading.
36
37
return (set (range (seed )) - pending_seeds - _data .keys ()).pop ()
37
38
return seed
38
39
39
40
def loss_per_existing_point (self ):
40
41
scale = self .value_scale ()
41
-
42
42
points = []
43
43
loss_improvements = []
44
44
for p , sem in self .data_sem .items ():
45
- points .append ((p , self .get_seed (p )))
45
+ points .append ((p , self ._next_seed (p )))
46
46
N = self .n_values (p )
47
47
sem_improvement = (1 - sqrt (N - 1 ) / sqrt (N )) * sem
48
48
loss_improvement = self .weight * sem_improvement / scale
@@ -102,8 +102,12 @@ def _mean_values_per_neighbor(self, neighbors):
102
102
for p , ns in neighbors .items ()}
103
103
104
104
def _normalize_new_points_loss_improvements (self , points , loss_improvements ):
105
- """If we are suggesting a new point, then its 'loss_improvement' should
106
- be divided by the average number of values of its neigbors."""
105
+ """If we are suggesting a new (not yet suggested) point, then its
106
+ 'loss_improvement' should be divided by the average number of values
107
+ of its neighbors.
108
+
109
+ This is because it will take a similar number of points to reach
110
+ that loss. """
107
111
if len (self ._data ) < 4 :
108
112
return loss_improvements
109
113
@@ -116,7 +120,10 @@ def _normalize_new_points_loss_improvements(self, points, loss_improvements):
116
120
117
121
def _normalize_existing_points_loss_improvements (self , points , loss_improvements ):
118
122
"""If the neighbors of 'point' have twice as many values
119
- on average, then that 'point' should have an infinite loss."""
123
+ on average, then that 'point' should have an infinite loss.
124
+
125
+ We do this because this point probably has an incorrect
126
+ estimate of the sem."""
120
127
if len (self ._data ) < 4 :
121
128
return loss_improvements
122
129
@@ -136,7 +143,7 @@ def _get_data(self):
136
143
137
144
def add_average_mixin (cls ):
138
145
names = ('data' , 'data_sem' , 'mean_values_per_point' ,
139
- 'get_seed ' , 'loss_per_existing_point' , '_add_to_pending' ,
146
+ '_next_seed ' , 'loss_per_existing_point' , '_add_to_pending' ,
140
147
'_remove_from_to_pending' , '_add_to_data' , 'ask' , 'n_values' ,
141
148
'_normalize_new_points_loss_improvements' ,
142
149
'_normalize_existing_points_loss_improvements' ,
0 commit comments