7
7
8
8
package org .elasticsearch .xpack .ml .aggs .frequentitemsets ;
9
9
10
+ import org .apache .logging .log4j .LogManager ;
11
+ import org .apache .logging .log4j .Logger ;
10
12
import org .apache .lucene .util .BitSet ;
11
13
import org .apache .lucene .util .FixedBitSet ;
12
- import org .apache .lucene .util .LongsRef ;
13
14
import org .elasticsearch .core .Releasable ;
14
15
import org .elasticsearch .core .Releasables ;
16
+ import org .elasticsearch .xpack .ml .aggs .frequentitemsets .TransactionStore .TopItemIds ;
15
17
16
18
import java .io .IOException ;
17
19
import java .util .Arrays ;
30
32
* if [a, b] is not in T, [a, b, c] cannot be in T either
31
33
*/
32
34
class CountingItemSetTraverser implements Releasable {
35
+ private static final Logger logger = LogManager .getLogger (CountingItemSetTraverser .class );
33
36
34
37
// start size and size increment for the occurrences stack
35
38
private static final int OCCURENCES_SIZE_INCREMENT = 10 ;
@@ -48,13 +51,19 @@ class CountingItemSetTraverser implements Releasable {
48
51
// growable bit set from java util
49
52
private java .util .BitSet visited ;
50
53
51
- CountingItemSetTraverser (TransactionStore transactionStore , int cacheTraversalDepth , int cacheNumberOfTransactions , long minCount ) {
54
+ CountingItemSetTraverser (
55
+ TransactionStore transactionStore ,
56
+ TopItemIds topItemIds ,
57
+ int cacheTraversalDepth ,
58
+ int cacheNumberOfTransactions ,
59
+ long minCount
60
+ ) {
52
61
this .transactionStore = transactionStore ;
53
62
54
63
boolean success = false ;
55
64
try {
56
65
// we allocate 2 big arrays, if the 2nd allocation fails, ensure we clean up
57
- this .topItemSetTraverser = transactionStore . getTopItemIdTraverser ( );
66
+ this .topItemSetTraverser = new ItemSetTraverser ( topItemIds );
58
67
this .topTransactionIds = transactionStore .getTopTransactionIds ();
59
68
success = true ;
60
69
} finally {
@@ -80,11 +89,15 @@ public boolean next(long earlyStopMinCount) throws IOException {
80
89
final long totalTransactionCount = transactionStore .getTotalTransactionCount ();
81
90
82
91
int depth = topItemSetTraverser .getNumberOfItems ();
92
+ long occurencesOfSingleItem = transactionStore .getItemCount (topItemSetTraverser .getItemId ());
93
+
83
94
if (depth == 1 ) {
84
95
// at the 1st level, we can take the count directly from the transaction store
85
- occurencesStack [0 ] = transactionStore .getItemCount (topItemSetTraverser .getItemId ());
96
+ occurencesStack [0 ] = occurencesOfSingleItem ;
97
+ return true ;
98
+ } else if (occurencesOfSingleItem < earlyStopMinCount ) {
99
+ rememberCountInStack (depth , occurencesOfSingleItem );
86
100
return true ;
87
-
88
101
// up to a certain depth, store results in a cache matrix
89
102
} else if (depth < cacheTraversalDepth ) {
90
103
// get the cached skip count
@@ -187,7 +200,7 @@ public long getCount() {
187
200
/**
188
201
* Get the count of the item set without the last item
189
202
*/
190
- public long getPreviousCount () {
203
+ public long getParentCount () {
191
204
if (topItemSetTraverser .getNumberOfItems () > 1 ) {
192
205
return occurencesStack [topItemSetTraverser .getNumberOfItems () - 2 ];
193
206
}
@@ -201,7 +214,7 @@ public boolean hasBeenVisited() {
201
214
return true ;
202
215
}
203
216
204
- public boolean hasPredecessorBeenVisited () {
217
+ public boolean hasParentBeenVisited () {
205
218
if (topItemSetTraverser .getNumberOfItems () > 1 ) {
206
219
return visited .get (topItemSetTraverser .getNumberOfItems () - 2 );
207
220
}
@@ -214,7 +227,7 @@ public void setVisited() {
214
227
}
215
228
}
216
229
217
- public void setPredecessorVisited () {
230
+ public void setParentVisited () {
218
231
if (topItemSetTraverser .getNumberOfItems () > 1 ) {
219
232
visited .set (topItemSetTraverser .getNumberOfItems () - 2 );
220
233
}
@@ -228,10 +241,15 @@ public int getNumberOfItems() {
228
241
}
229
242
230
243
/**
231
- * Get the current item set
244
+ *
245
+ * Get a bitset representation of the current item set
232
246
*/
233
- public LongsRef getItemSet () {
234
- return topItemSetTraverser .getItemSet ();
247
+ public ItemSetBitSet getItemSetBitSet () {
248
+ return topItemSetTraverser .getItemSetBitSet ();
249
+ }
250
+
251
+ public ItemSetBitSet getParentItemSetBitSet () {
252
+ return topItemSetTraverser .getParentItemSetBitSet ();
235
253
}
236
254
237
255
/**
@@ -250,7 +268,7 @@ public boolean atLeaf() {
250
268
251
269
@ Override
252
270
public void close () {
253
- Releasables .close (topItemSetTraverser , topTransactionIds );
271
+ Releasables .close (topTransactionIds );
254
272
}
255
273
256
274
// remember the count in the stack without tracking push and pop
0 commit comments