|
7 | 7 | package org.elasticsearch.xpack.ml.featureindexbuilder.job;
|
8 | 8 |
|
9 | 9 | import org.apache.log4j.Logger;
|
10 |
| -import org.elasticsearch.action.admin.indices.create.CreateIndexRequest; |
11 |
| -import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; |
12 |
| -import org.elasticsearch.action.bulk.BulkRequest; |
13 | 10 | import org.elasticsearch.action.index.IndexRequest;
|
14 | 11 | import org.elasticsearch.action.search.SearchRequest;
|
15 | 12 | import org.elasticsearch.action.search.SearchResponse;
|
16 |
| -import org.elasticsearch.client.Client; |
17 |
| -import org.elasticsearch.client.IndicesAdminClient; |
18 |
| -import org.elasticsearch.common.settings.Settings; |
19 | 13 | import org.elasticsearch.common.xcontent.XContentBuilder;
|
20 |
| -import org.elasticsearch.common.xcontent.XContentType; |
21 |
| -import org.elasticsearch.index.IndexNotFoundException; |
22 | 14 | import org.elasticsearch.index.query.MatchAllQueryBuilder;
|
23 | 15 | import org.elasticsearch.index.query.QueryBuilder;
|
24 | 16 | import org.elasticsearch.search.aggregations.AggregationBuilders;
|
25 | 17 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation;
|
26 |
| -import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregation.Bucket; |
27 | 18 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
|
28 | 19 | import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
|
29 | 20 | import org.elasticsearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder;
|
30 | 21 | import org.elasticsearch.search.aggregations.metrics.avg.InternalAvg;
|
31 | 22 | import org.elasticsearch.search.builder.SearchSourceBuilder;
|
| 23 | +import org.elasticsearch.xpack.core.indexing.AsyncTwoPhaseIndexer; |
| 24 | +import org.elasticsearch.xpack.core.indexing.IndexerState; |
| 25 | +import org.elasticsearch.xpack.core.indexing.IterationResult; |
32 | 26 |
|
33 | 27 | import java.io.IOException;
|
| 28 | +import java.io.UncheckedIOException; |
34 | 29 | import java.util.ArrayList;
|
35 | 30 | import java.util.List;
|
36 | 31 | import java.util.Map;
|
37 |
| -import java.util.concurrent.ExecutionException; |
| 32 | +import java.util.concurrent.Executor; |
| 33 | +import java.util.concurrent.atomic.AtomicReference; |
| 34 | +import java.util.stream.Collectors; |
38 | 35 |
|
39 |
| -import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; |
40 | 36 | import static org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings.DOC_TYPE;
|
| 37 | +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; |
41 | 38 |
|
42 |
| -public class FeatureIndexBuilderIndexer { |
| 39 | +public abstract class FeatureIndexBuilderIndexer extends AsyncTwoPhaseIndexer<Map<String, Object>, FeatureIndexBuilderJobStats> { |
43 | 40 | private static final String PIVOT_INDEX = "pivot-reviews";
|
44 | 41 | private static final String SOURCE_INDEX = "anonreviews";
|
45 | 42 |
|
46 | 43 | private static final Logger logger = Logger.getLogger(FeatureIndexBuilderIndexer.class.getName());
|
47 | 44 | private FeatureIndexBuilderJob job;
|
48 |
| - private Client client; |
49 | 45 |
|
50 |
| - public FeatureIndexBuilderIndexer(FeatureIndexBuilderJob job, Client client) { |
| 46 | + public FeatureIndexBuilderIndexer(Executor executor, FeatureIndexBuilderJob job, AtomicReference<IndexerState> initialState, |
| 47 | + Map<String, Object> initialPosition) { |
| 48 | + super(executor, initialState, initialPosition, new FeatureIndexBuilderJobStats()); |
51 | 49 |
|
52 | 50 | this.job = job;
|
53 |
| - this.client = client; |
54 |
| - logger.info("delete pivot-reviews"); |
55 |
| - |
56 | 51 | }
|
57 | 52 |
|
58 |
| - public synchronized void start() { |
59 |
| - deleteIndex(client); |
60 |
| - |
61 |
| - createIndex(client); |
62 |
| - |
63 |
| - int runs = 0; |
64 |
| - |
65 |
| - Map<String, Object> after = null; |
66 |
| - logger.info("start feature indexing"); |
67 |
| - SearchResponse response; |
68 |
| - |
69 |
| - try { |
70 |
| - response = runQuery(client, after); |
71 |
| - |
72 |
| - CompositeAggregation compositeAggregation = response.getAggregations().get("feature"); |
73 |
| - after = compositeAggregation.afterKey(); |
74 |
| - |
75 |
| - while (after != null) { |
76 |
| - indexBuckets(compositeAggregation); |
77 |
| - |
78 |
| - ++runs; |
79 |
| - response = runQuery(client, after); |
80 |
| - |
81 |
| - compositeAggregation = response.getAggregations().get("feature"); |
82 |
| - after = compositeAggregation.afterKey(); |
83 |
| - |
84 |
| - //after = null; |
85 |
| - } |
86 |
| - |
87 |
| - indexBuckets(compositeAggregation); |
88 |
| - } catch (InterruptedException | ExecutionException e) { |
89 |
| - logger.error("Failed to build feature index", e); |
90 |
| - } |
91 |
| - |
92 |
| - logger.info("Finished feature indexing"); |
| 53 | + @Override |
| 54 | + protected String getJobId() { |
| 55 | + return job.getConfig().getId(); |
93 | 56 | }
|
94 | 57 |
|
95 |
| - private void indexBuckets(CompositeAggregation compositeAggregation) { |
96 |
| - BulkRequest bulkIndexRequest = new BulkRequest(); |
97 |
| - try { |
98 |
| - for (Bucket b : compositeAggregation.getBuckets()) { |
| 58 | + @Override |
| 59 | + protected void onStartJob(long now) { |
| 60 | + } |
99 | 61 |
|
100 |
| - InternalAvg avgAgg = b.getAggregations().get("avg_rating"); |
| 62 | + @Override |
| 63 | + protected IterationResult<Map<String, Object>> doProcess(SearchResponse searchResponse) { |
| 64 | + final CompositeAggregation agg = searchResponse.getAggregations().get("feature"); |
| 65 | + return new IterationResult<>(processBuckets(agg), agg.afterKey(), agg.getBuckets().isEmpty()); |
| 66 | + } |
101 | 67 |
|
102 |
| - XContentBuilder builder; |
| 68 | + /* |
| 69 | + * Mocked demo case |
| 70 | + * |
| 71 | + * TODO: replace with proper implementation |
| 72 | + */ |
| 73 | + private List<IndexRequest> processBuckets(CompositeAggregation agg) { |
| 74 | + return agg.getBuckets().stream().map(b -> { |
| 75 | + InternalAvg avgAgg = b.getAggregations().get("avg_rating"); |
| 76 | + XContentBuilder builder; |
| 77 | + try { |
103 | 78 | builder = jsonBuilder();
|
| 79 | + |
104 | 80 | builder.startObject();
|
105 | 81 | builder.field("reviewerId", b.getKey().get("reviewerId"));
|
106 | 82 | builder.field("avg_rating", avgAgg.getValue());
|
107 | 83 | builder.endObject();
|
108 |
| - bulkIndexRequest.add(new IndexRequest(PIVOT_INDEX, DOC_TYPE).source(builder)); |
109 |
| - |
| 84 | + } catch (IOException e) { |
| 85 | + throw new UncheckedIOException(e); |
110 | 86 | }
|
111 |
| - client.bulk(bulkIndexRequest); |
112 |
| - } catch (IOException e) { |
113 |
| - logger.error("Failed to index", e); |
114 |
| - } |
| 87 | + |
| 88 | + String indexName = PIVOT_INDEX + "_" + job.getConfig().getId(); |
| 89 | + IndexRequest request = new IndexRequest(indexName, DOC_TYPE).source(builder); |
| 90 | + return request; |
| 91 | + }).collect(Collectors.toList()); |
| 92 | + } |
| 93 | + |
| 94 | + @Override |
| 95 | + protected SearchRequest buildSearchRequest() { |
| 96 | + |
| 97 | + final Map<String, Object> position = getPosition(); |
| 98 | + SearchRequest request = buildFeatureQuery(position); |
| 99 | + return request; |
115 | 100 | }
|
116 |
| - |
| 101 | + |
117 | 102 | /*
|
118 |
| - * Hardcoded demo case for pivoting |
| 103 | + * Mocked demo case |
| 104 | + * |
| 105 | + * TODO: everything below will be replaced with proper implementation read from job configuration |
119 | 106 | */
|
120 |
| - |
121 |
| - private static void deleteIndex(Client client) { |
122 |
| - DeleteIndexRequest deleteIndex = new DeleteIndexRequest(PIVOT_INDEX); |
123 |
| - |
124 |
| - IndicesAdminClient adminClient = client.admin().indices(); |
125 |
| - try { |
126 |
| - adminClient.delete(deleteIndex).actionGet(); |
127 |
| - } catch (IndexNotFoundException e) { |
128 |
| - } |
129 |
| - } |
130 |
| - |
131 |
| - private static void createIndex(Client client) { |
132 |
| - |
133 |
| - CreateIndexRequest request = new CreateIndexRequest(PIVOT_INDEX); |
134 |
| - request.settings(Settings.builder() // <1> |
135 |
| - .put("index.number_of_shards", 1) |
136 |
| - .put("index.number_of_replicas", 0) |
137 |
| - ); |
138 |
| - request.mapping(DOC_TYPE, // <1> |
139 |
| - "{\n" + |
140 |
| - " \"" + DOC_TYPE + "\": {\n" + |
141 |
| - " \"properties\": {\n" + |
142 |
| - " \"reviewerId\": {\n" + |
143 |
| - " \"type\": \"keyword\"\n" + |
144 |
| - " },\n" + |
145 |
| - " \"avg_rating\": {\n" + |
146 |
| - " \"type\": \"integer\"\n" + |
147 |
| - " }\n" + |
148 |
| - " }\n" + |
149 |
| - " }\n" + |
150 |
| - "}", // <2> |
151 |
| - XContentType.JSON); |
152 |
| - IndicesAdminClient adminClient = client.admin().indices(); |
153 |
| - adminClient.create(request).actionGet(); |
154 |
| - } |
155 |
| - |
156 | 107 | private static SearchRequest buildFeatureQuery(Map<String, Object> after) {
|
157 | 108 | QueryBuilder queryBuilder = new MatchAllQueryBuilder();
|
158 | 109 | SearchRequest searchRequest = new SearchRequest(SOURCE_INDEX);
|
159 |
| - |
| 110 | + |
160 | 111 | List<CompositeValuesSourceBuilder<?>> sources = new ArrayList<>();
|
161 | 112 | sources.add(new TermsValuesSourceBuilder("reviewerId").field("reviewerId"));
|
162 |
| - |
| 113 | + |
163 | 114 | CompositeAggregationBuilder compositeAggregation = new CompositeAggregationBuilder("feature", sources);
|
164 | 115 | compositeAggregation.size(1000);
|
165 |
| - |
| 116 | + |
166 | 117 | if (after != null) {
|
167 | 118 | compositeAggregation.aggregateAfter(after);
|
168 | 119 | }
|
169 |
| - |
| 120 | + |
170 | 121 | compositeAggregation.subAggregation(AggregationBuilders.avg("avg_rating").field("rating"));
|
171 | 122 | compositeAggregation.subAggregation(AggregationBuilders.cardinality("dc_vendors").field("vendorId"));
|
172 | 123 | SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
|
173 | 124 | sourceBuilder.aggregation(compositeAggregation);
|
174 | 125 | sourceBuilder.size(0);
|
175 | 126 | sourceBuilder.query(queryBuilder);
|
176 | 127 | searchRequest.source(sourceBuilder);
|
177 |
| - |
| 128 | + |
178 | 129 | return searchRequest;
|
179 |
| - } |
180 |
| - |
181 |
| - private static SearchResponse runQuery(Client client, Map<String, Object> after) throws InterruptedException, ExecutionException { |
182 |
| - |
183 |
| - SearchRequest request = buildFeatureQuery(after); |
184 |
| - SearchResponse response = client.search(request).get(); |
185 |
| - |
186 |
| - return response; |
187 |
| - } |
188 |
| - |
189 |
| - private static void indexResult() { |
190 |
| - |
191 |
| - |
192 |
| - |
193 | 130 | }
|
194 | 131 | }
|
0 commit comments