7
7
8
8
import org .apache .logging .log4j .LogManager ;
9
9
import org .apache .logging .log4j .Logger ;
10
+ import org .apache .logging .log4j .message .ParameterizedMessage ;
10
11
import org .elasticsearch .action .search .ClearScrollAction ;
11
12
import org .elasticsearch .action .search .ClearScrollRequest ;
12
13
import org .elasticsearch .action .search .SearchAction ;
20
21
import org .elasticsearch .search .SearchHit ;
21
22
import org .elasticsearch .search .sort .SortOrder ;
22
23
import org .elasticsearch .xpack .core .ClientHelper ;
23
- import org .elasticsearch .xpack .core .ml .datafeed .extractor .ExtractorUtils ;
24
24
import org .elasticsearch .xpack .ml .datafeed .extractor .fields .ExtractedField ;
25
25
import org .elasticsearch .xpack .ml .dataframe .DataFrameAnalyticsFields ;
26
26
34
34
import java .util .Objects ;
35
35
import java .util .Optional ;
36
36
import java .util .concurrent .TimeUnit ;
37
+ import java .util .function .Supplier ;
37
38
import java .util .stream .Collectors ;
38
39
39
40
/**
@@ -91,9 +92,28 @@ public Optional<List<Row>> next() throws IOException {
91
92
92
93
protected List <Row > initScroll () throws IOException {
93
94
LOGGER .debug ("[{}] Initializing scroll" , context .jobId );
94
- SearchResponse searchResponse = executeSearchRequest (buildSearchRequest ());
95
- LOGGER .debug ("[{}] Search response was obtained" , context .jobId );
96
- return processSearchResponse (searchResponse );
95
+ return tryRequestWithSearchResponse (() -> executeSearchRequest (buildSearchRequest ()));
96
+ }
97
+
98
+ private List <Row > tryRequestWithSearchResponse (Supplier <SearchResponse > request ) throws IOException {
99
+ try {
100
+ // We've set allow_partial_search_results to false which means if something
101
+ // goes wrong the request will throw.
102
+ SearchResponse searchResponse = request .get ();
103
+ LOGGER .debug ("[{}] Search response was obtained" , context .jobId );
104
+
105
+ // Request was successful so we can restore the flag to retry if a future failure occurs
106
+ searchHasShardFailure = false ;
107
+
108
+ return processSearchResponse (searchResponse );
109
+ } catch (Exception e ) {
110
+ if (searchHasShardFailure ) {
111
+ throw e ;
112
+ }
113
+ LOGGER .warn (new ParameterizedMessage ("[{}] Search resulted to failure; retrying once" , context .jobId ), e );
114
+ markScrollAsErrored ();
115
+ return initScroll ();
116
+ }
97
117
}
98
118
99
119
protected SearchResponse executeSearchRequest (SearchRequestBuilder searchRequestBuilder ) {
@@ -103,6 +123,8 @@ protected SearchResponse executeSearchRequest(SearchRequestBuilder searchRequest
103
123
private SearchRequestBuilder buildSearchRequest () {
104
124
SearchRequestBuilder searchRequestBuilder = new SearchRequestBuilder (client , SearchAction .INSTANCE )
105
125
.setScroll (SCROLL_TIMEOUT )
126
+ // This ensures the search throws if there are failures and the scroll context gets cleared automatically
127
+ .setAllowPartialSearchResults (false )
106
128
.addSort (DataFrameAnalyticsFields .ID , SortOrder .ASC )
107
129
.setIndices (context .indices )
108
130
.setSize (context .scrollSize )
@@ -117,14 +139,6 @@ private SearchRequestBuilder buildSearchRequest() {
117
139
}
118
140
119
141
private List <Row > processSearchResponse (SearchResponse searchResponse ) throws IOException {
120
-
121
- if (searchResponse .getFailedShards () > 0 && searchHasShardFailure == false ) {
122
- LOGGER .debug ("[{}] Resetting scroll search after shard failure" , context .jobId );
123
- markScrollAsErrored ();
124
- return initScroll ();
125
- }
126
-
127
- ExtractorUtils .checkSearchWasSuccessful (context .jobId , searchResponse );
128
142
scrollId = searchResponse .getScrollId ();
129
143
if (searchResponse .getHits ().getHits ().length == 0 ) {
130
144
hasNext = false ;
@@ -143,7 +157,6 @@ private List<Row> processSearchResponse(SearchResponse searchResponse) throws IO
143
157
rows .add (createRow (hit ));
144
158
}
145
159
return rows ;
146
-
147
160
}
148
161
149
162
private Row createRow (SearchHit hit ) {
@@ -163,15 +176,13 @@ private Row createRow(SearchHit hit) {
163
176
164
177
private List <Row > continueScroll () throws IOException {
165
178
LOGGER .debug ("[{}] Continuing scroll with id [{}]" , context .jobId , scrollId );
166
- SearchResponse searchResponse = executeSearchScrollRequest (scrollId );
167
- LOGGER .debug ("[{}] Search response was obtained" , context .jobId );
168
- return processSearchResponse (searchResponse );
179
+ return tryRequestWithSearchResponse (() -> executeSearchScrollRequest (scrollId ));
169
180
}
170
181
171
182
private void markScrollAsErrored () {
172
183
// This could be a transient error with the scroll Id.
173
184
// Reinitialise the scroll and try again but only once.
174
- resetScroll () ;
185
+ scrollId = null ;
175
186
searchHasShardFailure = true ;
176
187
}
177
188
@@ -183,11 +194,6 @@ protected SearchResponse executeSearchScrollRequest(String scrollId) {
183
194
.get ());
184
195
}
185
196
186
- private void resetScroll () {
187
- clearScroll (scrollId );
188
- scrollId = null ;
189
- }
190
-
191
197
private void clearScroll (String scrollId ) {
192
198
if (scrollId != null ) {
193
199
ClearScrollRequest request = new ClearScrollRequest ();
0 commit comments