[FEATURE][ML] Parse results and join them in the data-frame copy index #36382
File: AnalyticsProcessManager.java
@@ -17,6 +17,7 @@
 import org.elasticsearch.xpack.ml.MachineLearning;
 import org.elasticsearch.xpack.ml.analytics.DataFrameAnalysis;
 import org.elasticsearch.xpack.ml.analytics.DataFrameDataExtractor;
+import org.elasticsearch.xpack.ml.analytics.DataFrameDataExtractorFactory;
 
 import java.io.IOException;
 import java.util.List;
@@ -41,28 +42,39 @@ public AnalyticsProcessManager(Client client, Environment environment, ThreadPo
         this.processFactory = Objects.requireNonNull(analyticsProcessFactory);
     }
 
-    public void processData(String jobId, DataFrameDataExtractor dataExtractor) {
+    public void runJob(String jobId, DataFrameDataExtractorFactory dataExtractorFactory) {
         threadPool.generic().execute(() -> {
-            AnalyticsProcess process = createProcess(jobId, dataExtractor);
-            try {
-                writeHeaderRecord(dataExtractor, process);
-                writeDataRows(dataExtractor, process);
-                process.writeEndOfDataMessage();
-                process.flushStream();
-
-                LOGGER.debug("[{}] Closing process", jobId);
-            } catch (IOException e) {
-                LOGGER.error(new ParameterizedMessage("[{}] Error writing data to the process", jobId), e);
-            } finally {
-                try {
-                    process.close();
-                } catch (IOException e) {
-                    LOGGER.error("[{}] Error closing data frame analyzer process", jobId);
-                }
-            }
+            DataFrameDataExtractor dataExtractor = dataExtractorFactory.newExtractor(false);
+            AnalyticsProcess process = createProcess(jobId, createProcessConfig(dataExtractor));
+            ExecutorService executorService = threadPool.executor(MachineLearning.AUTODETECT_THREAD_POOL_NAME);
+            AnalyticsResultProcessor resultProcessor = new AnalyticsResultProcessor(client, dataExtractorFactory.newExtractor(true));
+            executorService.execute(() -> resultProcessor.process(process));
+            executorService.execute(() -> processData(jobId, dataExtractor, process, resultProcessor));
         });
     }
 
+    private void processData(String jobId, DataFrameDataExtractor dataExtractor, AnalyticsProcess process,
+                             AnalyticsResultProcessor resultProcessor) {
+        try {
+            writeHeaderRecord(dataExtractor, process);
+            writeDataRows(dataExtractor, process);
+            process.writeEndOfDataMessage();
+            process.flushStream();
+
+            LOGGER.info("[{}] Waiting for result processor to complete", jobId);

Review comment: I don't think we'll want this to be an `info` message.

Reply: Indeed, this shouldn't be an info message. I left it on purpose as I think it's useful during development. I had in mind that we'd review all logging when this is refactored into persistent tasks. Not sure I'd have a todo for each one of them, but I can add them if you think it's best.

+            resultProcessor.awaitForCompletion();
+            LOGGER.info("[{}] Result processor has completed", jobId);
+        } catch (IOException e) {
+            LOGGER.error(new ParameterizedMessage("[{}] Error writing data to the process", jobId), e);
+        } finally {
+            LOGGER.info("[{}] Closing process", jobId);

Review comment: Ditto: either downgrade this now or add a TODO.

+            try {
+                process.close();
+                LOGGER.info("[{}] Closed process", jobId);
+            } catch (IOException e) {
+                LOGGER.error("[{}] Error closing data frame analyzer process", jobId);
+            }
+        }
+    }
 
     private void writeDataRows(DataFrameDataExtractor dataExtractor, AnalyticsProcess process) throws IOException {
@@ -75,8 +87,8 @@ private void writeDataRows(DataFrameDataExtractor dataExtractor, AnalyticsProces
         Optional<List<DataFrameDataExtractor.Row>> rows = dataExtractor.next();
         if (rows.isPresent()) {
             for (DataFrameDataExtractor.Row row : rows.get()) {
-                String[] rowValues = row.getValues();
-                if (rowValues != null) {
+                if (row.shouldSkip() == false) {
+                    String[] rowValues = row.getValues();
                     System.arraycopy(rowValues, 0, record, 0, rowValues.length);
                     process.writeRecord(record);
                 }
@@ -96,10 +108,10 @@ private void writeHeaderRecord(DataFrameDataExtractor dataExtractor, AnalyticsPr
         process.writeRecord(headerRecord);
     }
 
-    private AnalyticsProcess createProcess(String jobId, DataFrameDataExtractor dataExtractor) {
+    private AnalyticsProcess createProcess(String jobId, AnalyticsProcessConfig analyticsProcessConfig) {
         // TODO We should rename the thread pool to reflect its more general use now, e.g. JOB_PROCESS_THREAD_POOL_NAME
         ExecutorService executorService = threadPool.executor(MachineLearning.AUTODETECT_THREAD_POOL_NAME);
-        AnalyticsProcess process = processFactory.createAnalyticsProcess(jobId, createProcessConfig(dataExtractor), executorService);
+        AnalyticsProcess process = processFactory.createAnalyticsProcess(jobId, analyticsProcessConfig, executorService);
         if (process.isProcessAlive() == false) {
             throw ExceptionsHelper.serverError("Failed to start analytics process");
         }
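The hunk above blocks on `resultProcessor.awaitForCompletion()` while the result processor runs on the same executor, but `AnalyticsResultProcessor` itself is not part of this diff. Purely as an illustration of that handoff pattern (not the class added by this PR), the coordination between the data-writing thread and the result-reading thread can be expressed with a `CountDownLatch`; the `Iterator` parameter and the `handle` hook below are placeholders for however the real class drains results from the native process.

```java
import java.util.Iterator;
import java.util.concurrent.CountDownLatch;

// Illustrative sketch only: shows the await/complete coordination the diff relies on.
// The real AnalyticsResultProcessor reads AnalyticsResult objects from the process
// and joins them into the destination index; here that is reduced to handle().
class ResultProcessorSketch {

    private final CountDownLatch completionLatch = new CountDownLatch(1);

    // Runs on the executor: drain results until the stream is exhausted.
    void process(Iterator<AnalyticsResult> results) {
        try {
            while (results.hasNext()) {
                handle(results.next());
            }
        } finally {
            completionLatch.countDown();   // always release the writer thread, even on failure
        }
    }

    // Called by the data-writing thread after it has written the end-of-data marker.
    void awaitForCompletion() {
        try {
            completionLatch.await();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    private void handle(AnalyticsResult result) {
        // placeholder: e.g. look up the source rows by id_hash and index the joined results
    }
}
```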
New file: AnalyticsResult.java
@@ -0,0 +1,73 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License;
 * you may not use this file except in compliance with the Elastic License.
 */
package org.elasticsearch.xpack.ml.analytics.process;

import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.xcontent.ConstructingObjectParser;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.Map;
import java.util.Objects;

public class AnalyticsResult implements ToXContentObject {

    public static final ParseField TYPE = new ParseField("analytics_result");
    public static final ParseField ID_HASH = new ParseField("id_hash");
    public static final ParseField RESULTS = new ParseField("results");

    static final ConstructingObjectParser<AnalyticsResult, Void> PARSER = new ConstructingObjectParser<>(TYPE.getPreferredName(),
            a -> new AnalyticsResult((String) a[0], (Map<String, Object>) a[1]));

    static {
        PARSER.declareString(ConstructingObjectParser.constructorArg(), ID_HASH);
        PARSER.declareObject(ConstructingObjectParser.constructorArg(), (p, context) -> p.map(), RESULTS);
    }

    private final String idHash;
    private final Map<String, Object> results;

    public AnalyticsResult(String idHash, Map<String, Object> results) {
        this.idHash = Objects.requireNonNull(idHash);
        this.results = Objects.requireNonNull(results);
    }

    public String getIdHash() {
        return idHash;
    }

    public Map<String, Object> getResults() {
        return results;
    }

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject();
        builder.field(ID_HASH.getPreferredName(), idHash);
        builder.field(RESULTS.getPreferredName(), results);
        builder.endObject();
        return builder;
    }

    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (other == null || getClass() != other.getClass()) {
            return false;
        }

        AnalyticsResult that = (AnalyticsResult) other;
        return Objects.equals(idHash, that.idHash) && Objects.equals(results, that.results);
    }

    @Override
    public int hashCode() {
        return Objects.hash(idHash, results);
    }
}
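For reference, a quick sketch of how this parser can be exercised (not part of the PR; it assumes the sketch lives in the same package since `PARSER` is package-private, and the `id_hash` and `results` values shown are made up):

```java
package org.elasticsearch.xpack.ml.analytics.process;

import org.elasticsearch.common.xcontent.DeprecationHandler;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;

public class AnalyticsResultParsingSketch {

    public static void main(String[] args) throws Exception {
        // The wire format the parser declares: a string "id_hash" plus an arbitrary "results" object.
        String json = "{\"id_hash\":\"abc123\",\"results\":{\"outlier_score\":0.42}}";

        try (XContentParser parser = JsonXContent.jsonXContent.createParser(
                NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, json)) {
            AnalyticsResult result = AnalyticsResult.PARSER.apply(parser, null);
            System.out.println(result.getIdHash());   // abc123
            System.out.println(result.getResults());  // {outlier_score=0.42}
        }
    }
}
```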
Review comment: I think it's still 'r' on the C++ side.

Reply: This https://github.com/elastic/ml-cpp/pull/334/files changes it to `$`.

Reply: OK cool. I missed that.