Commit ff39225
Add parallel Lucene source operator (#189)
This PR allows the Lucene source operator (which runs the search) to be parallelized, either by slicing on the document-id space or by slicing on the segment space. It also comes with a set of benchmarks to show the effects of running in various configurations.

The experiment computes the average of a long field. To allow for parallelization, the avg operator comes in two flavors, enabling a map/reduce pattern: the first one (map) takes raw input (the numbers) and emits a sum plus a count at the end, and the second one (reduce) takes sum/count pairs, sums them up, and emits the avg at the end.

Various configurations are tested:

- testLongAvgSingleThreadedAvg: runs everything single-threaded with a single driver (baseline performance)
- testLongAvgMultiThreadedAvgWithSingleThreadedSearch: runs the search part single-threaded, but parallelizes the numeric doc-value extraction and avg computation
- testLongAvgMultiThreadedAvgWithMultiThreadedSegmentSearch: runs the search part as well as the avg computation in parallel, using segment-level parallelism
- testLongAvgMultiThreadedAvgWithMultiThreadedSearch: runs the search part as well as the avg computation in parallel, using document-id-space-level parallelism (see also https://issues.apache.org/jira/browse/LUCENE-8675)

To understand the effect of the number of segments, the benchmark runs in two configurations (data force-merged to 1 segment, and data force-merged to 10 segments). Here are the results (from my MacBook Pro with 8 cores, albeit imprecise due to the warm temperatures in my office today with the extreme heat):

```
Benchmark                                                                    (maxNumSegments)  (numDocs)  Mode  Cnt    Score     Error  Units
OperatorBenchmark.testLongAvgSingleThreadedAvg                                              1  100000000  avgt    3  664.127 ±  63.200  ms/op
OperatorBenchmark.testLongAvgSingleThreadedAvg                                             10  100000000  avgt    3  654.669 ±  88.197  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithSingleThreadedSearch                       1  100000000  avgt    3  153.785 ±  69.273  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithSingleThreadedSearch                      10  100000000  avgt    3  161.570 ± 172.318  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithMultiThreadedSegmentSearch                 1  100000000  avgt    3  687.172 ±  41.166  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithMultiThreadedSegmentSearch                10  100000000  avgt    3  168.887 ±  81.306  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithMultiThreadedSearch                        1  100000000  avgt    3  111.377 ±  60.332  ms/op
OperatorBenchmark.testLongAvgMultiThreadedAvgWithMultiThreadedSearch                       10  100000000  avgt    3  111.535 ±  87.793  ms/op
```

Some explanations for the results observed:

- Even when keeping the search part single-threaded, it's useful to parallelize the aggregations running on top.
- The aggregations are very lightweight in this benchmark, so even if you have enough cores, the single-threaded search can still be the bottleneck (as it's a match-all query, the bottleneck in this case is the creation of the arrays that store the doc ids).
- Fully parallelizing things (i.e. the search part as well) can make things even faster. For segment-level parallelism, this obviously only works when you have multiple segments. If you only have a single segment, you can still parallelize just the aggregation bits, or you can partition by doc-id space (which will interfere with optimizations that leverage segment-level information).
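To make the map/reduce split concrete, here is a minimal, self-contained sketch of the two-phase average. It is illustrative only: the PR's actual LongAvgOperator consumes and produces Blocks/Pages inside a Driver, and the class and method names below are invented for the example.

```java
import java.util.List;
import java.util.stream.LongStream;

// Hypothetical stand-in for the two LongAvgOperator flavors; not the PR's API.
class TwoPhaseAvgSketch {

    // What the PARTIAL ("map") phase emits at the end: a sum plus a count.
    record SumCount(long sum, long count) {}

    // PARTIAL phase: consumes the raw numbers of one slice.
    static SumCount partial(long[] values) {
        long sum = 0;
        for (long v : values) {
            sum += v;
        }
        return new SumCount(sum, values.length);
    }

    // FINAL ("reduce") phase: sums up the per-slice pairs and emits the avg.
    static double finalAvg(List<SumCount> partials) {
        long sum = 0;
        long count = 0;
        for (SumCount p : partials) {
            sum += p.sum();
            count += p.count();
        }
        return (double) sum / count;
    }

    public static void main(String[] args) {
        // Two slices reduced independently, then combined: avg(1..10) = 5.5.
        SumCount p1 = partial(LongStream.rangeClosed(1, 5).toArray());
        SumCount p2 = partial(LongStream.rangeClosed(6, 10).toArray());
        System.out.println(finalAvg(List.of(p1, p2)));
    }
}
```

The key property is that sum and count combine associatively, so the partial results of any number of slices can be reduced in any order.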
1 parent 9b821c3 commit ff39225

File tree: 4 files changed, +274 −36 lines
x-pack/plugin/sql/src/benchmarks/java/org/elasticsearch/xpack/sql/action/OperatorBenchmark.java

Lines changed: 92 additions & 5 deletions

```diff
@@ -29,6 +29,7 @@
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BigArrays;
 import org.elasticsearch.common.util.LongHash;
+import org.elasticsearch.common.util.concurrent.EsExecutors;
 import org.elasticsearch.node.Node;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.xpack.sql.action.compute.data.Block;
@@ -88,6 +89,9 @@ public class OperatorBenchmark {
     @Param({ "100000000" }) // 100 million
     int numDocs;

+    @Param({ "1", "10" })
+    int maxNumSegments;
+
     ThreadPool threadPool;

     @Setup
@@ -105,7 +109,7 @@ public void setup() throws IOException {
                 indexWriter.addDocument(doc);
             }
             indexWriter.commit();
-            indexWriter.forceMerge(1);
+            indexWriter.forceMerge(maxNumSegments);
             indexWriter.flush();
         }
         indexReader = DirectoryReader.open(dir);
@@ -340,7 +344,7 @@ public long testOperatorsWithLucene() {
     }

     @Benchmark
-    public long testSingleThreadedAvg() {
+    public long testLongAvgSingleThreadedAvg() {
         return runWithDriver(
             ByteSizeValue.ofKb(16).bytesAsInt(),
             new NumericDocValuesExtractor(indexReader, 0, 1, "value"),
@@ -350,9 +354,9 @@ public long testSingleThreadedAvg() {
     }

     @Benchmark
-    public long testMultiThreadedAvg() {
+    public long testLongAvgMultiThreadedAvgWithSingleThreadedSearch() {
         AtomicInteger rowCount = new AtomicInteger();
-        int parallelCount = 8;
+        int parallelCount = ThreadPool.searchThreadPoolSize(EsExecutors.allocatedProcessors(Settings.EMPTY));
         List<Driver> drivers = new ArrayList<>(parallelCount);
         List<ExchangeSource> forkExchangeSources = new ArrayList<>(parallelCount);
         List<ExchangeSource> joinExchangeSources = new ArrayList<>(parallelCount);
@@ -404,7 +408,90 @@ public long testMultiThreadedAvg() {
         );
         drivers.add(reduceDriver);

-        Driver.runToCompletion(threadPool.executor(ThreadPool.Names.SEARCH), drivers).actionGet();
+        Driver.runToCompletion(threadPool.executor(ThreadPool.Names.SEARCH), drivers);
+        return rowCount.get();
+    }
+
+    @Benchmark
+    public long testLongAvgMultiThreadedAvgWithMultiThreadedSearch() {
+        AtomicInteger rowCount = new AtomicInteger();
+        int parallelCount = ThreadPool.searchThreadPoolSize(EsExecutors.allocatedProcessors(Settings.EMPTY));
+        List<Driver> drivers = new ArrayList<>(parallelCount);
+        List<ExchangeSource> joinExchangeSources = new ArrayList<>(parallelCount);
+
+        for (LuceneSourceOperator luceneSourceOperator : new LuceneSourceOperator(
+            indexReader,
+            new MatchAllDocsQuery(),
+            ByteSizeValue.ofKb(16).bytesAsInt()
+        ).slice(parallelCount)) {
+            ExchangeSource joinExchangeSource = new ExchangeSource();
+            joinExchangeSources.add(joinExchangeSource);
+            Driver driver = new Driver(
+                List.of(
+                    luceneSourceOperator,
+                    new NumericDocValuesExtractor(indexReader, 0, 1, "value"),
+                    new LongAvgOperator(2), // PARTIAL
+                    new ExchangeSinkOperator(
+                        new ExchangeSink(new PassthroughExchanger(joinExchangeSource, Integer.MAX_VALUE), s -> joinExchangeSource.finish())
+                    )
+                ),
+                () -> {}
+            );
+            drivers.add(driver);
+        }
+
+        Driver reduceDriver = new Driver(
+            List.of(
+                new RandomUnionSourceOperator(joinExchangeSources),
+                new LongAvgOperator(0, 1), // FINAL
+                new PageConsumerOperator(page -> rowCount.addAndGet(page.getPositionCount()))
+            ),
+            () -> {}
+        );
+        drivers.add(reduceDriver);
+
+        Driver.runToCompletion(threadPool.executor(ThreadPool.Names.SEARCH), drivers);
+        return rowCount.get();
+    }
+
+    @Benchmark
+    public long testLongAvgMultiThreadedAvgWithMultiThreadedSegmentSearch() {
+        AtomicInteger rowCount = new AtomicInteger();
+        List<Driver> drivers = new ArrayList<>();
+        List<ExchangeSource> joinExchangeSources = new ArrayList<>();
+
+        for (LuceneSourceOperator luceneSourceOperator : new LuceneSourceOperator(
+            indexReader,
+            new MatchAllDocsQuery(),
+            ByteSizeValue.ofKb(16).bytesAsInt()
+        ).segmentSlice()) {
+            ExchangeSource joinExchangeSource = new ExchangeSource();
+            joinExchangeSources.add(joinExchangeSource);
+            Driver driver = new Driver(
+                List.of(
+                    luceneSourceOperator,
+                    new NumericDocValuesExtractor(indexReader, 0, 1, "value"),
+                    new LongAvgOperator(2), // PARTIAL
+                    new ExchangeSinkOperator(
+                        new ExchangeSink(new PassthroughExchanger(joinExchangeSource, Integer.MAX_VALUE), s -> joinExchangeSource.finish())
+                    )
+                ),
+                () -> {}
+            );
+            drivers.add(driver);
+        }
+
+        Driver reduceDriver = new Driver(
+            List.of(
+                new RandomUnionSourceOperator(joinExchangeSources),
+                new LongAvgOperator(0, 1), // FINAL
+                new PageConsumerOperator(page -> rowCount.addAndGet(page.getPositionCount()))
+            ),
+            () -> {}
+        );
+        drivers.add(reduceDriver);
+
+        Driver.runToCompletion(threadPool.executor(ThreadPool.Names.SEARCH), drivers);
         return rowCount.get();
     }
 }
```
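For orientation, the fan-out/fan-in topology that the two fully parallel benchmarks above build (N drivers, each running source → doc-value extraction → partial avg → exchange sink, plus one reduce driver that unions all exchange sources) can be sketched with plain threads and a queue. This is an analogy only: the real Driver/ExchangeSource/ExchangeSink machinery is page-based and non-blocking, and the queue and slicing below are simplifications.

```java
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.stream.LongStream;

class FanOutFanInSketch {
    public static void main(String[] args) throws Exception {
        int parallelCount = Runtime.getRuntime().availableProcessors();
        long[] data = LongStream.rangeClosed(1, 1_000_000).toArray();

        BlockingQueue<long[]> exchange = new LinkedBlockingQueue<>(); // {sum, count} pairs
        ExecutorService executor = Executors.newFixedThreadPool(parallelCount);

        // Fan out: one "driver" per slice of the doc-id space.
        int sliceSize = data.length / parallelCount + 1;
        for (int i = 0; i < parallelCount; i++) {
            int from = Math.min(i * sliceSize, data.length);
            int to = Math.min(from + sliceSize, data.length);
            executor.submit(() -> {
                long sum = 0;
                for (int d = from; d < to; d++) {
                    sum += data[d];
                }
                exchange.add(new long[] { sum, to - from }); // PARTIAL result
            });
        }

        // Fan in: a single reduce "driver" combines the partial results.
        long sum = 0, count = 0;
        for (int i = 0; i < parallelCount; i++) {
            long[] partial = exchange.take();
            sum += partial[0];
            count += partial[1];
        }
        System.out.println((double) sum / count); // FINAL result: 500000.5
        executor.shutdown();
    }
}
```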

x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/action/compute/lucene/LuceneSourceOperator.java

Lines changed: 137 additions & 27 deletions

```diff
@@ -11,20 +11,24 @@
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreQuery;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.LeafCollector;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.Weight;
+import org.elasticsearch.core.Nullable;
 import org.elasticsearch.xpack.sql.action.compute.data.ConstantIntBlock;
 import org.elasticsearch.xpack.sql.action.compute.data.IntBlock;
 import org.elasticsearch.xpack.sql.action.compute.data.Page;
 import org.elasticsearch.xpack.sql.action.compute.operator.Operator;

 import java.io.IOException;
 import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;

 /**
  * Source operator that incrementally runs Lucene searches
@@ -33,19 +37,22 @@ public class LuceneSourceOperator implements Operator {

     private static final int PAGE_SIZE = 4096;

-    private final IndexReader reader;
+    @Nullable
+    private final IndexReader indexReader;
+    @Nullable
     private final Query query;
+    private final List<PartialLeafReaderContext> leaves;
     private final int maxPageSize;
     private final int minPageSize;

     private Weight weight;

     private int currentLeaf = 0;
-    private LeafReaderContext currentLeafReaderContext = null;
+    private PartialLeafReaderContext currentLeafReaderContext = null;
     private BulkScorer currentScorer = null;

     private int currentPagePos;
-    private int[] currentPage;
+    private final int[] currentPage;

     private int currentScorerPos;

@@ -54,10 +61,22 @@ public LuceneSourceOperator(IndexReader reader, Query query) {
     }

     public LuceneSourceOperator(IndexReader reader, Query query, int maxPageSize) {
-        this.reader = reader;
+        this.indexReader = reader;
+        this.leaves = reader.leaves().stream().map(PartialLeafReaderContext::new).collect(Collectors.toList());
         this.query = query;
         this.maxPageSize = maxPageSize;
         this.minPageSize = maxPageSize / 2;
+        currentPage = new int[maxPageSize];
+    }
+
+    private LuceneSourceOperator(Weight weight, List<PartialLeafReaderContext> leaves, int maxPageSize) {
+        this.indexReader = null;
+        this.leaves = leaves;
+        this.query = null;
+        this.weight = weight;
+        this.maxPageSize = maxPageSize;
+        this.minPageSize = maxPageSize / 2;
+        currentPage = new int[maxPageSize];
     }

     @Override
@@ -77,36 +96,99 @@ public void finish() {

     @Override
     public boolean isFinished() {
-        return currentLeaf >= reader.leaves().size();
+        return currentLeaf >= leaves.size();
     }

+    /**
+     * Split this source operator into a given number of slices
+     */
+    public List<LuceneSourceOperator> slice(int numSlices) {
+        if (weight != null) {
+            throw new IllegalStateException("can only call slice method once");
+        }
+        initializeWeightIfNecessary();
+        final int totalDocCount = indexReader.maxDoc();
+        final int maxDocsPerSlice = (totalDocCount / numSlices) + 1;
+
+        final List<List<PartialLeafReaderContext>> slices = new ArrayList<>();
+        int docsAllocatedInCurrentSlice = 0;
+        List<PartialLeafReaderContext> currentSlice = null;
+        for (LeafReaderContext ctx : indexReader.leaves()) {
+            int minDoc = 0;
+            int numDocsInLeaf = ctx.reader().maxDoc();
+            while (minDoc < numDocsInLeaf) {
+                int numDocsToUse = Math.min(maxDocsPerSlice - docsAllocatedInCurrentSlice, numDocsInLeaf);
+                if (numDocsToUse <= 0) {
+                    break;
+                }
+                if (currentSlice == null) {
+                    currentSlice = new ArrayList<>();
+                }
+                currentSlice.add(new PartialLeafReaderContext(ctx, minDoc, minDoc + numDocsToUse));
+                minDoc += numDocsToUse;
+                docsAllocatedInCurrentSlice += numDocsToUse;
+                if (docsAllocatedInCurrentSlice >= maxDocsPerSlice) {
+                    slices.add(currentSlice);
+                    currentSlice = null;
+                    docsAllocatedInCurrentSlice = 0;
+                }
+            }
+        }
+        if (currentSlice != null) {
+            slices.add(currentSlice);
+        }
+
+        List<LuceneSourceOperator> operators = new ArrayList<>();
+        for (List<PartialLeafReaderContext> slice : slices) {
+            operators.add(new LuceneSourceOperator(weight, slice, maxPageSize));
+        }
+        return operators;
+    }
+
+    /**
+     * Uses Lucene's own slicing method, which creates per-segment level slices
+     */
+    public List<LuceneSourceOperator> segmentSlice() {
+        if (weight != null) {
+            throw new IllegalStateException("can only call slice method once");
+        }
+        initializeWeightIfNecessary();
+        List<LuceneSourceOperator> operators = new ArrayList<>();
+        for (IndexSearcher.LeafSlice leafSlice : IndexSearcher.slices(indexReader.leaves(), MAX_DOCS_PER_SLICE, MAX_SEGMENTS_PER_SLICE)) {
+            operators.add(
+                new LuceneSourceOperator(
+                    weight,
+                    Arrays.asList(leafSlice.leaves).stream().map(PartialLeafReaderContext::new).collect(Collectors.toList()),
+                    maxPageSize
+                )
+            );
+        }
+        return operators;
+    }
+
+    private static final int MAX_DOCS_PER_SLICE = 250_000; // copied from IndexSearcher
+    private static final int MAX_SEGMENTS_PER_SLICE = 5; // copied from IndexSearcher
+
     @Override
     public Page getOutput() {
         if (isFinished()) {
             return null;
         }

         // initialize weight if not done yet
-        if (weight == null) {
-            IndexSearcher indexSearcher = new IndexSearcher(reader);
-            try {
-                weight = indexSearcher.createWeight(indexSearcher.rewrite(new ConstantScoreQuery(query)), ScoreMode.COMPLETE_NO_SCORES, 1);
-            } catch (IOException e) {
-                throw new UncheckedIOException(e);
-            }
-        }
+        initializeWeightIfNecessary();

         Page page = null;

         // initializes currentLeafReaderContext, currentScorer, and currentScorerPos when we switch to a new leaf reader
         if (currentLeafReaderContext == null) {
-            currentLeafReaderContext = reader.leaves().get(currentLeaf);
+            currentLeafReaderContext = leaves.get(currentLeaf);
             try {
-                currentScorer = weight.bulkScorer(currentLeafReaderContext);
+                currentScorer = weight.bulkScorer(currentLeafReaderContext.leafReaderContext);
             } catch (IOException e) {
                 throw new UncheckedIOException(e);
             }
-            currentScorerPos = 0;
+            currentScorerPos = currentLeafReaderContext.minDoc;
         }

         try {
@@ -118,26 +200,25 @@ public void setScorer(Scorable scorer) {

                 @Override
                 public void collect(int doc) {
-                    if (currentPage == null) {
-                        currentPage = new int[maxPageSize];
-                        currentPagePos = 0;
-                    }
                     currentPage[currentPagePos] = doc;
                     currentPagePos++;
                 }
-            }, currentLeafReaderContext.reader().getLiveDocs(), currentScorerPos, currentScorerPos + maxPageSize - currentPagePos);
+            },
+                currentLeafReaderContext.leafReaderContext.reader().getLiveDocs(),
+                currentScorerPos,
+                Math.min(currentLeafReaderContext.maxDoc, currentScorerPos + maxPageSize - currentPagePos)
+            );

-            if (currentPagePos >= minPageSize || currentScorerPos == DocIdSetIterator.NO_MORE_DOCS) {
+            if (currentPagePos >= minPageSize || currentScorerPos >= currentLeafReaderContext.maxDoc) {
                 page = new Page(
                     currentPagePos,
-                    new IntBlock(currentPage, currentPagePos),
-                    new ConstantIntBlock(currentPagePos, currentLeafReaderContext.ord)
+                    new IntBlock(Arrays.copyOf(currentPage, currentPagePos), currentPagePos),
+                    new ConstantIntBlock(currentPagePos, currentLeafReaderContext.leafReaderContext.ord)
                 );
-                currentPage = null;
                 currentPagePos = 0;
             }

-            if (currentScorerPos == DocIdSetIterator.NO_MORE_DOCS) {
+            if (currentScorerPos >= currentLeafReaderContext.maxDoc) {
                 currentLeaf++;
                 currentLeafReaderContext = null;
                 currentScorer = null;
@@ -150,6 +231,35 @@ public void collect(int doc) {
         return page;
     }

+    private void initializeWeightIfNecessary() {
+        if (weight == null) {
+            try {
+                IndexSearcher indexSearcher = new IndexSearcher(indexReader);
+                weight = indexSearcher.createWeight(indexSearcher.rewrite(new ConstantScoreQuery(query)), ScoreMode.COMPLETE_NO_SCORES, 1);
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        }
+    }
+
+    static class PartialLeafReaderContext {
+
+        final LeafReaderContext leafReaderContext;
+        final int minDoc; // incl
+        final int maxDoc; // excl
+
+        PartialLeafReaderContext(LeafReaderContext leafReaderContext, int minDoc, int maxDoc) {
+            this.leafReaderContext = leafReaderContext;
+            this.minDoc = minDoc;
+            this.maxDoc = maxDoc;
+        }
+
+        PartialLeafReaderContext(LeafReaderContext leafReaderContext) {
+            this(leafReaderContext, 0, leafReaderContext.reader().maxDoc());
+        }
+
+    }
+
     @Override
     public void close() {
```
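To see what the doc-id-space slicing produces, here is a condensed, standalone sketch of the partitioning idea behind slice(). The leaf sizes and slice count are hypothetical, PartialLeafReaderContext is reduced to a printable (leaf, minDoc, maxDoc) triple, and the per-leaf remainder is clamped so a range never extends past its leaf — a simplification of the committed code above.

```java
import java.util.ArrayList;
import java.util.List;

class SliceSketch {

    record Partial(int leaf, int minDoc, int maxDoc) {} // maxDoc is exclusive

    // Greedily packs ranges of roughly totalDocCount / numSlices + 1 docs,
    // letting a slice span leaf boundaries and a leaf span slice boundaries.
    static List<List<Partial>> slice(int[] leafSizes, int numSlices) {
        int totalDocCount = 0;
        for (int size : leafSizes) {
            totalDocCount += size;
        }
        int maxDocsPerSlice = (totalDocCount / numSlices) + 1;

        List<List<Partial>> slices = new ArrayList<>();
        int allocated = 0;
        List<Partial> current = null;
        for (int leaf = 0; leaf < leafSizes.length; leaf++) {
            int minDoc = 0;
            while (minDoc < leafSizes[leaf]) {
                int numDocsToUse = Math.min(maxDocsPerSlice - allocated, leafSizes[leaf] - minDoc);
                if (numDocsToUse <= 0) {
                    break;
                }
                if (current == null) {
                    current = new ArrayList<>();
                }
                current.add(new Partial(leaf, minDoc, minDoc + numDocsToUse));
                minDoc += numDocsToUse;
                allocated += numDocsToUse;
                if (allocated >= maxDocsPerSlice) {
                    slices.add(current);
                    current = null;
                    allocated = 0;
                }
            }
        }
        if (current != null) {
            slices.add(current);
        }
        return slices;
    }

    public static void main(String[] args) {
        // A single 100-doc segment split into 4 slices: [0,26), [26,52),
        // [52,78), [78,100). Unlike segment slicing, one segment can feed
        // several source operators in parallel.
        System.out.println(slice(new int[] { 100 }, 4));
    }
}
```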