Skip to content

Commit 0b773f2

Browse files
committed
add reindex extension using scroll api
1 parent a5ed3f6 commit 0b773f2

File tree

3 files changed

+102
-0
lines changed

3 files changed

+102
-0
lines changed

elasticsearch-extensions/lib/elasticsearch/extensions.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
require 'elasticsearch'
22
require 'elasticsearch/extensions/version'
3+
require 'elasticsearch/extensions/reindex'
34

45
module Elasticsearch
56
module Extensions
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
module Elasticsearch
  module Extensions
    # Reindex using the scroll API. This copies documents (not mappings) from one
    # index to another. The target index can be on a different cluster.
    #
    # This is useful when updating mappings on existing fields in an index (e.g.
    # with new analyzers).
    #
    # All of the work happens in the constructor: it opens a scan/scroll search
    # on the source index, then repeatedly fetches a page of hits and sends it
    # to the target as one bulk `index` request, until a page comes back empty.
    #
    # @example Reindex all documents under a new index name
    #
    #     Elasticsearch::Extensions::Reindex.new client: client, src_index: 'foo', target_index: 'bar'
    #
    # @see https://www.elastic.co/guide/en/elasticsearch/guide/current/reindex.html
    #
    class Reindex
      # Every option key accepted by {#initialize}.
      VALID_PARAMS = [
        :client,
        :src_index,
        :target_index,
        :target_client,
        :chunk_size,
        :period
      ].freeze

      # Values used for options that are omitted or passed as nil.
      DEFAULT_PARAMS = {
        chunk_size: 500,
        period: '5m'
      }.freeze

      # Keys of a search hit that are not copied into the bulk action metadata.
      STRIPPED_HIT_KEYS = %w[_score _source].freeze

      # Copies every document of the source index into the target index.
      #
      # @param opts [Hash] options, keyed by Symbol
      # @option opts [Client] :client client used to search and scroll the source index (*Required*)
      # @option opts [String] :src_index name of the index to read from (*Required*)
      # @option opts [String] :target_index name of the index to write to (*Required*)
      # @option opts [Client] :target_client client used for the bulk writes (defaults to :client)
      # @option opts [Integer] :chunk_size value passed as `size` to the initial search (defaults to 500)
      # @option opts [String] :period value passed as `scroll` to the search and scroll
      #   requests, i.e. how long to ask Elasticsearch to keep the scroll open (defaults to '5m')
      #
      # @raise [ArgumentError] if a required option is missing or an unknown option is given
      #
      def initialize(opts = {})
        raise ArgumentError, "Required argument 'client' missing" unless opts[:client]
        raise ArgumentError, "Required argument 'src_index' missing" unless opts[:src_index]
        raise ArgumentError, "Required argument 'target_index' missing" unless opts[:target_index]

        unknown = opts.keys - VALID_PARAMS
        unless unknown.empty?
          raise ArgumentError, "Unknown argument(s): #{unknown.map(&:inspect).join(', ')}"
        end

        # Drop nil values before merging so that an explicit `chunk_size: nil`
        # or `period: nil` falls back to the default instead of overriding it
        # (the same way a nil :target_client falls back to :client below).
        params = DEFAULT_PARAMS.merge(opts.reject { |_, value| value.nil? })
        client = params[:client]
        target_client = params[:target_client] || client

        response = client.search(index: params[:src_index],
                                 search_type: 'scan',
                                 scroll: params[:period],
                                 size: params[:chunk_size])

        # Each scroll request uses the scroll id returned by the previous response.
        while (response = client.scroll(scroll_id: response['_scroll_id'], scroll: params[:period]))
          hits = response['hits']['hits']
          break if hits.empty?

          # NOTE(review): the bulk response is not inspected, so per-item
          # failures reported by the target are not surfaced to the caller.
          target_client.bulk body: bulk_actions(hits, params[:target_index])
        end
      end

      private

      # Builds one bulk `index` action per hit, without modifying the hits.
      # The action metadata is the hit minus its score and source, re-pointed
      # at the target index, with the document source under the 'data' key.
      def bulk_actions(hits, target_index)
        hits.map do |hit|
          meta = hit.reject { |key, _| STRIPPED_HIT_KEYS.include?(key) }
          meta['_index'] = target_index
          meta['data'] = hit['_source']
          { index: meta }
        end
      end
    end
  end
end
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
require 'elasticsearch'
2+
require 'test_helper'
3+
4+
# Unit test for Elasticsearch::Extensions::Reindex. The client is stubbed, so
# no cluster is needed: the test only checks which search, scroll and bulk
# requests are issued and with which arguments.
class Elasticsearch::Extensions::ReindexTest < Test::Unit::TestCase
  context "reindex" do
    should "scroll and bulk insert" do
      @subject = Elasticsearch::Client.new
      search_opts = { index: 'foo-index',
                      search_type: 'scan',
                      scroll: '5m',
                      size: 500 }
      # The first scroll request uses the id returned by the initial search.
      first_scroll_opts = { scroll_id: 'bar-id',
                            scroll: '5m' }
      # The next one must use the id returned by the previous scroll response.
      next_scroll_opts = { scroll_id: 'baz-id',
                           scroll: '5m' }
      doc = { '_id' => 'quux',
              '_type' => 'foo-type',
              '_source' => { 'field1' => 'foobar' } }
      # Scroll responses carry a `_scroll_id`, like real Elasticsearch
      # responses do; a distinct value proves the latest id is carried forward.
      scroll_rsp = { '_scroll_id' => 'baz-id',
                     'hits' => { 'hits' => [doc] } }
      # An empty page of hits ends the reindex loop.
      empty_scroll_rsp = { '_scroll_id' => 'baz-id',
                           'hits' => { 'hits' => [] } }
      bulk_body = [{ index: { '_index' => 'bar-index',
                              '_type' => doc['_type'],
                              '_id' => doc['_id'],
                              'data' => doc['_source'] } }]

      @subject.expects(:search).with(search_opts).returns({ '_scroll_id' => 'bar-id' })
      @subject.expects(:scroll).with(first_scroll_opts).returns(scroll_rsp)
      @subject.expects(:scroll).with(next_scroll_opts).returns(empty_scroll_rsp)
      @subject.expects(:bulk).with(body: bulk_body).returns([])

      Elasticsearch::Extensions::Reindex.new(client: @subject,
                                             src_index: 'foo-index',
                                             target_index: 'bar-index')
    end
  end
end

0 commit comments

Comments
 (0)