1
+ from __future__ import unicode_literals
2
+
1
3
import logging
2
4
from itertools import islice
3
5
from operator import methodcaller
@@ -38,13 +40,13 @@ def expand_action(data):
38
40
39
41
return action , data .get ('_source' , data )
40
42
41
- def _chunk_actions (actions , chunk_size ):
43
+ def _chunk_actions (actions , chunk_size , serializer ):
42
44
while True :
43
45
bulk_actions = []
44
46
for action , data in islice (actions , chunk_size ):
45
- bulk_actions .append (action )
47
+ bulk_actions .append (serializer . dumps ( action ) )
46
48
if data is not None :
47
- bulk_actions .append (data )
49
+ bulk_actions .append (serializer . dumps ( data ) )
48
50
49
51
if not bulk_actions :
50
52
return
@@ -107,17 +109,16 @@ def streaming_bulk(client, actions, chunk_size=500, raise_on_error=True,
107
109
should return a tuple containing the action line and the data line
108
110
(`None` if data line should be omitted).
109
111
"""
112
+ serializer = client .transport .serializer
110
113
actions = map (expand_action_callback , actions )
111
114
112
115
# if raise on error is set, we need to collect errors per chunk before raising them
113
116
errors = []
114
117
115
- for bulk_actions in _chunk_actions (actions , chunk_size ):
116
-
117
-
118
+ for bulk_actions in _chunk_actions (actions , chunk_size , serializer ):
118
119
try :
119
120
# send the actual request
120
- resp = client .bulk (bulk_actions , ** kwargs )
121
+ resp = client .bulk (' \n ' . join ( bulk_actions ) + ' \n ' , ** kwargs )
121
122
except TransportError as e :
122
123
# default behavior - just propagate exception
123
124
if raise_on_exception :
@@ -126,7 +127,11 @@ def streaming_bulk(client, actions, chunk_size=500, raise_on_error=True,
126
127
# if we are not propagating, mark all actions in current chunk as failed
127
128
err_message = str (e )
128
129
exc_errors = []
129
- bulk_data = iter (bulk_actions )
130
+
131
+ # deserialize the data back, thisis expensive but only run on
132
+ # errors if raise_on_exception is false, so shouldn't be a real
133
+ # issue
134
+ bulk_data = iter (map (serializer .loads , bulk_actions ))
130
135
while True :
131
136
try :
132
137
# collect all the information about failed actions
0 commit comments