Skip to content

Commit b115825

Browse files
committed
[Storage][Blob][QuickQuery] Arrow Format (Azure#13750)
* [Storage][Blob][DataLake] Quick Query Arrow Format
* fix pylint
* fix pylint
* fix pylint
* fix pylint
1 parent 3829bc3 commit b115825

File tree

10 files changed

+632
-12
lines changed

10 files changed

+632
-12
lines changed

sdk/storage/azure-storage-blob/azure/storage/blob/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,8 @@
5454
BlobQueryError,
5555
DelimitedJsonDialect,
5656
DelimitedTextDialect,
57+
ArrowDialect,
58+
ArrowType,
5759
ObjectReplicationPolicy,
5860
ObjectReplicationRule
5961
)
@@ -219,6 +221,8 @@ def download_blob_from_url(
219221
'BlobQueryError',
220222
'DelimitedJsonDialect',
221223
'DelimitedTextDialect',
224+
'ArrowDialect',
225+
'ArrowType',
222226
'BlobQueryReader',
223227
'ObjectReplicationPolicy',
224228
'ObjectReplicationRule'

sdk/storage/azure-storage-blob/azure/storage/blob/_blob_client.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -682,13 +682,19 @@ def _quick_query_options(self, query_expression,
682682
try:
683683
delimiter = input_format.lineterminator
684684
except AttributeError:
685-
delimiter = input_format.delimiter
685+
try:
686+
delimiter = input_format.delimiter
687+
except AttributeError:
688+
raise ValueError("The Type of blob_format can only be DelimitedTextDialect or DelimitedJsonDialect")
686689
output_format = kwargs.pop('output_format', None)
687690
if output_format:
688691
try:
689692
delimiter = output_format.lineterminator
690693
except AttributeError:
691-
delimiter = output_format.delimiter
694+
try:
695+
delimiter = output_format.delimiter
696+
except AttributeError:
697+
pass
692698
else:
693699
output_format = input_format
694700
query_request = QueryRequest(

sdk/storage/azure-storage-blob/azure/storage/blob/_models.py

Lines changed: 25 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from enum import Enum
1010

1111
from azure.core.paging import PageIterator
12-
from azure.storage.blob._generated.models import FilterBlobItem
12+
from azure.storage.blob._generated.models import FilterBlobItem, ArrowField
1313

1414
from ._shared import decode_base64_to_text
1515
from ._shared.response_handlers import return_context_and_deserialized, process_storage_error
@@ -1099,6 +1099,30 @@ def __init__(self, **kwargs):
10991099
self.has_header = kwargs.pop('has_header', False)
11001100

11011101

1102+
class ArrowDialect(ArrowField):
1103+
"""field of an arrow schema.
1104+
1105+
All required parameters must be populated in order to send to Azure.
1106+
1107+
:param ~azure.storage.blob.ArrowType type: Arrow field type.
1108+
:keyword str name: The name of the field.
1109+
:keyword int precision: The precision of the field.
1110+
:keyword int scale: The scale of the field.
1111+
"""
1112+
def __init__(self, type, **kwargs): # pylint: disable=redefined-builtin
1113+
super(ArrowDialect, self).__init__(type=type, **kwargs)
1114+
1115+
1116+
class ArrowType(str, Enum):
1117+
1118+
INT64 = "int64"
1119+
BOOL = "bool"
1120+
TIMESTAMP_MS = "timestamp[ms]"
1121+
STRING = "string"
1122+
DOUBLE = "double"
1123+
DECIMAL = 'decimal'
1124+
1125+
11021126
class ObjectReplicationPolicy(DictMixin):
11031127
"""Policy id and rule ids applied to a blob.
11041128

sdk/storage/azure-storage-blob/azure/storage/blob/_serialize.py

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,7 @@
1313

1414
from ._models import (
1515
ContainerEncryptionScope,
16-
DelimitedJsonDialect
17-
)
16+
DelimitedJsonDialect)
1817
from ._generated.models import (
1918
ModifiedAccessConditions,
2019
SourceModifiedAccessConditions,
@@ -24,6 +23,7 @@
2423
QuerySerialization,
2524
DelimitedTextConfiguration,
2625
JsonTextConfiguration,
26+
ArrowConfiguration,
2727
QueryFormatType,
2828
BlobTag,
2929
BlobTags, LeaseAccessConditions
@@ -182,6 +182,13 @@ def serialize_query_format(formater):
182182
type=QueryFormatType.delimited,
183183
delimited_text_configuration=serialization_settings
184184
)
185+
elif isinstance(formater, list):
186+
serialization_settings = ArrowConfiguration(
187+
schema=formater
188+
)
189+
qq_format = QueryFormat(
190+
type=QueryFormatType.arrow,
191+
arrow_configuration=serialization_settings)
185192
elif not formater:
186193
return None
187194
else:
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,217 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Accept:
6+
- '*/*'
7+
Accept-Encoding:
8+
- gzip, deflate
9+
Connection:
10+
- keep-alive
11+
Content-Length:
12+
- '0'
13+
User-Agent:
14+
- azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
15+
x-ms-date:
16+
- Fri, 11 Sep 2020 20:58:27 GMT
17+
x-ms-version:
18+
- '2020-02-10'
19+
method: PUT
20+
uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container
21+
response:
22+
body:
23+
string: ''
24+
headers:
25+
date:
26+
- Fri, 11 Sep 2020 20:58:28 GMT
27+
etag:
28+
- '"0x8D856956EBF3C36"'
29+
last-modified:
30+
- Fri, 11 Sep 2020 20:58:28 GMT
31+
transfer-encoding:
32+
- chunked
33+
x-ms-version:
34+
- '2020-02-10'
35+
status:
36+
code: 201
37+
message: Created
38+
- request:
39+
body: '100,200,300,400
40+
41+
300,400,500,600
42+
43+
'
44+
headers:
45+
Accept:
46+
- '*/*'
47+
Accept-Encoding:
48+
- gzip, deflate
49+
Connection:
50+
- keep-alive
51+
Content-Length:
52+
- '32'
53+
Content-Type:
54+
- application/octet-stream
55+
User-Agent:
56+
- azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
57+
x-ms-blob-type:
58+
- BlockBlob
59+
x-ms-date:
60+
- Fri, 11 Sep 2020 20:58:28 GMT
61+
x-ms-version:
62+
- '2020-02-10'
63+
method: PUT
64+
uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789
65+
response:
66+
body:
67+
string: ''
68+
headers:
69+
content-md5:
70+
- /hmKXD7m7tyfn12eEsFvyQ==
71+
date:
72+
- Fri, 11 Sep 2020 20:58:28 GMT
73+
etag:
74+
- '"0x8D856956ED0E86F"'
75+
last-modified:
76+
- Fri, 11 Sep 2020 20:58:28 GMT
77+
transfer-encoding:
78+
- chunked
79+
x-ms-content-crc64:
80+
- Dn1U+tgM/4c=
81+
x-ms-request-server-encrypted:
82+
- 'false'
83+
x-ms-version:
84+
- '2020-02-10'
85+
status:
86+
code: 201
87+
message: Created
88+
- request:
89+
body: '<?xml version=''1.0'' encoding=''utf-8''?>
90+
91+
<QueryRequest><QueryType>SQL</QueryType><Expression>SELECT _2 from BlobStorage
92+
WHERE _1 &gt; 250</Expression><OutputSerialization><Format><Type>arrow</Type><ArrowConfiguration><Schema><Field><Type>decimal</Type><Name>abc</Name><Precision>4</Precision><Scale>2</Scale></Field></Schema></ArrowConfiguration></Format></OutputSerialization></QueryRequest>'
93+
headers:
94+
Accept:
95+
- application/xml
96+
Accept-Encoding:
97+
- gzip, deflate
98+
Connection:
99+
- keep-alive
100+
Content-Length:
101+
- '390'
102+
Content-Type:
103+
- application/xml; charset=utf-8
104+
User-Agent:
105+
- azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
106+
x-ms-date:
107+
- Fri, 11 Sep 2020 20:58:28 GMT
108+
x-ms-version:
109+
- '2020-02-10'
110+
method: POST
111+
uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789/csvfile9d4d1789?comp=query
112+
response:
113+
body:
114+
string: !!binary |
115+
T2JqAQIWYXZyby5zY2hlbWG+HlsKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUi
116+
OiAiY29tLm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLnJlc3VsdERh
117+
dGEiLAogICAgImRvYyI6ICJIb2xkcyByZXN1bHQgZGF0YSBpbiB0aGUgZm9ybWF0IHNwZWNpZmll
118+
ZCBmb3IgdGhpcyBxdWVyeSAoQ1NWLCBKU09OLCBldGMuKS4iLAogICAgImZpZWxkcyI6IFsKICAg
119+
ICAgewogICAgICAgICJuYW1lIjogImRhdGEiLAogICAgICAgICJ0eXBlIjogImJ5dGVzIgogICAg
120+
ICB9CiAgICBdCiAgfSwKICB7CiAgICAidHlwZSI6ICJyZWNvcmQiLAogICAgIm5hbWUiOiAiY29t
121+
Lm1pY3Jvc29mdC5henVyZS5zdG9yYWdlLnF1ZXJ5QmxvYkNvbnRlbnRzLmVycm9yIiwKICAgICJk
122+
b2MiOiAiQW4gZXJyb3IgdGhhdCBvY2N1cnJlZCB3aGlsZSBwcm9jZXNzaW5nIHRoZSBxdWVyeS4i
123+
LAogICAgImZpZWxkcyI6IFsKICAgICAgewogICAgICAgICJuYW1lIjogImZhdGFsIiwKICAgICAg
124+
ICAidHlwZSI6ICJib29sZWFuIiwKICAgICAgICAiZG9jIjogIklmIHRydWUsIHRoaXMgZXJyb3Ig
125+
cHJldmVudHMgZnVydGhlciBxdWVyeSBwcm9jZXNzaW5nLiAgTW9yZSByZXN1bHQgZGF0YSBtYXkg
126+
YmUgcmV0dXJuZWQsIGJ1dCB0aGVyZSBpcyBubyBndWFyYW50ZWUgdGhhdCBhbGwgb2YgdGhlIG9y
127+
aWdpbmFsIGRhdGEgd2lsbCBiZSBwcm9jZXNzZWQuICBJZiBmYWxzZSwgdGhpcyBlcnJvciBkb2Vz
128+
IG5vdCBwcmV2ZW50IGZ1cnRoZXIgcXVlcnkgcHJvY2Vzc2luZy4iCiAgICAgIH0sCiAgICAgIHsK
129+
ICAgICAgICAibmFtZSI6ICJuYW1lIiwKICAgICAgICAidHlwZSI6ICJzdHJpbmciLAogICAgICAg
130+
ICJkb2MiOiAiVGhlIG5hbWUgb2YgdGhlIGVycm9yIgogICAgICB9LAogICAgICB7CiAgICAgICAg
131+
Im5hbWUiOiAiZGVzY3JpcHRpb24iLAogICAgICAgICJ0eXBlIjogInN0cmluZyIsCiAgICAgICAg
132+
ImRvYyI6ICJBIGRlc2NyaXB0aW9uIG9mIHRoZSBlcnJvciIKICAgICAgfSwKICAgICAgewogICAg
133+
ICAgICJuYW1lIjogInBvc2l0aW9uIiwKICAgICAgICAidHlwZSI6ICJsb25nIiwKICAgICAgICAi
134+
ZG9jIjogIlRoZSBibG9iIG9mZnNldCBhdCB3aGljaCB0aGUgZXJyb3Igb2NjdXJyZWQiCiAgICAg
135+
IH0KICAgIF0KICB9LAogIHsKICAgICJ0eXBlIjogInJlY29yZCIsCiAgICAibmFtZSI6ICJjb20u
136+
bWljcm9zb2Z0LmF6dXJlLnN0b3JhZ2UucXVlcnlCbG9iQ29udGVudHMucHJvZ3Jlc3MiLAogICAg
137+
ImRvYyI6ICJJbmZvcm1hdGlvbiBhYm91dCB0aGUgcHJvZ3Jlc3Mgb2YgdGhlIHF1ZXJ5IiwKICAg
138+
ICJmaWVsZHMiOiBbCiAgICAgIHsKICAgICAgICAibmFtZSI6ICJieXRlc1NjYW5uZWQiLAogICAg
139+
ICAgICJ0eXBlIjogImxvbmciLAogICAgICAgICJkb2MiOiAiVGhlIG51bWJlciBvZiBieXRlcyB0
140+
aGF0IGhhdmUgYmVlbiBzY2FubmVkIgogICAgICB9LAogICAgICB7CiAgICAgICAgIm5hbWUiOiAi
141+
dG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAgICAgICAgImRvYyI6ICJUaGUg
142+
dG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4gdGhpcyBxdWVyeSIKICAgICAg
143+
fQogICAgXQogIH0sCiAgewogICAgInR5cGUiOiAicmVjb3JkIiwKICAgICJuYW1lIjogImNvbS5t
144+
aWNyb3NvZnQuYXp1cmUuc3RvcmFnZS5xdWVyeUJsb2JDb250ZW50cy5lbmQiLAogICAgImRvYyI6
145+
ICJTZW50IGFzIHRoZSBmaW5hbCBtZXNzYWdlIG9mIHRoZSByZXNwb25zZSwgaW5kaWNhdGluZyB0
146+
aGF0IGFsbCByZXN1bHRzIGhhdmUgYmVlbiBzZW50LiIsCiAgICAiZmllbGRzIjogWwogICAgICB7
147+
CiAgICAgICAgIm5hbWUiOiAidG90YWxCeXRlcyIsCiAgICAgICAgInR5cGUiOiAibG9uZyIsCiAg
148+
ICAgICAgImRvYyI6ICJUaGUgdG90YWwgbnVtYmVyIG9mIGJ5dGVzIHRvIGJlIHNjYW5uZWQgaW4g
149+
dGhpcyBxdWVyeSIKICAgICAgfQogICAgXQogIH0KXQoAQmgjmNsu90Ck/YQ3d6WMowL2AwDwA///
150+
//94AAAAEAAAAAAACgAMAAYABQAIAAoAAAAAAQMADAAAAAgACAAAAAQACAAAAAQAAAABAAAAFAAA
151+
ABAAFAAIAAYABwAMAAAAEAAQAAAAAAABByQAAAAUAAAABAAAAAAAAAAIAAwABAAIAAgAAAAEAAAA
152+
AgAAAAMAAABhYmMA/////3AAAAAQAAAAAAAKAA4ABgAFAAgACgAAAAADAwAQAAAAAAAKAAwAAAAE
153+
AAgACgAAADAAAAAEAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEA
154+
AAAAAAAAAAAAAAAAAAAAAAAAQmgjmNsu90Ck/YQ3d6WMowLGAgDAAv////+IAAAAFAAAAAAAAAAM
155+
ABYABgAFAAgADAAMAAAAAAMDABgAAAAQAAAAAAAAAAAACgAYAAwABAAIAAoAAAA8AAAAEAAAAAEA
156+
AAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAQAAAAAAAAAAAAAAABAAAAAQAA
157+
AAAAAAAAAAAAAAAAAJABAAAAAAAAAAAAAAAAAABCaCOY2y73QKT9hDd3pYyjAgYEQEBCaCOY2y73
158+
QKT9hDd3pYyjAgQGQEJoI5jbLvdApP2EN3eljKM=
159+
headers:
160+
accept-ranges:
161+
- bytes
162+
content-type:
163+
- avro/binary
164+
date:
165+
- Fri, 11 Sep 2020 20:58:28 GMT
166+
etag:
167+
- '"0x8D856956ED0E86F"'
168+
last-modified:
169+
- Fri, 11 Sep 2020 20:58:28 GMT
170+
transfer-encoding:
171+
- chunked
172+
x-ms-blob-type:
173+
- BlockBlob
174+
x-ms-creation-time:
175+
- Fri, 11 Sep 2020 20:58:28 GMT
176+
x-ms-lease-state:
177+
- available
178+
x-ms-lease-status:
179+
- unlocked
180+
x-ms-version:
181+
- '2020-02-10'
182+
status:
183+
code: 200
184+
message: OK
185+
- request:
186+
body: null
187+
headers:
188+
Accept:
189+
- '*/*'
190+
Accept-Encoding:
191+
- gzip, deflate
192+
Connection:
193+
- keep-alive
194+
Content-Length:
195+
- '0'
196+
User-Agent:
197+
- azsdk-python-storage-blob/12.4.0 Python/3.7.3 (Windows-10-10.0.19041-SP0)
198+
x-ms-date:
199+
- Fri, 11 Sep 2020 20:58:28 GMT
200+
x-ms-version:
201+
- '2020-02-10'
202+
method: DELETE
203+
uri: https://storagename.blob.core.windows.net/utqqcontainer9d4d1789?restype=container
204+
response:
205+
body:
206+
string: ''
207+
headers:
208+
date:
209+
- Fri, 11 Sep 2020 20:58:28 GMT
210+
transfer-encoding:
211+
- chunked
212+
x-ms-version:
213+
- '2020-02-10'
214+
status:
215+
code: 202
216+
message: Accepted
217+
version: 1

0 commit comments

Comments
 (0)