Skip to content

Commit 39ddae3

Browse files
msgpack: support datetime extended type
Tarantool supports datetime type since version 2.10.0 [1]. This patch introduces the support of Tarantool datetime type in msgpack decoders and encoders. Tarantool datetime objects are decoded to `tarantool.Datetime` type. `tarantool.Datetime` objects may be encoded to Tarantool datetime objects. `tarantool.Datetime` is basically a `pandas.Timestamp` wrapper. You can create `tarantool.Datetime` objects - from `pandas.Timestamp` object, - by using the same API as in `pandas.Timestamp()` [2], - from another `tarantool.Datetime` object. To work with datetime data as a `pandas.Timestamp`, convert `tarantool.Datetime` object to a `pandas.Timestamp` with `to_pd_timestamp()` method call. You can use this `pandas.Timestamp` object to build a `tarantool.Datetime` object before sending data to Tarantool. To work with data as `numpy.datetime64` or `datetime.datetime`, convert to a `pandas.Timestamp` and then use `to_datetime64()` or `to_datetime()` converter. pandas.Timestamp was chosen to store data because it can store both nanoseconds and timezone information. The built-in Python datetime.datetime supports microseconds at most, and numpy.datetime64 does not support timezones. Tarantool datetime interval type is planned to be stored in custom type tarantool.Interval and we'll need a way to support arithmetic between datetime and interval. This is the reason we use a custom class instead of plain pandas.Timestamp. This patch does not yet introduce the support of timezones in datetime. 1. tarantool/tarantool#5941 2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204
1 parent c70dfa6 commit 39ddae3

File tree

10 files changed

+345
-6
lines changed

10 files changed

+345
-6
lines changed

CHANGELOG.md

+21
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
### Added
1010
- Decimal type support (#203).
1111
- UUID type support (#202).
12+
- Datetime type support and tarantool.Datetime type (#204).
13+
14+
Tarantool datetime objects are decoded to `tarantool.Datetime`
15+
type. `tarantool.Datetime` may be encoded to Tarantool datetime
16+
objects.
17+
18+
`tarantool.Datetime` is basically a `pandas.Timestamp` wrapper.
19+
You can create `tarantool.Datetime` objects
20+
- from `pandas.Timestamp` object,
21+
- by using the same API as in `pandas.Timestamp()`,
22+
- from another `tarantool.Datetime` object.
23+
24+
To work with datetime data as a `pandas.Timestamp`, convert
25+
`tarantool.Datetime` object to a `pandas.Timestamp` with
26+
`to_pd_timestamp()` method call. You can use this
27+
`pandas.Timestamp` object to build a `tarantool.Datetime`
28+
object before sending data to Tarantool.
29+
30+
To work with data as `numpy.datetime64` or `datetime.datetime`,
31+
convert to a `pandas.Timestamp` and then use `to_datetime64()`
32+
or `to_datetime()` converter.
1233

1334
### Changed
1435
- Bump msgpack requirement to 1.0.4 (PR #223).

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
msgpack>=1.0.4
2+
pandas

tarantool/__init__.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
ENCODING_DEFAULT,
3333
)
3434

35+
from tarantool.msgpack_ext.types.datetime import (
36+
Datetime,
37+
)
38+
3539
__version__ = "0.9.0"
3640

3741

@@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None,
9195

9296
__all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema',
9397
'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning',
94-
'SchemaError', 'dbapi']
98+
'SchemaError', 'dbapi', 'Datetime']
9599

96100
# ConnectionPool is supported only for Python 3.7 or newer.
97101
if sys.version_info.major >= 3 and sys.version_info.minor >= 7:

tarantool/msgpack_ext/datetime.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.datetime import Datetime
2+
3+
EXT_ID = 4
4+
5+
def encode(obj):
    """Return the MessagePack ext payload for a datetime object.

    The work is delegated to the object's own ``msgpack_encode()``
    method; whatever it returns is passed through unchanged.
    """
    payload = obj.msgpack_encode()
    return payload
7+
8+
def decode(data):
    """Build a ``Datetime`` object from a MessagePack ext payload.

    ``data`` is handed to the ``Datetime`` constructor as its only
    argument and the resulting object is returned.
    """
    decoded = Datetime(data)
    return decoded

tarantool/msgpack_ext/packer.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
from uuid import UUID
33
from msgpack import ExtType
44

5+
from tarantool.msgpack_ext.types.datetime import Datetime
6+
57
import tarantool.msgpack_ext.decimal as ext_decimal
68
import tarantool.msgpack_ext.uuid as ext_uuid
9+
import tarantool.msgpack_ext.datetime as ext_datetime
710

811
encoders = [
9-
{'type': Decimal, 'ext': ext_decimal},
10-
{'type': UUID, 'ext': ext_uuid },
12+
{'type': Decimal, 'ext': ext_decimal },
13+
{'type': UUID, 'ext': ext_uuid },
14+
{'type': Datetime, 'ext': ext_datetime},
1115
]
1216

1317
def default(obj):
+126
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
from copy import deepcopy
2+
3+
import pandas
4+
5+
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
6+
#
7+
# The datetime MessagePack representation looks like this:
8+
# +---------+----------------+==========+-----------------+
9+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
10+
# | = d7/d8 | = 4 | | tzindex; |
11+
# +---------+----------------+==========+-----------------+
12+
# MessagePack data contains:
13+
#
14+
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
15+
# little-endian order.
16+
# * The optional fields (8 bytes), if any of them have a non-zero value.
17+
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
18+
# tzindex (2 bytes) packed in the little-endian order.
19+
#
20+
# seconds is seconds since Epoch, where the epoch is the point where the time
21+
# starts, and is platform dependent. For Unix, the epoch is January 1,
22+
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
23+
# definition in src/lib/core/datetime.h and reasons in
24+
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
25+
#
26+
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
27+
# a definition in src/lib/core/datetime.h.
28+
#
29+
# tzoffset is timezone offset in minutes from UTC. Tarantool uses a int16_t type,
30+
# see a structure definition in src/lib/core/datetime.h.
31+
#
32+
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure
33+
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
34+
# specified, tzindex has the preference and the tzoffset value is ignored.
35+
36+
SECONDS_SIZE_BYTES = 8
37+
NSEC_SIZE_BYTES = 4
38+
TZOFFSET_SIZE_BYTES = 2
39+
TZINDEX_SIZE_BYTES = 2
40+
41+
BYTEORDER = 'little'
42+
43+
NSEC_IN_SEC = 1000000000
44+
45+
46+
def get_bytes_as_int(data, cursor, size):
    """Read one signed integer field out of a byte buffer.

    Takes ``size`` bytes of ``data`` starting at offset ``cursor``,
    interprets them as a signed integer in ``BYTEORDER`` byte order and
    returns a ``(value, next_cursor)`` pair, where ``next_cursor`` is
    ``cursor + size``.
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
49+
50+
def get_int_as_bytes(data, size):
    """Serialize integer ``data`` into exactly ``size`` bytes.

    The value is written as a signed integer in ``BYTEORDER`` byte
    order and the resulting ``bytes`` object is returned.
    """
    encoded = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return encoded
52+
53+
def msgpack_decode(data):
    """Decode a Tarantool datetime MessagePack ext payload.

    ``data`` must be either ``SECONDS_SIZE_BYTES`` long (seconds only)
    or contain the seconds followed by the complete optional section
    (nsec, tzoffset, tzindex). Any other length is rejected instead of
    being parsed as a truncated or oversized optional section.

    Returns a ``pandas.Timestamp`` built from the total number of
    nanoseconds since the epoch.

    Raises ``MsgpackError`` if the payload length is unexpected and
    ``NotImplementedError`` if tzoffset or tzindex is non-zero.
    """
    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)
    data_len = len(data)

    # Validate the length before reading anything, so that a short or
    # partially filled payload never produces a bogus value.
    if data_len not in (SECONDS_SIZE_BYTES, full_size):
        # MsgpackError is not imported at module level in this file;
        # importing it here keeps the error path from failing with
        # NameError.
        from tarantool.error import MsgpackError
        raise MsgpackError('Unexpected datetime payload length')

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if data_len == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # Optional section omitted: all optional fields are zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    # Timezone information is not handled here.
    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError

    total_nsec = seconds * NSEC_IN_SEC + nsec

    return pandas.to_datetime(total_nsec, unit='ns')
74+
75+
class Datetime():
    """Wrapper around ``pandas.Timestamp`` for Tarantool datetime values.

    The wrapped timestamp is stored in ``self._timestamp`` and can be
    obtained as a copy with ``to_pd_timestamp()``.
    """

    def __init__(self, *args, **kwargs):
        """Create a datetime object.

        The first positional argument selects the construction mode:

        * ``bytes`` - a MessagePack ext payload, decoded with
          ``msgpack_decode()``;
        * ``pandas.Timestamp`` - the timestamp is copied;
        * ``Datetime`` - the wrapped timestamp is copied.

        In these three modes any remaining arguments are ignored.
        Otherwise (no positional arguments, or a first argument of
        another type) all arguments are forwarded to
        ``pandas.Timestamp()``.
        """
        data = args[0] if args else None

        if isinstance(data, bytes):
            self._timestamp = msgpack_decode(data)
        elif isinstance(data, pandas.Timestamp):
            self._timestamp = deepcopy(data)
        elif isinstance(data, Datetime):
            self._timestamp = deepcopy(data._timestamp)
        else:
            # Covers both keyword-only calls and positional values such
            # as a date string, so the timestamp is always initialized.
            self._timestamp = pandas.Timestamp(*args, **kwargs)

    def __eq__(self, other):
        """Compare with another ``Datetime`` or a ``pandas.Timestamp``.

        Objects of any other type are never equal.
        """
        if isinstance(other, Datetime):
            return self._timestamp == other._timestamp
        elif isinstance(other, pandas.Timestamp):
            return self._timestamp == other
        else:
            return False

    def __str__(self):
        """Return the string form of the wrapped timestamp."""
        return self._timestamp.__str__()

    def __repr__(self):
        """Return the repr of the wrapped timestamp."""
        return self._timestamp.__repr__()

    def to_pd_timestamp(self):
        """Return a copy of the wrapped ``pandas.Timestamp``."""
        return deepcopy(self._timestamp)

    def msgpack_encode(self):
        """Encode the value as a Tarantool datetime ext payload.

        Returns the seconds as ``SECONDS_SIZE_BYTES`` bytes; if any of
        nsec, tzoffset or tzindex is non-zero, the optional section
        (nsec, tzoffset, tzindex) is appended. tzoffset and tzindex are
        always written as zero here.
        """
        ts_value = self._timestamp.value

        # Floor division keeps nsec non-negative for values before the
        # epoch as well.
        seconds = ts_value // NSEC_IN_SEC
        nsec = ts_value % NSEC_IN_SEC
        tzoffset = 0
        tzindex = 0

        buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

        if (nsec != 0) or (tzoffset != 0) or (tzindex != 0):
            buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES)

        return buf

tarantool/msgpack_ext/unpacker.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import tarantool.msgpack_ext.decimal as ext_decimal
22
import tarantool.msgpack_ext.uuid as ext_uuid
3+
import tarantool.msgpack_ext.datetime as ext_datetime
34

45
decoders = {
5-
ext_decimal.EXT_ID: ext_decimal.decode,
6-
ext_uuid.EXT_ID : ext_uuid.decode ,
6+
ext_decimal.EXT_ID : ext_decimal.decode ,
7+
ext_uuid.EXT_ID : ext_uuid.decode ,
8+
ext_datetime.EXT_ID: ext_datetime.decode,
79
}
810

911
def ext_hook(code, data):

test/suites/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
from .test_ssl import TestSuite_Ssl
1818
from .test_decimal import TestSuite_Decimal
1919
from .test_uuid import TestSuite_UUID
20+
from .test_datetime import TestSuite_Datetime
2021

2122
test_cases = (TestSuite_Schema_UnicodeConnection,
2223
TestSuite_Schema_BinaryConnection,
2324
TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect,
2425
TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI,
2526
TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl,
26-
TestSuite_Decimal, TestSuite_UUID)
27+
TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime)
2728

2829
def load_tests(loader, tests, pattern):
2930
suite = unittest.TestSuite()

test/suites/lib/skip.py

+11
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):
154154

155155
return skip_or_run_test_tarantool(func, '2.4.1',
156156
'does not support UUID type')
157+
158+
def skip_or_run_datetime_test(func):
    """Decorator that gates datetime-related tests.

    Delegates to ``skip_or_run_test_pcall_require`` with the
    ``'datetime'`` module name, so the decision to skip or run is made
    by that helper.

    Tarantool supports datetime type only since 2.10.0 version.
    See https://github.com/tarantool/tarantool/issues/5941
    """
    required_module = 'datetime'
    skip_reason = 'does not support datetime type'

    return skip_or_run_test_pcall_require(func, required_module, skip_reason)

0 commit comments

Comments
 (0)