Skip to content

Commit 77761ca

Browse files
msgpack: support datetime extended type
Tarantool supports the datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in the msgpack decoders and encoders. The Tarantool datetime type is mapped to the pandas.Timestamp type. pandas.Timestamp was chosen because it can store both nanoseconds and timezone information. The built-in Python datetime.datetime supports microseconds at most, and numpy.datetime64 does not support timezones. If you want to use numpy.datetime64 or datetime.datetime in your logic, you can use the pandas converters in your code: - pandas.to_datetime(): numpy.datetime64 -> pandas.Timestamp - pandas.to_datetime(): datetime.datetime -> pandas.Timestamp - pandas.Timestamp.to_datetime64(): pandas.Timestamp -> numpy.datetime64 - pandas.Timestamp.to_pydatetime(): pandas.Timestamp -> datetime.datetime This patch does not yet introduce support for timezones in datetime. 1. tarantool/tarantool#5941 Part of #204
1 parent 3ac6206 commit 77761ca

File tree

6 files changed

+209
-5
lines changed

6 files changed

+209
-5
lines changed

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
msgpack>=1.0.4
2+
pandas>=1.0.0
+88
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import time
2+
import math
3+
import pandas
4+
5+
# https://www.tarantool.io/ru/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
6+
#
7+
# The datetime MessagePack representation looks like this:
8+
# +---------+----------------+==========+-----------------+
9+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
10+
# | = d7/d8 | = 4 | | tzindex; |
11+
# +---------+----------------+==========+-----------------+
12+
# MessagePack data contains:
13+
#
14+
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
15+
# little-endian order.
16+
# * The optional fields (8 bytes), if any of them have a non-zero value.
17+
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
18+
# tzindex (2 bytes) packed in the little-endian order.
19+
#
20+
# seconds is seconds since Epoch, where the epoch is the point where the time
21+
# starts, and is platform dependent. For Unix, the epoch is January 1,
22+
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
23+
# definition in src/lib/core/datetime.h and reasons in
24+
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
25+
#
26+
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
27+
# a definition in src/lib/core/datetime.h.
28+
#
29+
# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type,
30+
# see a structure definition in src/lib/core/datetime.h.
31+
#
32+
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure
33+
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
34+
# specified, tzindex has the preference and the tzoffset value is ignored.
35+
36+
# MessagePack extension type id used for the datetime payload.
EXT_ID = 4

# Sizes of the payload fields, listed in the order they appear on the wire.
SECONDS_SIZE_BYTES = 8
NSEC_SIZE_BYTES = 4
TZOFFSET_SIZE_BYTES = 2
TZINDEX_SIZE_BYTES = 2

# Every integer field of the payload is packed with this byte order.
BYTEORDER = 'little'

# Kept as an int on purpose: it is used in integer floor division and modulo
# when splitting a nanosecond count into seconds and nanoseconds.
NSEC_IN_SEC = 10 ** 9
assert isinstance(NSEC_IN_SEC, int)
47+
48+
def get_int_as_bytes(data, size):
    """Pack the integer ``data`` into exactly ``size`` bytes.

    The value is written as a signed integer using the module-level
    ``BYTEORDER``.

    :param data: Integer to serialize.
    :param size: Width of the resulting field in bytes.
    :return: ``bytes`` object of length ``size``.
    """
    packed = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return packed
50+
51+
def encode(obj):
    """Serialize a timestamp into the datetime extension payload.

    ``obj.value`` is split into whole seconds and the remaining
    nanoseconds. The seconds field is always emitted; the optional
    nsec/tzoffset/tzindex tail is emitted only when at least one of those
    fields is non-zero.

    :param obj: Object exposing an integer nanosecond count as ``value``
        (the packer registers this module for ``pandas.Timestamp``).
    :return: Payload bytes: 8 bytes, or 16 bytes when the optional tail is
        present.
    """
    seconds, nsec = divmod(obj.value, NSEC_IN_SEC)

    # Timezones are not supported yet, so both fields are always zero.
    tzoffset = 0
    tzindex = 0

    seconds_part = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

    if nsec == 0 and tzoffset == 0 and tzindex == 0:
        # Nothing to put into the optional tail: send the short form.
        return seconds_part

    return b''.join((
        seconds_part,
        get_int_as_bytes(nsec, NSEC_SIZE_BYTES),
        get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES),
        get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES),
    ))
65+
66+
def get_bytes_as_int(data, cursor, size):
    """Read one signed integer field from ``data``.

    :param data: Buffer holding the payload.
    :param cursor: Offset of the first byte of the field.
    :param size: Width of the field in bytes.
    :return: Tuple ``(value, next_cursor)`` where ``next_cursor`` points
        right after the field that was read.
    """
    next_cursor = cursor + size
    value = int.from_bytes(data[cursor:next_cursor], BYTEORDER, signed=True)
    return value, next_cursor
69+
70+
def decode(data):
    """Decode a datetime extension payload into a ``pandas.Timestamp``.

    :param data: Raw extension payload: 8 bytes of seconds since the
        epoch, optionally followed by 8 more bytes holding nsec (4 bytes),
        tzoffset (2 bytes) and tzindex (2 bytes). All fields are read as
        signed integers in ``BYTEORDER`` order.
    :return: Result of ``pandas.to_datetime`` for
        ``seconds * NSEC_IN_SEC + nsec`` nanoseconds.
    :raise ValueError: If the payload is neither the short (seconds only)
        nor the full (seconds plus optional fields) size.
    :raise NotImplementedError: If tzoffset or tzindex is non-zero, since
        timezones are not supported yet.
    """
    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)

    if len(data) not in (SECONDS_SIZE_BYTES, full_size):
        # Without this check a truncated or oversized payload would be
        # decoded silently: slicing past the end yields empty bytes and
        # int.from_bytes() turns them into 0.
        raise ValueError(
            'Unexpected datetime payload length {}, expected {} or {} '
            'bytes'.format(len(data), SECONDS_SIZE_BYTES, full_size))

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if len(data) == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # Short form: all optional fields are implicitly zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'datetime values with a timezone are not supported yet')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    return pandas.to_datetime(total_nsec, unit='ns')

tarantool/msgpack_ext_types/packer.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1+
import pandas
12
from decimal import Decimal
23
from uuid import UUID
34
from msgpack import ExtType
45

56
import tarantool.msgpack_ext_types.decimal as ext_decimal
67
import tarantool.msgpack_ext_types.uuid as ext_uuid
8+
import tarantool.msgpack_ext_types.datetime as ext_datetime
79

810
encoders = [
9-
{'type': Decimal, 'ext': ext_decimal},
10-
{'type': UUID, 'ext': ext_uuid },
11+
{'type': Decimal, 'ext': ext_decimal },
12+
{'type': UUID, 'ext': ext_uuid },
13+
{'type': pandas.Timestamp, 'ext': ext_datetime},
1114
]
1215

1316
def default(obj):

tarantool/msgpack_ext_types/unpacker.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import tarantool.msgpack_ext_types.decimal as ext_decimal
22
import tarantool.msgpack_ext_types.uuid as ext_uuid
3+
import tarantool.msgpack_ext_types.datetime as ext_datetime
34

45
decoders = {
5-
ext_decimal.EXT_ID: ext_decimal.decode,
6-
ext_uuid.EXT_ID : ext_uuid.decode ,
6+
ext_decimal.EXT_ID : ext_decimal.decode ,
7+
ext_uuid.EXT_ID : ext_uuid.decode ,
8+
ext_datetime.EXT_ID : ext_datetime.decode,
79
}
810

911
def ext_hook(code, data):

test/suites/lib/skip.py

+11
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):
154154

155155
return skip_or_run_test_tarantool(func, '2.4.1',
156156
'does not support UUID type')
157+
158+
def skip_or_run_datetime_test(func):
    """Decorator that skips or runs a datetime-related test.

    The decision is delegated to ``skip_or_run_test_pcall_require`` for
    the 'datetime' module (presumably it checks whether the server can
    ``require`` it -- confirm against that helper).

    The datetime type is supported by Tarantool only since version 2.10.0.
    See https://github.com/tarantool/tarantool/issues/5941
    """
    skip_reason = 'does not support datetime type'
    return skip_or_run_test_pcall_require(func, 'datetime', skip_reason)

test/suites/test_msgpack_ext_types.py

+100-1
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,15 @@
99
import msgpack
1010
import warnings
1111
import tarantool
12+
import pandas
1213

1314
from tarantool.msgpack_ext_types.packer import default as packer_default
1415
from tarantool.msgpack_ext_types.unpacker import ext_hook as unpacker_ext_hook
1516

1617
from .lib.tarantool_server import TarantoolServer
17-
from .lib.skip import skip_or_run_decimal_test, skip_or_run_UUID_test
18+
from .lib.skip import (
19+
skip_or_run_datetime_test, skip_or_run_decimal_test,
20+
skip_or_run_UUID_test,)
1821
from tarantool.error import MsgpackError, MsgpackWarning
1922

2023
class TestSuite_MsgpackExtTypes(unittest.TestCase):
@@ -30,6 +33,7 @@ def setUpClass(self):
3033
self.adm(r"""
3134
_, decimal = pcall(require, 'decimal')
3235
_, uuid = pcall(require, 'uuid')
36+
_, datetime = pcall(require, 'datetime')
3337
3438
box.schema.space.create('test')
3539
box.space['test']:create_index('primary', {
@@ -499,6 +503,101 @@ def test_UUID_tarantool_encode(self):
499503

500504
self.assertSequenceEqual(self.con.eval(lua_eval), [True])
501505

506+
507+
# Each case pairs a Python value with its raw extension payload and with the
# Lua expression that builds the same value on the Tarantool side.
datetime_cases = [
    # Epoch itself: all-zero seconds, no optional fields.
    {
        'python': pandas.Timestamp(year=1970, month=1, day=1),
        'msgpack': b'\x00\x00\x00\x00\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=1970, month=1, day=1})",
    },
    # Date only, after the epoch.
    {
        'python': pandas.Timestamp(year=2022, month=8, day=31),
        'msgpack': b'\x80\xa4\x0e\x63\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=2022, month=8, day=31})",
    },
    # Date before the epoch: negative seconds.
    {
        'python': pandas.Timestamp(year=1900, month=1, day=1),
        'msgpack': b'\x80\x81\x55\x7c\xff\xff\xff\xff',
        'tarantool': r"datetime.new({year=1900, month=1, day=1})",
    },
    # Minute precision.
    {
        'python': pandas.Timestamp(
            year=2022, month=8, day=31, hour=18, minute=7),
        'msgpack': b'\x44\xa3\x0f\x63\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})",
    },
    # Second precision.
    {
        'python': pandas.Timestamp(
            year=2022, month=8, day=31, hour=18, minute=7, second=54),
        'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})",
    },
    # Microsecond precision: the optional fields are present.
    {
        'python': pandas.Timestamp(
            year=2022, month=8, day=31, hour=18, minute=7, second=54,
            microsecond=308543),
        'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, nsec=308543000})",
    },
    # Nanosecond precision.
    {
        'python': pandas.Timestamp(
            year=2022, month=8, day=31, hour=18, minute=7, second=54,
            microsecond=308543, nanosecond=321),
        'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00',
        'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, nsec=308543321})",
    },
]
548+
549+
def test_datetime_msgpack_decode(self):
    """Each raw extension payload must decode to the expected value."""
    for i, datetime_case in enumerate(self.datetime_cases):
        with self.subTest(msg=str(i)):
            decoded = unpacker_ext_hook(4, datetime_case['msgpack'])

            self.assertEqual(decoded, datetime_case['python'])
556+
557+
@skip_or_run_datetime_test
def test_datetime_tarantool_decode(self):
    """A value stored through the admin console must be selected back
    as the expected Python value."""
    for i, datetime_case in enumerate(self.datetime_cases):
        with self.subTest(msg=str(i)):
            # Store the value on the server side using the Lua expression.
            self.adm(f"box.space['test']:replace{{{i}, {datetime_case['tarantool']}}}")

            expected_rows = [[i, datetime_case['python']]]
            self.assertSequenceEqual(self.con.select('test', i),
                                     expected_rows)
567+
568+
def test_datetime_msgpack_encode(self):
    """Each Python value must encode to the expected extension payload."""
    for i, datetime_case in enumerate(self.datetime_cases):
        with self.subTest(msg=str(i)):
            expected = msgpack.ExtType(code=4, data=datetime_case['msgpack'])

            self.assertEqual(packer_default(datetime_case['python']),
                             expected)
575+
576+
@skip_or_run_datetime_test
def test_datetime_tarantool_encode(self):
    """A value inserted through the connector must compare equal, on the
    server side, to the value built by the matching Lua expression."""
    for i, datetime_case in enumerate(self.datetime_cases):
        with self.subTest(msg=str(i)):
            self.con.insert('test', [i, datetime_case['python']])

            # The comparison is done in Lua so that the server's own
            # datetime equality is what gets checked.
            lua_eval = f"""
                local dt = {datetime_case['tarantool']}

                local tuple = box.space['test']:get({i})
                assert(tuple ~= nil)

                if tuple[2] == dt then
                    return true
                else
                    return nil, ('%s is not equal to expected %s'):format(
                        tostring(tuple[2]), tostring(dt))
                end
            """

            self.assertSequenceEqual(self.adm(lua_eval), [True])
599+
600+
502601
@classmethod
503602
def tearDownClass(self):
504603
self.con.close()

0 commit comments

Comments
 (0)