diff --git a/Cargo.lock b/Cargo.lock index d001a5cfdaf9c..c6bedc4f3b673 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -631,25 +631,6 @@ dependencies = [ "alloc-stdlib", ] -[[package]] -name = "bson" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d76085681585d39016f4d3841eb019201fc54d2dd0d92ad1e4fab3bfb32754" -dependencies = [ - "ahash", - "base64 0.13.0", - "hex", - "indexmap", - "lazy_static", - "rand 0.8.5", - "serde", - "serde_bytes", - "serde_json", - "time 0.3.14", - "uuid", -] - [[package]] name = "bstr" version = "0.2.17" @@ -1198,13 +1179,13 @@ dependencies = [ name = "common-expression" version = "0.1.0" dependencies = [ - "bson", "chrono", "chrono-tz", "comfy-table", "common-arrow", "common-ast", "common-exception", + "common-jsonb", "educe", "enum-as-inner", "goldenfile", @@ -1286,13 +1267,13 @@ name = "common-functions-v2" version = "0.1.0" dependencies = [ "base64 0.13.0", - "bson", "bstr", "comfy-table", "common-arrow", "common-ast", "common-base", "common-expression", + "common-jsonb", "crc32fast", "goldenfile", "hex", @@ -1301,7 +1282,6 @@ dependencies = [ "num-traits", "ordered-float 3.0.0 (git+https://github.com/andylokandy/rust-ordered-float.git?branch=as)", "rand 0.8.5", - "serde_json", "strength_reduce", ] @@ -1387,6 +1367,14 @@ dependencies = [ "time 0.3.14", ] +[[package]] +name = "common-jsonb" +version = "0.1.0" +dependencies = [ + "byteorder", + "decimal-rs", +] + [[package]] name = "common-legacy-parser" version = "0.1.0" @@ -2636,6 +2624,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "decimal-rs" +version = "0.1.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2492291a982ad198a2c3b84b091b48348372ffe8a9f7194cc90a2d8b901762c" +dependencies = [ + "ethnum", + "fast-float", + "stack-buf", + "thiserror", +] + [[package]] name = "der" version = "0.4.5" @@ -2947,6 +2947,12 @@ dependencies = [ "libc", ] +[[package]] +name = "ethnum" +version = "1.2.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e4a7b7dde9ed6aed8eb4dd7474d22fb1713a4b05ac5071cdb60d9903248ad3" + [[package]] name = "event-listener" version = "2.5.3" @@ -6696,15 +6702,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_bytes" -version = "0.11.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfc50e8183eeeb6178dcb167ae34a8051d63535023ae38b5d8d12beae193d37b" -dependencies = [ - "serde", -] - [[package]] name = "serde_cbor" version = "0.11.2" @@ -7081,6 +7078,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "stack-buf" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7386b49cb287f6fafbfd3bd604914bccb99fb8d53483f40e1ecfda5d45f3370" + [[package]] name = "static_assertions" version = "1.1.0" diff --git a/docs/doc/30-reference/10-data-types/60-data-type-nullable-types.md b/docs/doc/30-reference/10-data-types/60-data-type-nullable-types.md index ac5de81909c90..063be56b050ea 100644 --- a/docs/doc/30-reference/10-data-types/60-data-type-nullable-types.md +++ b/docs/doc/30-reference/10-data-types/60-data-type-nullable-types.md @@ -33,6 +33,7 @@ Using `Nullable` will almost always have a negative impact on performance. If th Check whether the value is `NULL` or `NOT NULL`. 
[IS NULL](/doc/reference/functions/conditional-functions/isnull) + [IS NOT NULL](/doc/reference/functions/conditional-functions/isnotnull) ### Example diff --git a/src/common/jsonb/Cargo.toml b/src/common/jsonb/Cargo.toml new file mode 100644 index 0000000000000..591d83ffd82e0 --- /dev/null +++ b/src/common/jsonb/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "common-jsonb" +version = "0.1.0" +authors = ["Databend Authors "] +license = "Apache-2.0" +publish = false +edition = "2021" + +[dependencies] +byteorder = "1.4.3" +decimal-rs = "0.1.39" diff --git a/src/common/jsonb/src/constants.rs b/src/common/jsonb/src/constants.rs new file mode 100644 index 0000000000000..97cadcd13c688 --- /dev/null +++ b/src/common/jsonb/src/constants.rs @@ -0,0 +1,51 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// JSONB header constants +pub(crate) const ARRAY_CONTAINER_TAG: u32 = 0x80000000; +pub(crate) const OBJECT_CONTAINER_TAG: u32 = 0x40000000; +pub(crate) const SCALAR_CONTAINER_TAG: u32 = 0x20000000; + +pub(crate) const CONTAINER_HEADER_TYPE_MASK: u32 = 0xE0000000; +pub(crate) const CONTAINER_HEADER_LEN_MASK: u32 = 0x1FFFFFFF; + +// JSONB JEntry constants +pub(crate) const NULL_TAG: u32 = 0x00000000; +pub(crate) const STRING_TAG: u32 = 0x10000000; +pub(crate) const NUMBER_TAG: u32 = 0x20000000; +pub(crate) const FALSE_TAG: u32 = 0x30000000; +pub(crate) const TRUE_TAG: u32 = 0x40000000; +pub(crate) const CONTAINER_TAG: u32 = 0x50000000; + +// @todo support offset mode +#[allow(dead_code)] +pub(crate) const JENTRY_IS_OFF_FLAG: u32 = 0x80000000; +pub(crate) const JENTRY_TYPE_MASK: u32 = 0x70000000; +pub(crate) const JENTRY_OFF_LEN_MASK: u32 = 0x0FFFFFFF; + +// JSON text constants +pub(crate) const NULL_LEN: usize = 4; +pub(crate) const TRUE_LEN: usize = 4; +pub(crate) const FALSE_LEN: usize = 5; +pub(crate) const UNICODE_LEN: usize = 4; + +// JSON text escape characters constants +pub(crate) const BS: char = '\x5C'; // \\ Backslash +pub(crate) const QU: char = '\x22'; // \" Double quotation mark +pub(crate) const SD: char = '\x2F'; // \/ Slash or divide +pub(crate) const BB: char = '\x08'; // \b Backspace +pub(crate) const FF: char = '\x0C'; // \f Formfeed Page Break +pub(crate) const NN: char = '\x0A'; // \n Newline +pub(crate) const RR: char = '\x0D'; // \r Carriage Return +pub(crate) const TT: char = '\x09'; // \t Horizontal Tab diff --git a/src/common/jsonb/src/de.rs b/src/common/jsonb/src/de.rs new file mode 100644 index 0000000000000..2ab85caca97b8 --- /dev/null +++ b/src/common/jsonb/src/de.rs @@ -0,0 +1,183 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; +use std::collections::VecDeque; + +use byteorder::BigEndian; +use byteorder::ReadBytesExt; +use decimal_rs::Decimal; + +use super::constants::*; +use super::error::*; +use super::jentry::JEntry; +use super::parser::parse_value; +use super::value::Object; +use super::value::Value; + +/// The binary `JSONB` contains three parts, `Header`, `JEntry` and `RawData`. +/// This structure can be nested. Each group of structures starts with a `Header`. +/// The upper-level `Value` will store the `Header` length or offset of +/// the lower-level `Value`. + +/// `Header` stores the type of the `Value`, which is one of `Array`, `Object` and `Scalar`. +/// `Scalar` has only one `Value`, and a corresponding `JEntry`. +/// `Array` and `Object` are nested types; they have multiple lower-level `Values`, +/// so the `Header` also stores the number of lower-level `Values`. + +/// `JEntry` stores the types of `Scalar Value`, including `Null`, `True`, `False`, +/// `Number`, `String` and `Container`. They have three different decode methods. +/// 1. `Null`, `True` and `False` can be obtained by `JEntry`, no extra work required. +/// 2. `Number` and `String` have related `RawData`; the `JEntry` stores the length +/// or offset of this data, the `Value` can be read out and then decoded. +/// 3. `Container` is actually a nested `Array` or `Object` with the same structure, +/// the `JEntry` stores the length or offset of the lower-level `Header`, +/// from where the same decode process can begin. + +/// `RawData` is the encoded `Value`.
+/// `Number` is a variable-length `Decimal` that stores both integer and float values. +/// `String` is the original string, can be borrowed directly without extra decode. +/// `Array` and `Object` are lower-level encoded `JSONB` values. +/// The upper-level doesn't care about the specific content. +/// Decode can be executed recursively. + +/// Decode `JSONB` Value from binary bytes. +pub fn from_slice(buf: &[u8]) -> Result<Value<'_>, Error> { + let mut decoder = Decoder::new(buf); + match decoder.decode() { + Ok(value) => Ok(value), + // for compatibility with the first version of `JSON` text, parse it again + Err(_) => parse_value(buf), + } +} + +#[repr(transparent)] +pub struct Decoder<'a> { + buf: &'a [u8], +} + +impl<'a> Decoder<'a> { + pub fn new(buf: &'a [u8]) -> Decoder<'a> { + Self { buf } + } + + pub fn decode(&mut self) -> Result<Value<'a>, Error> { + // Valid `JSONB` Value has at least one `Header` + if self.buf.len() < 4 { + return Err(Error::InvalidJsonb); + } + let value = self.decode_jsonb()?; + Ok(value) + } + + // Read value type from the `Header` + // `Scalar` has one `JEntry` + // `Array` and `Object` store the number of elements + fn decode_jsonb(&mut self) -> Result<Value<'a>, Error> { + let container_header = self.buf.read_u32::<BigEndian>()?; + + match container_header & CONTAINER_HEADER_TYPE_MASK { + SCALAR_CONTAINER_TAG => { + let encoded = self.buf.read_u32::<BigEndian>()?; + let jentry = JEntry::decode_jentry(encoded); + self.decode_scalar(jentry) + } + ARRAY_CONTAINER_TAG => self.decode_array(container_header), + OBJECT_CONTAINER_TAG => self.decode_object(container_header), + _ => Err(Error::InvalidJsonbHeader), + } + } + + // Decode `Value` based on the `JEntry` + // `Null` and `Boolean` don't need to read extra data + // `Number` and `String` `JEntry` stores the length or offset of the data, + // read them and decode to the `Value` + // `Array` and `Object` need to read nested data from the lower-level `Header` + fn decode_scalar(&mut self, jentry: JEntry) -> Result<Value<'a>, Error> { + match
jentry.type_code { + NULL_TAG => Ok(Value::Null), + TRUE_TAG => Ok(Value::Bool(true)), + FALSE_TAG => Ok(Value::Bool(false)), + STRING_TAG => { + let offset = jentry.length as usize; + let s = std::str::from_utf8(&self.buf[..offset]).unwrap(); + self.buf = &self.buf[offset..]; + Ok(Value::String(Cow::Borrowed(s))) + } + NUMBER_TAG => { + let offset = jentry.length as usize; + let d = Decimal::decode(&self.buf[..offset]); + self.buf = &self.buf[offset..]; + Ok(Value::Number(d)) + } + CONTAINER_TAG => self.decode_jsonb(), + _ => Err(Error::InvalidJsonbJEntry), + } + } + + // Decode the numbers of values from the `Header`, + // then read all `JEntries`, finally decode the `Value` by `JEntry` + fn decode_array(&mut self, container_header: u32) -> Result, Error> { + let length = (container_header & CONTAINER_HEADER_LEN_MASK) as usize; + let jentries = self.decode_jentries(length)?; + let mut values: Vec = Vec::with_capacity(length); + // decode all values + for jentry in jentries.into_iter() { + let value = self.decode_scalar(jentry)?; + values.push(value); + } + + let value = Value::Array(values); + Ok(value) + } + + // The basic process is the same as that of `Array` + // but first decode the keys and then decode the values + fn decode_object(&mut self, container_header: u32) -> Result, Error> { + let length = (container_header & CONTAINER_HEADER_LEN_MASK) as usize; + let mut jentries = self.decode_jentries(length * 2)?; + + let mut keys: VecDeque = VecDeque::with_capacity(length); + // decode all keys first + for _ in 0..length { + let jentry = jentries.pop_front().unwrap(); + let key = self.decode_scalar(jentry)?; + keys.push_back(key); + } + + let mut obj = Object::new(); + // decode all values + for _ in 0..length { + let key = keys.pop_front().unwrap(); + let k = key.as_str().unwrap(); + let jentry = jentries.pop_front().unwrap(); + let value = self.decode_scalar(jentry)?; + obj.insert(k.to_string(), value); + } + + let value = Value::Object(obj); + Ok(value) + } 
+ + // Decode `JEntries` for `Array` and `Object`. + // The count comes from an untrusted `Header`, so it is checked against + // the bytes left (4 per `JEntry`) before anything is allocated. + fn decode_jentries(&mut self, length: usize) -> Result<VecDeque<JEntry>, Error> { + if self.buf.len() / 4 < length { + return Err(Error::InvalidEOF); + } + let mut jentries: VecDeque<JEntry> = VecDeque::with_capacity(length); + for _ in 0..length { + let encoded = self.buf.read_u32::<BigEndian>()?; + let jentry = JEntry::decode_jentry(encoded); + jentries.push_back(jentry); + } + Ok(jentries) + } +} diff --git a/src/common/jsonb/src/error.rs b/src/common/jsonb/src/error.rs new file mode 100644 index 0000000000000..ed25e836196ae --- /dev/null +++ b/src/common/jsonb/src/error.rs @@ -0,0 +1,61 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +use core::fmt::Display; + +/// List of possible errors +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum Error { + InvalidUtf8, + InvalidEOF, + + InvalidJsonb, + InvalidJsonbHeader, + InvalidJsonbJEntry, + + InvalidValue, + InvalidNullValue, + InvalidFalseValue, + InvalidTrueValue, + InvalidNumberValue, + InvalidStringValue, + InvalidArrayValue, + InvalidObjectValue, + + InvalidEscaped(u8), + InvalidHex(u8), + InvalidLoneLeadingSurrogateInHexEscape(u16), + InvalidSurrogateInHexEscape(u16), + UnexpectedEndOfHexEscape, + UnexpectedTrailingCharacters, +} + +impl Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "{:?}", self) + } +} + +// NOTE(review): the readers visible in this crate are in-memory slices, whose +// reads fail only when the input ends early, hence `InvalidEOF` +impl From<std::io::Error> for Error { + fn from(_error: std::io::Error) -> Self { + Error::InvalidEOF + } +} + +// A failed decimal conversion means the number itself is not representable +impl From<decimal_rs::DecimalConvertError> for Error { + fn from(_error: decimal_rs::DecimalConvertError) -> Self { + Error::InvalidNumberValue + } +} diff --git a/src/common/jsonb/src/from.rs b/src/common/jsonb/src/from.rs new file mode 100644 index 0000000000000..1e6160deca3c4 --- /dev/null +++ b/src/common/jsonb/src/from.rs @@ -0,0 +1,124 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use core::iter::FromIterator; +use std::borrow::Cow; + +use decimal_rs::Decimal; + +use super::value::Object; +use super::value::Value; + +macro_rules!
from_integer { + ($($ty:ident)*) => { + $( + impl<'a> From<$ty> for Value<'a> { + fn from(n: $ty) -> Self { + Value::Number(n.into()) + } + } + )* + }; +} + +macro_rules! from_float { + ($($ty:ident)*) => { + $( + impl<'a> From<$ty> for Value<'a> { + fn from(n: $ty) -> Self { + Value::Number(n.try_into().unwrap()) + } + } + )* + }; +} + +from_integer! { + i8 i16 i32 i64 isize + u8 u16 u32 u64 usize +} + +from_float! { + f32 f64 +} + +impl<'a> From for Value<'a> { + fn from(f: bool) -> Self { + Value::Bool(f) + } +} + +impl<'a> From for Value<'a> { + fn from(f: String) -> Self { + Value::String(f.into()) + } +} + +impl<'a> From<&'a str> for Value<'a> { + fn from(f: &'a str) -> Self { + Value::String(Cow::from(f)) + } +} + +impl<'a> From> for Value<'a> { + fn from(f: Cow<'a, str>) -> Self { + Value::String(f) + } +} + +impl<'a> From for Value<'a> { + fn from(d: Decimal) -> Self { + Value::Number(d) + } +} + +impl<'a> From> for Value<'a> { + fn from(o: Object<'a>) -> Self { + Value::Object(o) + } +} + +impl<'a, T: Into>> From> for Value<'a> { + fn from(f: Vec) -> Self { + Value::Array(f.into_iter().map(Into::into).collect()) + } +} + +impl<'a, T: Clone + Into>> From<&'a [T]> for Value<'a> { + fn from(f: &'a [T]) -> Self { + Value::Array(f.iter().cloned().map(Into::into).collect()) + } +} + +impl<'a, T: Into>> FromIterator for Value<'a> { + fn from_iter>(iter: I) -> Self { + Value::Array(iter.into_iter().map(Into::into).collect()) + } +} + +impl<'a, K: Into, V: Into>> FromIterator<(K, V)> for Value<'a> { + fn from_iter>(iter: I) -> Self { + Value::Object( + iter.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(), + ) + } +} + +impl<'a> From<()> for Value<'a> { + fn from((): ()) -> Self { + Value::Null + } +} diff --git a/src/common/jsonb/src/jentry.rs b/src/common/jsonb/src/jentry.rs new file mode 100644 index 0000000000000..9a3dc0c7b1dad --- /dev/null +++ b/src/common/jsonb/src/jentry.rs @@ -0,0 +1,75 @@ +// Copyright 2022 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::constants::*; + +#[derive(Debug)] +pub(crate) struct JEntry { + pub(crate) type_code: u32, + pub(crate) length: u32, +} + +impl JEntry { + pub(crate) fn decode_jentry(encoded: u32) -> JEntry { + let type_code = encoded & JENTRY_TYPE_MASK; + let length = encoded & JENTRY_OFF_LEN_MASK; + JEntry { type_code, length } + } + + pub(crate) fn make_null_jentry() -> JEntry { + JEntry { + type_code: NULL_TAG, + length: 0, + } + } + + pub(crate) fn make_true_jentry() -> JEntry { + JEntry { + type_code: TRUE_TAG, + length: 0, + } + } + + pub(crate) fn make_false_jentry() -> JEntry { + JEntry { + type_code: FALSE_TAG, + length: 0, + } + } + + pub(crate) fn make_string_jentry(length: usize) -> JEntry { + JEntry { + type_code: STRING_TAG, + length: length as u32, + } + } + + pub(crate) fn make_number_jentry(length: usize) -> JEntry { + JEntry { + type_code: NUMBER_TAG, + length: length as u32, + } + } + + pub(crate) fn make_container_jentry(length: usize) -> JEntry { + JEntry { + type_code: CONTAINER_TAG, + length: length as u32, + } + } + + pub(crate) fn encoded(&self) -> u32 { + self.type_code | self.length + } +} diff --git a/src/common/jsonb/src/lib.rs b/src/common/jsonb/src/lib.rs new file mode 100644 index 0000000000000..1b213f8bfc5f4 --- /dev/null +++ b/src/common/jsonb/src/lib.rs @@ -0,0 +1,28 @@ +// Copyright 2022 Datafuse Labs. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod constants; +mod de; +mod error; +mod from; +mod jentry; +mod parser; +mod ser; +mod util; +mod value; + +pub use de::from_slice; +pub use error::Error; +pub use parser::parse_value; +pub use value::*; diff --git a/src/common/jsonb/src/parser.rs b/src/common/jsonb/src/parser.rs new file mode 100644 index 0000000000000..0ed1f4c55ab0b --- /dev/null +++ b/src/common/jsonb/src/parser.rs @@ -0,0 +1,281 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; + +use super::constants::*; +use super::error::Error; +use super::util::parse_escaped_string; +use super::value::Object; +use super::value::Value; + +// Parse JSON text to JSONB Value. +// Inspired by `https://github.com/jorgecarleitao/json-deserializer` +// Thanks Jorge Leitao. 
+pub fn parse_value(buf: &[u8]) -> Result, Error> { + let mut parser = Parser::new(buf); + parser.parse() +} + +#[repr(transparent)] +struct Parser<'a> { + buf: &'a [u8], +} + +impl<'a> Parser<'a> { + fn new(buf: &'a [u8]) -> Parser<'a> { + Self { buf } + } + + fn parse(&mut self) -> Result, Error> { + let val = self.parse_json_value()?; + self.skip_unused(); + if !self.buf.is_empty() { + return Err(Error::UnexpectedTrailingCharacters); + } + Ok(val) + } + + fn parse_json_value(&mut self) -> Result, Error> { + self.skip_unused(); + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + match byte { + b'n' => self.parse_json_null(), + b't' => self.parse_json_true(), + b'f' => self.parse_json_false(), + b'0'..=b'9' | b'-' => self.parse_json_number(), + b'"' => self.parse_json_string(), + b'[' => self.parse_json_array(), + b'{' => self.parse_json_object(), + _ => Err(Error::InvalidValue), + } + } + + fn skip_unused(&mut self) { + let mut idx = 0; + while let Some(byte) = self.buf.get(idx) { + if !matches!(byte, b'\n' | b' ' | b'\r' | b'\t') { + break; + } else { + idx += 1; + } + } + self.buf = &self.buf[idx..] + } + + fn parse_json_null(&mut self) -> Result, Error> { + let data: [u8; NULL_LEN] = self + .buf + .get(..NULL_LEN) + .ok_or(Error::InvalidEOF)? + .try_into() + .unwrap(); + self.buf = &self.buf[NULL_LEN..]; + if data != [b'n', b'u', b'l', b'l'] { + return Err(Error::InvalidNullValue); + } + Ok(Value::Null) + } + + fn parse_json_true(&mut self) -> Result, Error> { + let data: [u8; TRUE_LEN] = self + .buf + .get(..TRUE_LEN) + .ok_or(Error::InvalidEOF)? + .try_into() + .unwrap(); + self.buf = &self.buf[TRUE_LEN..]; + if data != [b't', b'r', b'u', b'e'] { + return Err(Error::InvalidTrueValue); + } + Ok(Value::Bool(true)) + } + + fn parse_json_false(&mut self) -> Result, Error> { + let data: [u8; FALSE_LEN] = self + .buf + .get(..FALSE_LEN) + .ok_or(Error::InvalidEOF)? 
+ .try_into() + .unwrap(); + self.buf = &self.buf[FALSE_LEN..]; + if data != [b'f', b'a', b'l', b's', b'e'] { + return Err(Error::InvalidFalseValue); + } + Ok(Value::Bool(false)) + } + + fn parse_json_number(&mut self) -> Result, Error> { + let mut idx = 0; + let mut has_point = false; + let mut has_exponential = false; + + while let Some(byte) = self.buf.get(idx) { + if idx == 0 && *byte == b'-' { + idx += 1; + continue; + } + match byte { + b'0'..=b'9' => {} + b'.' => { + if has_point || has_exponential { + return Err(Error::InvalidNumberValue); + } + has_point = true; + } + b'e' | b'E' => { + if has_exponential { + return Err(Error::InvalidNumberValue); + } + has_exponential = true; + if let Some(next_byte) = self.buf.get(idx + 1) { + if *next_byte == b'+' || *next_byte == b'-' { + idx += 1; + } + } + } + _ => break, + } + idx += 1 + } + if idx == 0 { + return Err(Error::InvalidNumberValue); + } + let data = &self.buf[..idx]; + self.buf = &self.buf[idx..]; + + let s = std::str::from_utf8(data).unwrap(); + match s.parse() { + Ok(dec) => Ok(Value::Number(dec)), + Err(_) => Err(Error::InvalidNumberValue), + } + } + + fn parse_json_string(&mut self) -> Result, Error> { + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte != b'"' { + return Err(Error::InvalidStringValue); + } + + let mut idx = 1; + let mut escapes = 0; + loop { + let byte = self.buf.get(idx).ok_or(Error::InvalidEOF)?; + idx += 1; + match byte { + b'\\' => { + escapes += 1; + let next_byte = self.buf.get(idx).ok_or(Error::InvalidEOF)?; + if *next_byte == b'u' { + idx += UNICODE_LEN + 1; + } else { + idx += 1; + } + } + b'"' => { + break; + } + _ => {} + } + } + + let mut data = &self.buf[1..idx - 1]; + self.buf = &self.buf[idx..]; + let val = if escapes > 0 { + let mut str_buf = String::with_capacity(idx - 2 - escapes); + while !data.is_empty() { + let byte = data[0]; + if byte == b'\\' { + data = &data[1..]; + data = parse_escaped_string(data, &mut str_buf)?; + } else { + 
// copy the whole run up to the next escape at once; pushing single + // bytes as `char`s would split multi-byte UTF-8 sequences + let end = data.iter().position(|b| *b == b'\\').unwrap_or(data.len()); + let run = std::str::from_utf8(&data[..end]).map_err(|_| Error::InvalidStringValue)?; + str_buf.push_str(run); + data = &data[end..]; + } + } + Cow::Owned(str_buf) + } else { + std::str::from_utf8(data) + .map(Cow::Borrowed) + .map_err(|_| Error::InvalidStringValue)? + }; + Ok(Value::String(val)) + } + + fn parse_json_array(&mut self) -> Result<Value<'a>, Error> { + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte != b'[' { + return Err(Error::InvalidArrayValue); + } + self.buf = &self.buf[1..]; + let mut first = true; + let mut values = Vec::new(); + loop { + self.skip_unused(); + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte == b']' { + self.buf = &self.buf[1..]; + break; + } + if !first { + if *byte != b',' { + return Err(Error::InvalidArrayValue); + } + self.buf = &self.buf[1..]; + } + first = false; + let value = self.parse_json_value()?; + values.push(value); + } + Ok(Value::Array(values)) + } + + fn parse_json_object(&mut self) -> Result<Value<'a>, Error> { + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte != b'{' { + return Err(Error::InvalidObjectValue); + } + self.buf = &self.buf[1..]; + let mut first = true; + let mut obj = Object::new(); + loop { + self.skip_unused(); + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte == b'}' { + self.buf = &self.buf[1..]; + break; + } + if !first { + if *byte != b',' { + return Err(Error::InvalidObjectValue); + } + self.buf = &self.buf[1..]; + } + first = false; + self.skip_unused(); + let key = self.parse_json_string()?; + self.skip_unused(); + let byte = self.buf.first().ok_or(Error::InvalidEOF)?; + if *byte != b':' { + return Err(Error::InvalidObjectValue); + } + self.buf = &self.buf[1..]; + let value = self.parse_json_value()?; + + let k = key.as_str().unwrap(); + obj.insert(k.to_string(), value); + } + Ok(Value::Object(obj)) + } +} diff --git a/src/common/jsonb/src/ser.rs b/src/common/jsonb/src/ser.rs new file mode 100644 index 0000000000000..8966e99f49b9d --- /dev/null +++ b/src/common/jsonb/src/ser.rs @@ -0,0 +1,162 @@ +// Copyright 2022
Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use byteorder::BigEndian; +use byteorder::WriteBytesExt; + +use super::constants::*; +use super::error::Error; +use super::jentry::JEntry; +use super::value::Object; +use super::value::Value; + +pub struct Encoder<'a> { + pub buf: &'a mut Vec, +} + +impl<'a> Encoder<'a> { + pub fn new(buf: &'a mut Vec) -> Encoder<'a> { + Self { buf } + } + + // Encode `JSONB` Value to a sequence of bytes + pub fn encode(&mut self, value: &Value<'a>) -> Result<(), Error> { + match value { + Value::Array(array) => self.encode_array(array)?, + Value::Object(obj) => self.encode_object(obj)?, + _ => self.encode_scalar(value)?, + }; + Ok(()) + } + + // Encoded `Scalar` consists of a `Header`, a `JEntry` and encoded data + fn encode_scalar(&mut self, value: &Value<'a>) -> Result { + self.buf + .write_u32::(SCALAR_CONTAINER_TAG) + .unwrap(); + + // Scalar Value only has one JEntry + let mut scalar_len = 4 + 4; + let mut jentry_index = self.reserve_jentries(4); + + let jentry = self.encode_value(value)?; + scalar_len += jentry.length as usize; + self.replace_jentry(jentry, &mut jentry_index); + + Ok(scalar_len) + } + + // Encoded `Array` consists of a `Header`, N `JEntries` and encoded data + // N is the number of `Array` inner values + fn encode_array(&mut self, values: &[Value<'a>]) -> Result { + let header = ARRAY_CONTAINER_TAG | values.len() as u32; + self.buf.write_u32::(header).unwrap(); + + // `Array` has N 
`JEntries` + let mut array_len = 4 + values.len() * 4; + let mut jentry_index = self.reserve_jentries(values.len() * 4); + + // encode all values + for value in values.iter() { + let jentry = self.encode_value(value)?; + array_len += jentry.length as usize; + self.replace_jentry(jentry, &mut jentry_index); + } + + Ok(array_len) + } + + // Encoded `Object` consists of a `Header`, 2 * N `JEntries` and encoded data + // N is the number of `Object` inner key value pair + fn encode_object(&mut self, obj: &Object<'a>) -> Result { + let header = OBJECT_CONTAINER_TAG | obj.len() as u32; + self.buf.write_u32::(header).unwrap(); + + // `Object` has 2 * N `JEntries` + let mut object_len = 4 + obj.len() * 8; + let mut jentry_index = self.reserve_jentries(obj.len() * 8); + + // encode all keys first + for (key, _) in obj.iter() { + let len = key.len(); + object_len += len; + self.buf.extend_from_slice(key.as_bytes()); + let jentry = JEntry::make_string_jentry(len); + self.replace_jentry(jentry, &mut jentry_index); + } + // encode all values + for (_, value) in obj.iter() { + let jentry = self.encode_value(value)?; + object_len += jentry.length as usize; + self.replace_jentry(jentry, &mut jentry_index); + } + + Ok(object_len) + } + + // Reserve space for `JEntries` and fill them later + // As the length of each `Value` cannot be known until the `Value` encoded + fn reserve_jentries(&mut self, len: usize) -> usize { + let old_len = self.buf.len(); + let new_len = old_len + len; + self.buf.resize(new_len, 0); + old_len + } + + // Write encoded `JEntry` to the corresponding index + fn replace_jentry(&mut self, jentry: JEntry, jentry_index: &mut usize) { + let jentry_bytes = jentry.encoded().to_be_bytes(); + for (i, b) in jentry_bytes.iter().enumerate() { + self.buf[*jentry_index + i] = *b; + } + *jentry_index += 4; + } + + // `Null` and `Boolean` only has a `JEntry` + // `Number` and `String` has a `JEntry` and an encoded data + // `Array` and `Object` has a container `JEntry` and 
nested encoded data + fn encode_value(&mut self, value: &Value<'a>) -> Result { + let jentry = match value { + Value::Null => JEntry::make_null_jentry(), + Value::Bool(v) => { + if *v { + JEntry::make_true_jentry() + } else { + JEntry::make_false_jentry() + } + } + Value::Number(v) => { + let old_off = self.buf.len(); + let _ = v.compact_encode(&mut self.buf)?; + let len = self.buf.len() - old_off; + JEntry::make_number_jentry(len) + } + Value::String(s) => { + let len = s.len(); + self.buf.extend_from_slice(s.as_ref().as_bytes()); + JEntry::make_string_jentry(len) + } + Value::Array(array) => { + let len = self.encode_array(array)?; + JEntry::make_container_jentry(len) + } + Value::Object(obj) => { + let len = self.encode_object(obj)?; + JEntry::make_container_jentry(len) + } + }; + + Ok(jentry) + } +} diff --git a/src/common/jsonb/src/util.rs b/src/common/jsonb/src/util.rs new file mode 100644 index 0000000000000..1d8cfee94dea6 --- /dev/null +++ b/src/common/jsonb/src/util.rs @@ -0,0 +1,122 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::io::Read;
+
+use super::constants::*;
+use super::error::Error;
+
+// Lookup table from an ASCII byte to its hex digit value; 255 marks a non-hex byte.
+#[allow(clippy::zero_prefixed_literal)]
+static HEX: [u8; 256] = {
+    const __: u8 = 255; // not a hex digit
+    [
+        //   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 0
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 1
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 2
+        00, 01, 02, 03, 04, 05, 06, 07, 08, 09, __, __, __, __, __, __, // 3
+        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 4
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 5
+        __, 10, 11, 12, 13, 14, 15, __, __, __, __, __, __, __, __, __, // 6
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 7
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 8
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // 9
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // A
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // B
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // C
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // D
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // E
+        __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, __, // F
+    ]
+};
+
+/// Decodes one JSON escape sequence whose leading backslash was already consumed.
+/// Appends the decoded character to `str_buf` and returns the unread rest of `data`;
+/// empty or truncated input is reported as an error instead of panicking.
+pub fn parse_escaped_string<'a>(
+    mut data: &'a [u8],
+    str_buf: &mut String,
+) -> Result<&'a [u8], Error> {
+    let (&byte, rest) = data.split_first().ok_or(Error::InvalidEOF)?;
+    data = rest;
+    match byte {
+        b'\\' => str_buf.push(BS),
+        b'"' => str_buf.push(QU),
+        b'/' => str_buf.push(SD),
+        b'b' => str_buf.push(BB),
+        b'f' => str_buf.push(FF),
+        b'n' => str_buf.push(NN),
+        b'r' => str_buf.push(RR),
+        b't' => str_buf.push(TT),
+        b'u' => {
+            let mut numbers = vec![0; UNICODE_LEN];
+            data.read_exact(numbers.as_mut_slice())?;
+            let hex = decode_hex_escape(numbers)?;
+
+            let c = match hex {
+                n @ 0xDC00..=0xDFFF => {
+                    return Err(Error::InvalidLoneLeadingSurrogateInHexEscape(n));
+                }
+
+                // Non-BMP characters are encoded as a sequence of two hex
+                // escapes, representing UTF-16 surrogates. If deserializing a
+                // utf-8 string the surrogates are required to be paired,
+                // whereas deserializing a byte string accepts lone surrogates.
+                n1 @ 0xD800..=0xDBFF => {
+                    // `str_buf` is UTF-8, so the low surrogate escape `\uXXXX` must follow.
+                    data = match data {
+                        [b'\\', b'u', rest @ ..] => rest,
+                        [] | [b'\\'] => return Err(Error::InvalidEOF),
+                        _ => return Err(Error::UnexpectedEndOfHexEscape),
+                    };
+                    let mut numbers = vec![0; UNICODE_LEN];
+                    data.read_exact(numbers.as_mut_slice())?;
+                    let n2 = decode_hex_escape(numbers)?;
+                    if !(0xDC00..=0xDFFF).contains(&n2) {
+                        return Err(Error::InvalidSurrogateInHexEscape(n2));
+                    }
+
+                    let n = (((n1 - 0xD800) as u32) << 10 | (n2 - 0xDC00) as u32) + 0x1_0000;
+                    char::from_u32(n as u32).unwrap()
+                }
+
+                // Every u16 outside of the surrogate ranges above is guaranteed
+                // to be a legal char.
+                n => char::from_u32(n as u32).unwrap(),
+            };
+            str_buf.push(c);
+        }
+        other => return Err(Error::InvalidEscaped(other)),
+    }
+    Ok(data)
+}
+
+/// Returns the value of the ASCII hex digit `val`, or `None` if it is not a hex digit.
+#[inline]
+fn decode_hex_val(val: u8) -> Option<u16> {
+    let n = HEX[val as usize] as u16;
+    if n == 255 { None } else { Some(n) }
+}
+
+/// Folds the hex digits in `numbers` into one `u16`, most significant digit first.
+#[inline]
+fn decode_hex_escape(numbers: Vec<u8>) -> Result<u16, Error> {
+    let mut n = 0;
+    for number in numbers {
+        let hex = decode_hex_val(number).ok_or(Error::InvalidHex(number))?;
+        n = (n << 4) + hex;
+    }
+    Ok(n)
+}
diff --git a/src/common/jsonb/src/value.rs b/src/common/jsonb/src/value.rs
new file mode 100644
index 0000000000000..779022e27ebb0
--- /dev/null
+++ b/src/common/jsonb/src/value.rs
@@ -0,0 +1,224 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+use std::collections::BTreeMap;
+use std::fmt::Debug;
+use std::fmt::Display;
+use std::fmt::Formatter;
+use std::fmt::{self};
+use std::io::Write;
+use std::ops::Neg;
+
+use decimal_rs::Decimal;
+
+use super::error::Error;
+use super::ser::Encoder;
+
+/// JSON object: member names mapped to their values, iterated in sorted key order.
+pub type Object<'a> = BTreeMap<String, Value<'a>>;
+
+/// In-memory JSONB value tree.
+/// Numbers are held as `Decimal`; strings are `Cow`, so they may be borrowed or owned.
+#[derive(Clone, PartialEq)]
+pub enum Value<'a> {
+    Null,
+    Bool(bool),
+    String(Cow<'a, str>),
+    Number(Decimal),
+    Array(Vec<Value<'a>>),
+    Object(Object<'a>),
+}
+
+// Debug output: numbers print bare, every other variant is tagged with its variant name.
+impl<'a> Debug for Value<'a> {
+    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Value::Null => formatter.debug_tuple("Null").finish(),
+            Value::Bool(v) => formatter.debug_tuple("Bool").field(&v).finish(),
+            Value::Number(ref v) => Debug::fmt(v, formatter),
+            Value::String(ref v) => formatter.debug_tuple("String").field(v).finish(),
+            Value::Array(ref v) => {
+                formatter.write_str("Array(")?;
+                Debug::fmt(v, formatter)?;
+                formatter.write_str(")")
+            }
+            Value::Object(ref v) => {
+                formatter.write_str("Object(")?;
+                Debug::fmt(v, formatter)?;
+                formatter.write_str(")")
+            }
+        }
+    }
+}
+
+/// Renders the value as compact JSON-style text with no whitespace between tokens.
+/// Strings and object keys are quoted and escaped via Rust's `{:?}` formatting.
+impl<'a> Display for Value<'a> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Value::Null => write!(f, "null"),
+            Value::Bool(v) => write!(f, "{v}"),
+            Value::Number(ref v) => write!(f, "{}", v),
+            Value::String(ref v) => {
+                write!(f, "{:?}", v)
+            }
+            Value::Array(ref vs) => {
+                write!(f, "[")?;
+                for (i, v) in vs.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ",")?;
+                    }
+                    write!(f, "{v}")?;
+                }
+                write!(f, "]")
+            }
+            Value::Object(ref vs) => {
+                write!(f, "{{")?;
+                for (i, (k, v)) in vs.iter().enumerate() {
+                    if i > 0 {
+                        write!(f, ",")?;
+                    }
+                    write!(f, "{:?}", k)?;
+                    write!(f, ":")?;
+                    write!(f, "{v}")?;
+                }
+                write!(f, "}}")
+            }
+        }
+    }
+}
+
+impl<'a> Value<'a> {
+    pub fn is_object(&self) -> bool {
+        self.as_object().is_some()
+    }
+
+    /// Returns the inner map if this is an `Object`, otherwise `None`.
+    pub fn as_object(&self) -> Option<&Object<'a>> {
+        match self {
+            Value::Object(ref obj) => Some(obj),
+            _ => None,
+        }
+    }
+
+    pub fn is_array(&self) -> bool {
+        self.as_array().is_some()
+    }
+
+    /// Returns the inner elements if this is an `Array`, otherwise `None`.
+    pub fn as_array(&self) -> Option<&Vec<Value<'a>>> {
+        match self {
+            Value::Array(ref array) => Some(array),
+            _ => None,
+        }
+    }
+
+    pub fn is_string(&self) -> bool {
+        self.as_str().is_some()
+    }
+
+    /// Returns the inner text if this is a `String`, otherwise `None`.
+    pub fn as_str(&self) -> Option<&Cow<'_, str>> {
+        match self {
+            Value::String(s) => Some(s),
+            _ => None,
+        }
+    }
+
+    pub fn is_number(&self) -> bool {
+        matches!(self, Value::Number(_))
+    }
+
+    /// Returns `true` when `as_i64` would yield a value.
+    pub fn is_i64(&self) -> bool {
+        self.as_i64().is_some()
+    }
+
+    /// Returns `true` when `as_u64` would yield a value.
+    pub fn is_u64(&self) -> bool {
+        self.as_u64().is_some()
+    }
+
+    /// Returns `true` when `as_f64` would yield a value.
+    pub fn is_f64(&self) -> bool {
+        self.as_f64().is_some()
+    }
+
+    /// Returns the number as `i64`; `None` unless it has scale 0 and fits in `i64`.
+    pub fn as_i64(&self) -> Option<i64> {
+        match self {
+            Value::Number(d) if d.scale() == 0 => {
+                // Magnitude and sign come back separately; use checked conversions
+                // because an `as` cast would silently wrap out-of-range integers.
+                let (v, _, is_neg) = d.into_parts();
+                let v = i128::try_from(v).ok()?;
+                i64::try_from(if is_neg { v.neg() } else { v }).ok()
+            }
+            _ => None,
+        }
+    }
+
+    /// Returns the number as `u64`; `None` unless it is non-negative, has scale 0 and fits.
+    pub fn as_u64(&self) -> Option<u64> {
+        match self {
+            Value::Number(d) if d.scale() == 0 && d.is_sign_positive() => {
+                let (v, _, is_neg) = d.into_parts();
+                if is_neg { None } else { u64::try_from(v).ok() }
+            }
+            _ => None,
+        }
+    }
+
+    /// Returns the number converted to `f64`, or `None` if this is not a `Number`.
+    pub fn as_f64(&self) -> Option<f64> {
+        match self {
+            Value::Number(d) => Some(d.into()),
+            _ => None,
+        }
+    }
+
+    pub fn is_boolean(&self) -> bool {
+        self.as_bool().is_some()
+    }
+
+    /// Returns the inner flag if this is a `Bool`, otherwise `None`.
+    pub fn as_bool(&self) -> Option<bool> {
+        match self {
+            Value::Bool(v) => Some(*v),
+            _ => None,
+        }
+    }
+
+    pub fn is_null(&self) -> bool {
+        self.as_null().is_some()
+    }
+
+    /// Returns `Some(())` if this is `Null`, otherwise `None`.
+    pub fn as_null(&self) -> Option<()> {
+        match self {
+            Value::Null => Some(()),
+            _ => None,
+        }
+    }
+
+    /// Attempts to serialize the JSONB Value into a byte stream.
+    pub fn to_writer<W: Write>(&self, mut writer: W) -> Result<(), Error> {
+        let mut buf = Vec::new();
+        let mut encoder = Encoder::new(&mut buf);
+        encoder.encode(self)?;
+        writer.write_all(&buf)?;
+        Ok(())
+    }
+}
diff --git a/src/common/jsonb/tests/it/decode.rs b/src/common/jsonb/tests/it/decode.rs
new file mode 100644
index 0000000000000..d923ecf68f948
--- /dev/null
+++ b/src/common/jsonb/tests/it/decode.rs
@@ -0,0 +1,90 @@
+// Copyright 2022 Datafuse Labs.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::borrow::Cow;
+
+use common_jsonb::from_slice;
+
+#[test]
+fn test_decode_null() {
+    let encoded = b"\x20\0\0\0\0\0\0\0";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_null());
+    assert_eq!(decoded.as_null(), Some(()));
+}
+
+#[test]
+fn test_decode_boolean() {
+    let encoded_true = b"\x20\0\0\0\x40\0\0\0";
+    let decoded = from_slice(encoded_true).unwrap();
+    assert!(decoded.is_boolean());
+    assert_eq!(decoded.as_bool(), Some(true));
+
+    let encoded_false = b"\x20\0\0\0\x30\0\0\0";
+    let decoded = from_slice(encoded_false).unwrap();
+    assert!(decoded.is_boolean());
+    assert_eq!(decoded.as_bool(), Some(false));
+}
+
+#[test]
+fn test_decode_string() {
+    let encoded = b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_string());
+    assert_eq!(decoded.as_str(), Some(&Cow::from("asd")));
+}
+
+#[test]
+fn test_decode_int64() {
+    let encoded = b"\x20\0\0\0\x20\0\0\x01\x64";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_i64());
+    assert_eq!(decoded.as_i64(), Some(100i64));
+}
+
+#[test]
+fn test_decode_float64() {
+    let encoded = b"\x20\0\0\0\x20\0\0\x03\x02\x04\x7b";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_f64());
+    assert_eq!(decoded.as_f64(), Some(0.0123f64));
+}
+
+#[test]
+fn test_decode_array() {
+    // A two-element array holding `false` followed by `true`.
+    let encoded = b"\x80\0\0\x02\x30\0\0\0\x40\0\0\0";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_array());
+    let items = decoded.as_array().unwrap();
+    assert_eq!(items.len(), 2);
+    // The length was checked above, so zipping visits both elements in order.
+    for (item, expected) in items.iter().zip([false, true]) {
+        assert!(item.is_boolean());
+        assert_eq!(item.as_bool(), Some(expected));
+    }
+}
+
+#[test]
+fn test_decode_object() {
+    let encoded = b"\x40\0\0\x01\x10\0\0\x03\x10\0\0\x03\x61\x73\x64\x61\x64\x66";
+    let decoded = from_slice(encoded).unwrap();
+    assert!(decoded.is_object());
+    let members = decoded.as_object().unwrap();
+    assert_eq!(members.len(), 1);
+
+    let member = &members["asd"];
+    assert!(member.is_string());
+    assert_eq!(member.as_str(), Some(&Cow::from("adf")));
+}
diff --git a/src/common/jsonb/tests/it/main.rs b/src/common/jsonb/tests/it/main.rs new file mode 100644 index 0000000000000..7c9a0ee9c8625 --- /dev/null +++ b/src/common/jsonb/tests/it/main.rs @@ -0,0 +1,16 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +mod decode; +mod parser; diff --git a/src/common/jsonb/tests/it/parser.rs b/src/common/jsonb/tests/it/parser.rs new file mode 100644 index 0000000000000..9e5d21d1566ae --- /dev/null +++ b/src/common/jsonb/tests/it/parser.rs @@ -0,0 +1,192 @@ +// Copyright 2022 Datafuse Labs. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::borrow::Cow;
+
+use common_jsonb::parse_value;
+
+#[test]
+fn test_parse_null() {
+    let text = "null";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_null());
+    assert_eq!(parsed.as_null(), Some(()));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\0\0\0\0");
+}
+
+#[test]
+fn test_parse_boolean() {
+    let text = "true";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert_eq!(parsed.as_bool(), Some(true));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x40\0\0\0");
+
+    let text = "false";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_boolean());
+    assert_eq!(parsed.as_bool(), Some(false));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x30\0\0\0");
+}
+
+#[test]
+fn test_parse_number_int64() {
+    let text = "-1234";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_i64());
+    assert_eq!(parsed.as_i64(), Some(-1234));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x20\0\0\x04\x03\0\xd2\x04");
+
+    let text = "34567890";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_i64());
+    assert_eq!(parsed.as_i64(), Some(34567890));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x20\0\0\x06\x02\0\xd2\x76\x0f\x02");
+}
+
+#[test]
+fn test_parse_number_float64() {
+    let text = "0.0123";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_f64());
+    assert_eq!(parsed.as_f64(), Some(0.0123));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x20\0\0\x03\x02\x04\x7b");
+
+    let text = "12.34e5";
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_f64());
+    assert_eq!(parsed.as_f64(), Some(1234000.0));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x20\0\0\x04\0\x03\xd2\x04");
+}
+
+#[test]
+fn test_parse_string() {
+    let text = r#""asd""#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_string());
+    assert_eq!(parsed.as_str(), Some(&Cow::from("asd")));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x10\0\0\x03\x61\x73\x64");
+
+    let text = r#""\\\"abc\\\"""#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_string());
+    assert_eq!(parsed.as_str(), Some(&Cow::from("\\\"abc\\\"")));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x10\0\0\x07\x5c\x22\x61\x62\x63\x5c\x22");
+
+    let text = r#""测试abc""#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_string());
+    assert_eq!(parsed.as_str(), Some(&Cow::from("测试abc")));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(
+        encoded,
+        b"\x20\0\0\0\x10\0\0\x09\xe6\xb5\x8b\xe8\xaf\x95\x61\x62\x63"
+    );
+
+    let text = r#""\u20AC""#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_string());
+    assert_eq!(parsed.as_str(), Some(&Cow::from("€")));
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x20\0\0\0\x10\0\0\x03\xe2\x82\xac");
+}
+
+#[test]
+fn test_parse_array() {
+    let text = r#"[true,12345,-200,79.1234,"asd",[]]"#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_array());
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x80\0\0\x06\x40\0\0\0\x20\0\0\x02\x20\0\0\x03\x20\0\0\x05\x10\0\0\x03\x50\0\0\x04\x39\x30\x03\0\xc8\x02\x04\xc2\x12\x0c\x61\x73\x64\x80\0\0\0");
+
+    let text = r#"[1,2,3,["a","b","c"]]"#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_array());
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x80\0\0\x04\x20\0\0\x01\x20\0\0\x01\x20\0\0\x01\x50\0\0\x13\x01\x02\x03\x80\0\0\x03\x10\0\0\x01\x10\0\0\x01\x10\0\0\x01\x61\x62\x63");
+}
+
+#[test]
+fn test_parse_object() {
+    let text = r#"{"k1":"v1","k2":"v2"}"#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_object());
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(encoded, b"\x40\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x10\0\0\x02\x6b\x31\x6b\x32\x76\x31\x76\x32");
+
+    let text = r#"{"k":"v","a":"b"}"#;
+    let parsed = parse_value(text.as_bytes()).unwrap();
+    assert!(parsed.is_object());
+
+    let mut encoded = Vec::<u8>::new();
+    parsed.to_writer(&mut encoded).unwrap();
+    assert_eq!(
+        encoded,
+        b"\x40\0\0\x02\x10\0\0\x01\x10\0\0\x01\x10\0\0\x01\x10\0\0\x01\x61\x6b\x62\x76"
+    );
+}
+
+#[test]
+fn test_parse_invalid() {
+    let invalid_inputs = [
+        "nul",
+        "fals",
+        "123ab",
+        r#""abc"#,
+        "[1,2",
+        "[1 2]",
+        r#"{"k":"v""#,
+        r#"{123:"v"}"#,
+        r#"{"k" "v"}"#,
+    ];
+    for text in invalid_inputs {
+        assert!(parse_value(text.as_bytes()).is_err());
+    }
+}
diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml
index b01e4456ca1a9..625e2bbcbd472 100755
--- a/src/query/expression/Cargo.toml
+++ b/src/query/expression/Cargo.toml
@@ -20,19 +20,16 @@ common-exception = { path = "../../common/exception" }
 chrono = "0.4"
 chrono-tz = "0.6.1"
 comfy-table = "6"
+common-jsonb = { path = "../../common/jsonb" }
 educe = "0.4"
 enum-as-inner = "0.4"
 hex = "0.4.3"
 itertools = "0.10"
 match-template = "0.0.1"
 num-traits = "0.2.15"
-# TODO(andylokandy): Use the version from crates.io once
-# https://github.com/reem/rust-ordered-float/pull/110 is released.
 ordered-float = { git = "https://github.com/andylokandy/rust-ordered-float.git", branch = "as", features = ["serde"] }
 rust_decimal = "1.26"
 serde = "1.0"
-# TODO: Switch to jsonb. bson is used for placeholder.
-bson = "2.4" [dev-dependencies] common-ast = { path = "../ast" } diff --git a/src/query/expression/src/display.rs b/src/query/expression/src/display.rs index ce460a5ad708a..ff62dd79e12b0 100755 --- a/src/query/expression/src/display.rs +++ b/src/query/expression/src/display.rs @@ -18,7 +18,6 @@ use std::fmt::Formatter; use std::time::Duration; use std::time::UNIX_EPOCH; -use bson::Document; use chrono::DateTime; use chrono::Utc; use comfy_table::Cell; @@ -154,9 +153,8 @@ impl<'a> Display for ScalarRef<'a> { ) } ScalarRef::Variant(s) => { - let doc = Document::from_reader(*s).map_err(|_| std::fmt::Error)?; - let bson = doc.get("v").ok_or(std::fmt::Error)?; - write!(f, "{bson}") + let value = common_jsonb::from_slice(*s).map_err(|_| std::fmt::Error)?; + write!(f, "{value}") } } } diff --git a/src/query/expression/src/types/variant.rs b/src/query/expression/src/types/variant.rs index 36fac0736176a..1ec44fedee42a 100644 --- a/src/query/expression/src/types/variant.rs +++ b/src/query/expression/src/types/variant.rs @@ -26,8 +26,8 @@ use crate::values::Column; use crate::values::Scalar; use crate::values::ScalarRef; -/// BSON bytes representation of `{v: null}`. -pub const DEFAULT_BSON: &[u8] = &[0x08, 0x00, 0x00, 0x00, 0x0A, 0x76, 0x00, 0x00]; +/// JSONB bytes representation of `null`. 
+pub const DEFAULT_JSONB: &[u8] = &[0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; #[derive(Debug, Clone, PartialEq, Eq)] pub struct VariantType; @@ -113,7 +113,7 @@ impl ValueType for VariantType { } fn push_default(builder: &mut Self::ColumnBuilder) { - builder.put_slice(DEFAULT_BSON); + builder.put_slice(DEFAULT_JSONB); builder.commit_row(); } diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index 91cbfb202d593..75af8ca63a8f3 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -46,7 +46,7 @@ use crate::types::timestamp::Timestamp; use crate::types::timestamp::TimestampColumn; use crate::types::timestamp::TimestampColumnBuilder; use crate::types::timestamp::TimestampDomain; -use crate::types::variant::DEFAULT_BSON; +use crate::types::variant::DEFAULT_JSONB; use crate::types::*; use crate::util::append_bitmap; use crate::util::bitmap_into_mut; @@ -1069,7 +1069,7 @@ impl ColumnBuilder { *len += 1; } ColumnBuilder::Variant(builder) => { - builder.put_slice(DEFAULT_BSON); + builder.put_slice(DEFAULT_JSONB); builder.commit_row(); } } diff --git a/src/query/functions-v2/Cargo.toml b/src/query/functions-v2/Cargo.toml index 285e379108673..d538e1ab6a0d6 100644 --- a/src/query/functions-v2/Cargo.toml +++ b/src/query/functions-v2/Cargo.toml @@ -25,15 +25,13 @@ match-template = "0.0.1" num-traits = "0.2.15" # TODO(andylokandy): Use the version from crates.io once # https://github.com/reem/rust-ordered-float/pull/110 is released. +common-jsonb = { path = "../../common/jsonb" } ordered-float = { git = "https://github.com/andylokandy/rust-ordered-float.git", branch = "as", features = [ "serde", "rand", ] } rand = { version = "0.8.5", features = ["small_rng"] } strength_reduce = "0.2.3" -# TODO: Switch to jsonb. bson is used for placeholder. 
-bson = "2.4" -serde_json = "1.0" [dev-dependencies] comfy-table = "6" diff --git a/src/query/functions-v2/src/scalars/variant.rs b/src/query/functions-v2/src/scalars/variant.rs index 9243aea5323d1..8bf8bffd6eb33 100644 --- a/src/query/functions-v2/src/scalars/variant.rs +++ b/src/query/functions-v2/src/scalars/variant.rs @@ -12,18 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::convert::TryInto; - -use bson::Bson; -use bson::Document; use bstr::ByteSlice; -use common_expression::types::variant::DEFAULT_BSON; +use common_expression::types::variant::DEFAULT_JSONB; use common_expression::types::StringType; use common_expression::types::VariantType; use common_expression::vectorize_with_builder_1_arg; use common_expression::FunctionProperty; use common_expression::FunctionRegistry; -use serde_json::Value; +use common_jsonb::parse_value; pub fn register(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( @@ -32,22 +28,20 @@ pub fn register(registry: &mut FunctionRegistry) { |_| None, vectorize_with_builder_1_arg::(|s, output| { if s.trim().is_empty() { - output.put_slice(DEFAULT_BSON); + output.put_slice(DEFAULT_JSONB); output.commit_row(); return Ok(()); } - - let json: Value = serde_json::from_slice(s).map_err(|err| { + let value = parse_value(s).map_err(|err| { format!("unable to parse '{}': {}", &String::from_utf8_lossy(s), err) })?; - let bson: Bson = json - .try_into() - .map_err(|err| format!("unable to convert json to bson: {}", err))?; - let mut doc = Document::new(); - doc.insert("v", bson); - output - .write_row(|writer| doc.to_writer(writer)) - .map_err(|err| format!("unable to encode bson: {}", err))?; + let mut buf: Vec = Vec::new(); + value + .to_writer(&mut buf) + .map_err(|_| "unable to encode jsonb".to_string())?; + output.put_slice(buf.as_slice()); + output.commit_row(); + Ok(()) }), ); diff --git 
a/src/query/functions-v2/tests/it/scalars/testdata/variant.txt b/src/query/functions-v2/tests/it/scalars/testdata/variant.txt index 50c02d3736950..c5eab8bc14d0b 100644 --- a/src/query/functions-v2/tests/it/scalars/testdata/variant.txt +++ b/src/query/functions-v2/tests/it/scalars/testdata/variant.txt @@ -11,14 +11,14 @@ error: --> SQL:1:1 | 1 | parse_json('nuLL') - | ^^^^^^^^^^^^^^^^^^ unable to parse 'nuLL': expected ident at line 1 column 3 + | ^^^^^^^^^^^^^^^^^^ unable to parse 'nuLL': InvalidNullValue ast : parse_json('null') raw expr : parse_json("null") checked expr : parse_json("null") -optimized expr : 0x080000000a760000 +optimized expr : 0x2000000000000000 output type : Variant output domain : Unknown output : null @@ -27,38 +27,92 @@ output : null ast : parse_json(' ') raw expr : parse_json(" \t") checked expr : parse_json(" \t") -optimized expr : 0x080000000a760000 +optimized expr : 0x2000000000000000 output type : Variant output domain : Unknown output : null +ast : parse_json('true') +raw expr : parse_json("true") +checked expr : parse_json("true") +optimized expr : 0x2000000040000000 +output type : Variant +output domain : Unknown +output : true + + +ast : parse_json('false') +raw expr : parse_json("false") +checked expr : parse_json("false") +optimized expr : 0x2000000030000000 +output type : Variant +output domain : Unknown +output : false + + +ast : parse_json('"测试"') +raw expr : parse_json("\"测试\"") +checked expr : parse_json("\"测试\"") +optimized expr : 0x2000000010000006e6b58be8af95 +output type : Variant +output domain : Unknown +output : "测试" + + +ast : parse_json('1234') +raw expr : parse_json("1234") +checked expr : parse_json("1234") +optimized expr : 0x2000000020000002d204 +output type : Variant +output domain : Unknown +output : 1234 + + +ast : parse_json('[1,2,3,4]') +raw expr : parse_json("[1,2,3,4]") +checked expr : parse_json("[1,2,3,4]") +optimized expr : 0x800000042000000120000001200000012000000101020304 +output type : Variant 
+output domain : Unknown +output : [1,2,3,4] + + +ast : parse_json('{"a":"b","c":"d"}') +raw expr : parse_json("{\"a\":\"b\",\"c\":\"d\"}") +checked expr : parse_json("{\"a\":\"b\",\"c\":\"d\"}") +optimized expr : 0x400000021000000110000001100000011000000161636264 +output type : Variant +output domain : Unknown +output : {"a":"b","c":"d"} + + ast : parse_json(s) raw expr : parse_json(ColumnRef(0)::String) checked expr : parse_json(ColumnRef(0)) evaluation: -+--------+-----------------------------------------------------------+----------------------------+ -| | s | Output | -+--------+-----------------------------------------------------------+----------------------------+ -| Type | String | Variant | -| Domain | {"\"\\\\\\\"abc\\\\\\\"\""..="{\"k\":\"v\",\"a\":\"b\"}"} | Unknown | -| Row 0 | "null" | null | -| Row 1 | "true" | true | -| Row 2 | "9223372036854775807" | 9223372036854775807 | -| Row 3 | "-32768" | -32768 | -| Row 4 | "1234.5678" | 1234.5678 | -| Row 5 | "1.912e2" | 191.2 | -| Row 6 | "\"\\\\\\\"abc\\\\\\\"\"" | "\"abc\"" | -| Row 7 | "\"databend\"" | "databend" | -| Row 8 | "{\"k\":\"v\",\"a\":\"b\"}" | { "k": "v", "a": "b" } | -| Row 9 | "[1,2,3,[\"a\",\"b\",\"c\"]]" | [1, 2, 3, ["a", "b", "c"]] | -+--------+-----------------------------------------------------------+----------------------------+ ++--------+-----------------------------------------------------------+-----------------------+ +| | s | Output | ++--------+-----------------------------------------------------------+-----------------------+ +| Type | String | Variant | +| Domain | {"\"\\\\\\\"abc\\\\\\\"\""..="{\"k\":\"v\",\"a\":\"b\"}"} | Unknown | +| Row 0 | "null" | null | +| Row 1 | "true" | true | +| Row 2 | "9223372036854775807" | 9223372036854775807 | +| Row 3 | "-32768" | -32768 | +| Row 4 | "1234.5678" | 1234.5678 | +| Row 5 | "1.912e2" | 191.2 | +| Row 6 | "\"\\\\\\\"abc\\\\\\\"\"" | "\\\"abc\\\"" | +| Row 7 | "\"databend\"" | "databend" | +| Row 8 | "{\"k\":\"v\",\"a\":\"b\"}" 
| {"a":"b","k":"v"} | +| Row 9 | "[1,2,3,[\"a\",\"b\",\"c\"]]" | [1,2,3,["a","b","c"]] | ++--------+-----------------------------------------------------------+-----------------------+ evaluation (internal): -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| Column | Data | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| s | StringColumn { data: 0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c2222226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 62, 72, 89, 110] } | -| Output | StringColumn { data: 
0x080000000a76000009000000087600010010000000127600ffffffffffffff7f000c0000001076000080ffff0010000000017600adfa5c6d454a934000100000000176006666666666e667400014000000027600080000005c226162635c22000015000000027600090000006461746162656e6400001f00000003760017000000026b000200000076000261000200000062000000450000000476003d00000010300001000000103100020000001032000300000004330020000000023000020000006100023100020000006200023200020000006300000000, offsets: [0, 8, 17, 33, 45, 61, 77, 97, 118, 149, 218] } | -+--------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Column | Data | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| s | StringColumn { data: 
0x6e756c6c74727565393232333337323033363835343737353830372d3332373638313233342e35363738312e3931326532225c5c5c226162635c5c5c2222226461746162656e64227b226b223a2276222c2261223a2262227d5b312c322c332c5b2261222c2262222c2263225d5d, offsets: [0, 4, 8, 27, 33, 42, 49, 62, 72, 89, 110] } | +| Output | StringColumn { data: 0x20000000000000002000000040000000200000002000000a0200ffffffffffffff7f200000002000000403000080200000002000000502044e61bc20000000200000040201780720000000100000075c226162635c2220000000100000086461746162656e644000000210000001100000011000000110000001616b6276800000042000000120000001200000015000001301020380000003100000011000000110000001616263, offsets: [0, 8, 16, 34, 46, 59, 71, 86, 102, 126, 168] } | ++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/src/query/functions-v2/tests/it/scalars/variant.rs b/src/query/functions-v2/tests/it/scalars/variant.rs index d6dddf8a93ff2..f289ef915df1b 100644 --- a/src/query/functions-v2/tests/it/scalars/variant.rs +++ b/src/query/functions-v2/tests/it/scalars/variant.rs @@ -34,6 +34,13 @@ fn test_parse_json(file: &mut impl Write) { run_ast(file, "parse_json('nuLL')", &[]); run_ast(file, "parse_json('null')", &[]); run_ast(file, "parse_json(' \t')", &[]); + run_ast(file, "parse_json('true')", &[]); + run_ast(file, "parse_json('false')", &[]); + run_ast(file, "parse_json('\"测试\"')", &[]); + run_ast(file, "parse_json('1234')", &[]); + run_ast(file, "parse_json('[1,2,3,4]')", &[]); + run_ast(file, "parse_json('{\"a\":\"b\",\"c\":\"d\"}')", &[]); + run_ast(file, "parse_json(s)", &[( "s", DataType::String,