Skip to content

Commit 2094291

Browse files
author
Yorhel
committed
Add an ArrayDeserializer to read a JSON array as a stream
This mimics the StreamDeserializer API and implements issue serde-rs#404.
1 parent e6b02d1 commit 2094291

File tree

2 files changed

+243
-0
lines changed

2 files changed

+243
-0
lines changed

src/de.rs

+124
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,19 @@ impl<'de, R: Read<'de>> Deserializer<R> {
155155
}
156156
}
157157

158+
/// Parse the JSON array as an iterator over values of type T.
159+
pub fn into_array_iter<T>(self) -> ArrayDeserializer<'de, R, T>
160+
where
161+
T: de::Deserialize<'de>,
162+
{
163+
ArrayDeserializer {
164+
de: self,
165+
started: false,
166+
output: PhantomData,
167+
lifetime: PhantomData,
168+
}
169+
}
170+
158171
/// Parse arbitrarily deep JSON structures without any consideration for
159172
/// overflowing the stack.
160173
///
@@ -2149,6 +2162,117 @@ where
21492162
}
21502163
}
21512164

2165+
2166+
2167+
//////////////////////////////////////////////////////////////////////////////
2168+
2169+
/// Iterator that deserializes an array into multiple JSON values.
2170+
///
2171+
/// An array deserializer can be created from any JSON deserializer using the
2172+
/// `Deserializer::into_array_iter` method.
2173+
///
2174+
/// The top-level data should be a JSON array, but each array element can consist of any JSON
2175+
/// value. An array deserializer only needs to keep a single array element in memory, and is
2176+
/// therefore preferable over deserializing into a container type such as `Vec` when the complete
2177+
/// array is too large to fit in memory.
2178+
///
2179+
/// ```edition2018
2180+
/// use serde_json::{Deserializer, Value};
2181+
///
2182+
/// fn main() {
2183+
/// let data = "[{\"k\": 3}, 1, \"cool\", \"stuff\", [0, 1, 2]]";
2184+
///
2185+
/// let iter = Deserializer::from_str(data).into_array_iter::<Value>();
2186+
///
2187+
/// for value in iter {
2188+
/// println!("{}", value.unwrap());
2189+
/// }
2190+
/// }
2191+
/// ```
2192+
pub struct ArrayDeserializer<'de, R, T> {
2193+
de: Deserializer<R>,
2194+
started: bool, // True if we have consumed the first '['
2195+
output: PhantomData<T>,
2196+
lifetime: PhantomData<&'de ()>,
2197+
}
2198+
2199+
impl<'de, R, T> ArrayDeserializer<'de, R, T>
2200+
where
2201+
R: read::Read<'de>,
2202+
T: de::Deserialize<'de>,
2203+
{
2204+
/// Create a JSON array deserializer from one of the possible serde_json
2205+
/// input sources.
2206+
///
2207+
/// Typically it is more convenient to use one of these methods instead:
2208+
///
2209+
/// - Deserializer::from_str(...).into_array_iter()
2210+
/// - Deserializer::from_bytes(...).into_array_iter()
2211+
/// - Deserializer::from_reader(...).into_array_iter()
2212+
pub fn new(read: R) -> Self {
2213+
ArrayDeserializer {
2214+
de: Deserializer::new(read),
2215+
started: false,
2216+
output: PhantomData,
2217+
lifetime: PhantomData,
2218+
}
2219+
}
2220+
2221+
fn end(&mut self) -> Option<Result<T>> {
2222+
self.de.eat_char();
2223+
match self.de.end() {
2224+
Ok(_) => None,
2225+
Err(e) => Some(Err(e)),
2226+
}
2227+
}
2228+
2229+
fn next_value(&mut self) -> Option<Result<T>> {
2230+
match de::Deserialize::deserialize(&mut self.de) {
2231+
Ok(v) => Some(Ok(v)),
2232+
Err(e) => Some(Err(e))
2233+
}
2234+
}
2235+
}
2236+
2237+
impl<'de, R, T> Iterator for ArrayDeserializer<'de, R, T>
2238+
where
2239+
R: Read<'de>,
2240+
T: de::Deserialize<'de>,
2241+
{
2242+
type Item = Result<T>;
2243+
2244+
fn next(&mut self) -> Option<Result<T>> {
2245+
match self.de.parse_whitespace() {
2246+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2247+
Ok(Some(b'[')) if !self.started => {
2248+
self.started = true;
2249+
self.de.eat_char();
2250+
2251+
// We have to peek at the next character here to handle an empty array.
2252+
match self.de.parse_whitespace() {
2253+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2254+
Ok(Some(b']')) => self.end(),
2255+
Ok(Some(_)) => self.next_value(),
2256+
Err(e) => Some(Err(e)),
2257+
}
2258+
},
2259+
Ok(Some(b']')) if self.started => self.end(),
2260+
Ok(Some(b',')) if self.started => {
2261+
self.de.eat_char();
2262+
2263+
match self.de.parse_whitespace() {
2264+
Ok(None) => Some(Err(self.de.peek_error(ErrorCode::EofWhileParsingValue))),
2265+
Ok(Some(b']')) => Some(Err(self.de.peek_error(ErrorCode::TrailingComma))),
2266+
Ok(Some(_)) => self.next_value(),
2267+
Err(e) => Some(Err(e)),
2268+
}
2269+
},
2270+
Ok(Some(_)) => Some(Err(self.de.peek_error(ErrorCode::ExpectedSomeValue))),
2271+
Err(e) => Some(Err(e)),
2272+
}
2273+
}
2274+
}
2275+
21522276
//////////////////////////////////////////////////////////////////////////////
21532277

21542278
fn from_trait<'de, R, T>(read: R) -> Result<T>

tests/array.rs

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#![cfg(not(feature = "preserve_order"))]
2+
3+
extern crate serde;
4+
5+
#[macro_use]
6+
extern crate serde_json;
7+
8+
use serde_json::{Deserializer, Value};
9+
10+
// Rustfmt issue https://github.com/rust-lang-nursery/rustfmt/issues/2740
#[cfg_attr(rustfmt, rustfmt_skip)]
// Runs `$test` three times with `$stream` bound to an array iterator over
// `$data`, once for each input source: `&str`, byte slice and reader.
macro_rules! test_stream {
    ($data:expr, $ty:ty, |$stream:ident| $test:block) => {
        // Input supplied as a string slice.
        {
            let from_str = Deserializer::from_str($data);
            let mut $stream = from_str.into_array_iter::<$ty>();
            $test
        }
        // Input supplied as a byte slice.
        {
            let from_slice = Deserializer::from_slice($data.as_bytes());
            let mut $stream = from_slice.into_array_iter::<$ty>();
            $test
        }
        // Input supplied through a reader over the bytes.
        {
            let mut reader = $data.as_bytes();
            let from_reader = Deserializer::from_reader(&mut reader);
            let mut $stream = from_reader.into_array_iter::<$ty>();
            $test
        }
    };
}
32+
33+
#[test]
fn test_json_array_empty() {
    // An empty array must end the iteration immediately, without an error.
    let input = "[]";

    test_stream!(input, Value, |stream| {
        let first = stream.next();
        assert!(first.is_none());
    });
}
41+
42+
#[test]
fn test_json_array_whitespace() {
    // Whitespace before, inside and after the array is accepted.
    let input = "\r [\n{\"x\":42}\t, {\"y\":43}\n] \t\n";

    test_stream!(input, Value, |stream| {
        let first = stream.next().unwrap().unwrap();
        assert_eq!(first["x"], 42);

        let second = stream.next().unwrap().unwrap();
        assert_eq!(second["y"], 43);

        let rest = stream.next();
        assert!(rest.is_none());
    });
}
54+
55+
#[test]
fn test_json_array_truncated() {
    // The input stops in the middle of the second element.
    let input = "[{\"x\":40},{\"x\":";

    test_stream!(input, Value, |stream| {
        let complete = stream.next().unwrap().unwrap();
        assert_eq!(complete["x"], 40);

        let err = stream.next().unwrap().unwrap_err();
        assert!(err.is_eof());
    });
}
65+
66+
#[test]
fn test_json_array_primitive() {
    // One element of every JSON value kind, in order.
    let input = "[{}, true, 1, [], 1.0, \"hey\", null]";

    test_stream!(input, Value, |stream| {
        let object = stream.next().unwrap().unwrap();
        assert_eq!(object, json!({}));

        let boolean = stream.next().unwrap().unwrap();
        assert_eq!(boolean, true);

        let integer = stream.next().unwrap().unwrap();
        assert_eq!(integer, 1);

        let array = stream.next().unwrap().unwrap();
        assert_eq!(array, json!([]));

        let float = stream.next().unwrap().unwrap();
        assert_eq!(float, 1.0);

        let string = stream.next().unwrap().unwrap();
        assert_eq!(string, "hey");

        let null = stream.next().unwrap().unwrap();
        assert_eq!(null, Value::Null);

        let rest = stream.next();
        assert!(rest.is_none());
    });
}
88+
89+
#[test]
fn test_json_array_tailing_data() {
    // Anything other than whitespace after the closing `]` is an error.
    let input = "[]e";

    test_stream!(input, Value, |stream| {
        let err = stream.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing characters at line 1 column 3");
    });
}
98+
99+
#[test]
fn test_json_array_tailing_comma() {
    // A `,` directly followed by `]` is rejected after the first element.
    let input = "[true,]";

    test_stream!(input, Value, |stream| {
        let first = stream.next().unwrap().unwrap();
        assert_eq!(first, true);

        let err = stream.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "trailing comma at line 1 column 7");
    });
}
110+
111+
#[test]
fn test_json_array_eof() {
    // Completely empty input yields an EOF error instead of ending cleanly.
    let input = "";

    test_stream!(input, Value, |stream| {
        let err = stream.next().unwrap().unwrap_err();
        let message = err.to_string();
        assert_eq!(message, "EOF while parsing a value at line 1 column 0");
    });
}

0 commit comments

Comments
 (0)