Skip to content

feat: adding JSON scalar #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ dependencies {
implementation "org.apache.arrow:arrow-memory-core:12.0.1"
implementation "org.apache.arrow:arrow-vector:12.0.1"

implementation "com.fasterxml.jackson.core:jackson-core:2.15.1"

implementation 'org.apache.logging.log4j:log4j-api:2.20.0'
implementation 'org.apache.logging.log4j:log4j-core:2.20.0'

Expand Down
82 changes: 82 additions & 0 deletions lib/src/main/java/io/cloudquery/scalar/JSON.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package io.cloudquery.scalar;

import static io.cloudquery.scalar.ValidationException.NO_CONVERSION_AVAILABLE;

import com.fasterxml.jackson.databind.ObjectMapper;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you have a preference? I think I saw some hints that Jackson is slightly faster, which is why I went with it, but I have no strong preference.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't mind; whatever uses less boilerplate code. I read that Jackson serializes dates as long numbers while Gson serializes them as strings (with its own format), but we probably shouldn't worry about that now.

import io.cloudquery.types.JSONType;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import org.apache.arrow.vector.types.pojo.ArrowType;

/**
 * Scalar whose value is a JSON document stored as its encoded bytes.
 *
 * <p>Accepted inputs (see {@link #setValue(Object)}):
 *
 * <ul>
 *   <li>{@code byte[]} - must parse as JSON; an empty array clears the value;
 *   <li>{@code java.lang.String} - encoded as UTF-8 and then handled like {@code byte[]};
 *   <li>anything else - serialized to JSON with Jackson's {@link ObjectMapper}.
 * </ul>
 *
 * <p>A {@code null} value represents "no document"; {@link #toString()} then yields {@code
 * NULL_VALUE_STRING}.
 *
 * <p>NOTE(review): {@link #equals(Object)} is overridden here but {@code hashCode()} is not; the
 * base class is not visible from this file, so confirm that its {@code hashCode()} is consistent
 * with content-based equality of {@code byte[]} values.
 */
public class JSON extends Scalar<byte[]> {
  private static final JSONType dt = new JSONType();
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /**
   * Charset used for every String/byte[] conversion. JSON text is UTF-8 and Jackson emits UTF-8
   * when writing to a byte stream, so the platform default charset must not be used.
   */
  private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

  /** Creates a scalar without a value. */
  public JSON() {
    super();
  }

  /**
   * Creates a scalar from {@code value}.
   *
   * @param value the initial value, passed to the superclass constructor
   * @throws ValidationException if {@code value} cannot be turned into valid JSON
   */
  public JSON(Object value) throws ValidationException {
    super(value);
  }

  /**
   * Stores {@code value} as JSON bytes.
   *
   * @param value a {@code byte[]}, a {@code java.lang.String}, or any object Jackson can serialize
   * @throws ValidationException if the bytes are not valid JSON or the object cannot be serialized
   */
  @Override
  protected void setValue(Object value) throws ValidationException {
    if (value instanceof byte[] bytes) {
      if (bytes.length == 0) {
        // Empty input means "no document": clear any previously stored value instead of
        // silently keeping a stale one.
        this.value = null;
        return;
      }
      if (!isValidJSON(bytes)) {
        throw new ValidationException("invalid json", dt, value);
      }
      this.value = bytes;
    } else if (value instanceof java.lang.String string) {
      // Route through set() so the encoded bytes get the same validation as raw byte[] input.
      set(string.getBytes(UTF_8));
    } else {
      set(parseAsJSONBytes(value));
    }
  }

  /** Returns the Arrow type shared by all JSON scalars. */
  @Override
  public ArrowType dataType() {
    return dt;
  }

  /**
   * Compares two JSON scalars by the content of their byte arrays (two absent values are equal);
   * any other argument is delegated to the superclass.
   */
  @Override
  public boolean equals(Object other) {
    if (other instanceof JSON o) {
      // Arrays.equals treats (null, null) as equal and (null, non-null) as different.
      return Arrays.equals(this.value, o.value);
    }
    return super.equals(other);
  }

  /** Returns the stored document decoded as UTF-8, or {@code NULL_VALUE_STRING} when absent. */
  @Override
  public java.lang.String toString() {
    if (this.value != null) {
      return new java.lang.String(this.value, UTF_8);
    }
    return NULL_VALUE_STRING;
  }

  /**
   * Serializes an arbitrary object to JSON bytes with Jackson.
   *
   * @throws ValidationException if Jackson fails to serialize {@code value}
   */
  private byte[] parseAsJSONBytes(Object value) throws ValidationException {
    try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
      OBJECT_MAPPER.writeValue(outputStream, value);
      return outputStream.toByteArray();
    } catch (IOException e) {
      throw new ValidationException(NO_CONVERSION_AVAILABLE, this.dataType(), value);
    }
  }

  /**
   * Returns whether {@code bytes} parse as a JSON document.
   *
   * <p>Content without any JSON token (for example whitespace only) makes {@code readTree} return
   * a "missing" node instead of throwing, so that case is rejected explicitly.
   */
  private boolean isValidJSON(byte[] bytes) {
    try {
      return !OBJECT_MAPPER.readTree(bytes).isMissingNode();
    } catch (IOException ex) {
      return false;
    }
  }
}
121 changes: 121 additions & 0 deletions lib/src/test/java/io/cloudquery/scalar/JSONTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package io.cloudquery.scalar;

import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import io.cloudquery.types.JSONType;
import java.util.Collections;
import java.util.Map;
import lombok.AllArgsConstructor;
import org.junit.jupiter.api.Test;

/** Unit tests for the {@link JSON} scalar: construction, validation, accessors and equality. */
public class JSONTest {
  /** Minimal bean used to exercise object-to-JSON serialization. */
  @AllArgsConstructor
  public static class JsonData {
    public java.lang.String name;
    public int value;
  }

  /** Syntactically broken JSON shared by the negative tests. */
  private static final java.lang.String INVALID_JSON = "{\"name\":\"test\", incomplete";

  /** Charset for every String/byte[] conversion so results do not depend on the platform. */
  private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;

  @Test
  public void testNew() {
    assertDoesNotThrow(() -> new JSON());
  }

  @Test
  public void testNewWithValidParam() {
    assertDoesNotThrow(() -> new JSON("{}"));
    assertDoesNotThrow(() -> new JSON("{}".getBytes(UTF_8)));
    assertDoesNotThrow(() -> new JsonData("test", 1));
    assertDoesNotThrow(() -> new JSON(new JsonData("test", 1)));
    assertDoesNotThrow(() -> new JSON(new int[] {1, 2, 3}));
  }

  @Test
  public void testNewWithInvalidParam() {
    assertThrows(ValidationException.class, () -> new JSON(INVALID_JSON));
    assertThrows(ValidationException.class, () -> new JSON(INVALID_JSON.getBytes(UTF_8)));
  }

  @Test
  public void testToString() throws ValidationException {
    JSON json = new JSON("{\"name\":\"test\", \"value\":1}");

    assertEquals("{\"name\":\"test\", \"value\":1}", json.toString());
  }

  @Test
  public void testDataType() {
    JSON json = new JSON();

    assertEquals(new JSONType(), json.dataType());
  }

  @Test
  public void testIsValid() throws ValidationException {
    assertTrue(new JSON("{}").isValid());

    assertTrue(new JSON("{\"name\":\"test\", \"value\":1}").isValid());
    assertTrue(new JSON(new int[] {1, 2, 3}).isValid());
    assertTrue(new JSON(Collections.emptyList()).isValid());
    assertTrue(new JSON(Map.of("foo", "bar")).isValid());
    assertTrue(new JSON(Collections.emptyMap()).isValid());
    // NOTE(review): unqualified String presumably resolves to this package's String scalar
    // (the file spells out java.lang.String everywhere else) - confirm.
    assertTrue(new JSON(new String("{\"name\":\"test\", \"value\":1}")).isValid());

    assertFalse(new JSON("").isValid());
    assertFalse(new JSON(null).isValid());
    assertFalse(new JSON(new byte[] {}).isValid());
  }

  @Test
  public void testSet() throws ValidationException {
    JSON json = new JSON();

    json.set("{}");
    assertByteEquals("{}", json);

    json.set(new JsonData("test", 1));
    assertByteEquals("{\"name\":\"test\",\"value\":1}", json);

    json.set(new String("{\"name\":\"test\", \"value\":1}"));
    assertTrue(json.isValid());

    json.set(new int[] {1, 2, 3});
    assertByteEquals("[1,2,3]", json);
  }

  @Test
  public void testSetWithInvalidParam() {
    JSON json = new JSON();

    // Mirrors testNewWithInvalidParam, but through set() on an existing instance.
    assertThrows(ValidationException.class, () -> json.set(INVALID_JSON));
    assertThrows(ValidationException.class, () -> json.set(INVALID_JSON.getBytes(UTF_8)));
  }

  @Test
  public void testGet() throws ValidationException {
    assertByteEquals("{}", new JSON("{}"));
    assertByteEquals("[1,2,3]", new JSON(new int[] {1, 2, 3}));
    assertByteEquals("{\"name\":\"test\",\"value\":1}", new JSON(new JsonData("test", 1)));
    assertByteEquals("{\"foo\":\"bar\"}", new JSON(Map.of("foo", "bar")));
    assertByteEquals("{}", new JSON(Collections.emptyMap()));
    assertByteEquals("[]", new JSON(Collections.emptyList()));
  }

  @Test
  public void testEquals() throws ValidationException {
    JSON json1 = new JSON();
    JSON json2 = new JSON();

    assertEquals(json1, json2);
    assertNotEquals(json1, null);
    assertNotEquals(json1, new Bool());
    assertNotEquals(null, json1);

    json1.set(new JsonData("test", 1));
    assertNotEquals(json1, json2);
    json2.set(new JsonData("test", 1));
    assertEquals(json1, json2);
  }

  /** Asserts that the bytes held by {@code json} equal the UTF-8 encoding of {@code expected}. */
  private void assertByteEquals(java.lang.String expected, JSON json) throws ValidationException {
    assertArrayEquals(
        expected.getBytes(UTF_8),
        json.get(),
        "expected: " + expected + ", actual: " + new java.lang.String(json.get(), UTF_8));
  }
}