Skip to content

Commit dd04a11

Browse files
authored
adding ndjson format (#218)
by @jose-sherpa While the omniparser tool currently outputs JSON, you will often need another tool or package to stream that JSON output. While I am aware this tool will only be used for JSON output, there is a variant of JSON called NDJSON, which stands for newline-delimited JSON. This makes it easy to stream-parse and process a JSON array with no added packages or complexity, since you just read each line and parse the lines one by one. Since a strength of omniparser is stream-parsing large files, we think it makes sense to make the output easily streamable while each emitted record remains valid JSON. It also results in a smaller file size. http://ndjson.org/
1 parent 79a540b commit dd04a11

File tree

1 file changed

+26
-5
lines changed

1 file changed

+26
-5
lines changed

cli/cmd/transformCmd.go

+26-5
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ var (
3131
}
3232
schema string
3333
input string
34+
stream bool
3435
)
3536

3637
func init() {
@@ -39,6 +40,8 @@ func init() {
3940

4041
transformCmd.Flags().StringVarP(
4142
&input, "input", "i", "", "input file (optional; if not specified, stdin/pipe is used)")
43+
transformCmd.Flags().BoolVarP(
44+
&stream, "stream", "", false, "if specified, each record will be a standalone/full JSON blob and printed out immediately once transform is done")
4245
}
4346

4447
func openFile(label string, filepath string) (io.ReadCloser, error) {
@@ -86,22 +89,40 @@ func doTransform() error {
8689
if err != nil {
8790
return "", err
8891
}
92+
93+
s := string(b)
94+
if stream {
95+
return s, nil
96+
}
97+
8998
return strings.Join(
9099
strs.NoErrMapSlice(
91-
strings.Split(jsons.BPJ(string(b)), "\n"),
100+
strings.Split(jsons.BPJ(s), "\n"),
92101
func(s string) string { return "\t" + s }),
93102
"\n"), nil
94103
}
95104

96105
record, err := doOne()
97106
if err == io.EOF {
98-
fmt.Println("[]")
107+
if !stream {
108+
fmt.Println("[]")
109+
}
99110
return nil
100111
}
101112
if err != nil {
102113
return err
103114
}
104-
fmt.Printf("[\n%s", record)
115+
116+
lparen := "[\n%s"
117+
delim := ",\n%s"
118+
rparen := "\n]"
119+
if stream {
120+
lparen = "%s"
121+
delim = "\n%s"
122+
rparen = ""
123+
}
124+
125+
fmt.Printf(lparen, record)
105126
for {
106127
record, err = doOne()
107128
if err == io.EOF {
@@ -110,8 +131,8 @@ func doTransform() error {
110131
if err != nil {
111132
return err
112133
}
113-
fmt.Printf(",\n%s", record)
134+
fmt.Printf(delim, record)
114135
}
115-
fmt.Println("\n]")
136+
fmt.Println(rparen)
116137
return nil
117138
}

0 commit comments

Comments
 (0)