Skip to content

Commit 524b3e5

Browse files
committed
Revert jl_parse to use raw buffers and sizes
The problem with using String is that many raw buffer types cannot be converted to String without copying. This seems pretty unfortunate, so instead let's just use a raw buffer. Luckily Tokenize is pretty great and uses IOBuffer internally, so we may use unsafe_wrap.
1 parent 52e9eef commit 524b3e5

File tree

6 files changed

+52
-59
lines changed

6 files changed

+52
-59
lines changed

base/client.jl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -153,9 +153,10 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
153153
end
154154

155155
function _parse_input_line_core(s::String, filename::String)
156-
JL_PARSE_TOPLEVEL = 3
157-
ex,_ = ccall(:jl_parse, Any, (Any, Any, Cint, Cint),
158-
s, filename, 1, JL_PARSE_TOPLEVEL)
156+
JL_PARSE_ALL = 3
157+
ex,_ = ccall(:jl_parse, Any,
158+
(Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t, Csize_t, Cint),
159+
s, sizeof(s), filename, sizeof(filename), 0, JL_PARSE_ALL)
159160
if ex isa Expr && ex.head === :toplevel
160161
if isempty(ex.args)
161162
return nothing

base/meta.jl

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -171,21 +171,25 @@ julia> Meta.parse("x = 3, y = 5", 5)
171171
"""
172172
function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true,
173173
depwarn::Bool=true)
174-
# pos is one based byte offset.
174+
if pos < 1 || pos > ncodeunits(str) + 1
175+
throw(BoundsError(str, pos))
176+
end
175177
filename = "none"
176178
JL_PARSE_ATOM = 1
177179
JL_PARSE_STATEMENT = 2
178180
rule = greedy ? JL_PARSE_STATEMENT : JL_PARSE_ATOM
179181
# For now, assume all parser warnings are depwarns
180182
# TODO: remove parser-depwarn; parser no longer emits warnings.
181183
ex, pos = with_logger(depwarn ? current_logger() : NullLogger()) do
182-
ccall(:jl_parse, Any, (String, String, Cint, Cint),
183-
str, filename, pos, rule)
184+
ccall(:jl_parse, Any,
185+
(Ptr{UInt8}, Csize_t, Ptr{UInt8}, Csize_t, Csize_t, Cint),
186+
str, sizeof(str), filename, sizeof(filename), pos-1, rule)
184187
end
185188
if raise && isa(ex,Expr) && ex.head === :error
186189
throw(ParseError(ex.args[1]))
187190
end
188-
return ex, pos
191+
# internal pos is zero-based byte offset
192+
return ex, pos+1
189193
end
190194

191195
"""

src/ast.c

Lines changed: 18 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -779,42 +779,38 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v)
779779
return julia_to_scm_noalloc2(fl_ctx, v);
780780
}
781781

782-
// Parse string `content` starting at 1-based index `start_pos` attributing the
782+
// Parse string `text` starting at 0-based byte offset `offset`, attributing the
783783
// content to `filename`. Return an svec of (parse_result, final_pos)
784-
JL_DLLEXPORT jl_value_t *jl_fl_parse(jl_value_t *text, jl_value_t *filename,
785-
int start_pos, int rule)
784+
JL_DLLEXPORT jl_value_t *jl_fl_parse(const char* text, size_t text_len,
785+
const char* filename, size_t filename_len,
786+
size_t offset, int rule)
786787
{
787788
JL_TIMING(PARSING);
788-
if (!jl_is_string(text) || !jl_is_string(filename)) {
789-
jl_errorf("File content and name must be Strings");
789+
if (offset > text_len) {
790+
jl_value_t *textstr = jl_pchar_to_string(text, text_len);
791+
JL_GC_PUSH1(&textstr);
792+
jl_bounds_error(textstr, jl_box_long(offset));
790793
}
791-
if (start_pos < 1 || start_pos > (int)jl_string_len(text) + 1) {
792-
// jl_bounds_error roots the arguments.
793-
jl_bounds_error(jl_box_long(start_pos), text);
794-
}
795-
else if (start_pos != 1 && rule == JL_PARSE_TOPLEVEL) {
794+
else if (offset != 0 && rule == JL_PARSE_ALL) {
796795
jl_error("Partial parsing not support by top level grammar rule");
797796
}
798797

799798
jl_ast_context_t *ctx = jl_ast_ctx_enter();
800799
fl_context_t *fl_ctx = &ctx->fl;
801-
value_t fl_text = cvalue_static_cstrn(fl_ctx, jl_string_data(text),
802-
jl_string_len(text));
803-
value_t fl_filename = cvalue_static_cstrn(fl_ctx, jl_string_data(filename),
804-
jl_string_len(filename));
800+
value_t fl_text = cvalue_static_cstrn(fl_ctx, text, text_len);
801+
value_t fl_filename = cvalue_static_cstrn(fl_ctx, filename, filename_len);
805802
value_t fl_expr;
806803
size_t pos1 = 0;
807-
if (rule == JL_PARSE_TOPLEVEL) {
804+
if (rule == JL_PARSE_ALL) {
808805
value_t e = fl_applyn(fl_ctx, 2, symbol_value(symbol(fl_ctx, "jl-parse-all")),
809806
fl_text, fl_filename);
810807
fl_expr = e;
811-
pos1 = e == fl_ctx->FL_EOF ? jl_string_len(text) : 0;
808+
pos1 = e == fl_ctx->FL_EOF ? text_len : 0;
812809
}
813810
else if (rule == JL_PARSE_STATEMENT || rule == JL_PARSE_ATOM) {
814811
value_t greedy = rule == JL_PARSE_STATEMENT ? fl_ctx->T : fl_ctx->F;
815-
value_t offset = fixnum(start_pos-1);
816812
value_t p = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-parse-one")),
817-
fl_text, fl_filename, offset, greedy);
813+
fl_text, fl_filename, fixnum(offset), greedy);
818814
fl_expr = car_(p);
819815
pos1 = tosize(fl_ctx, cdr_(p), "parse");
820816
}
@@ -824,12 +820,12 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(jl_value_t *text, jl_value_t *filename,
824820
}
825821

826822
// Convert to julia values
827-
jl_value_t *expr=NULL, *end_pos=NULL;
828-
JL_GC_PUSH2(&expr, &end_pos);
823+
jl_value_t *expr=NULL, *end_offset=NULL;
824+
JL_GC_PUSH2(&expr, &end_offset);
829825
expr = fl_expr == fl_ctx->FL_EOF ? jl_nothing : scm_to_julia(fl_ctx, fl_expr, NULL);
830-
end_pos = jl_box_long(pos1 + 1);
826+
end_offset = jl_box_long(pos1);
831827
jl_ast_ctx_leave(ctx);
832-
jl_value_t *result = (jl_value_t*)jl_svec2(expr, end_pos);
828+
jl_value_t *result = (jl_value_t*)jl_svec2(expr, end_offset);
833829
JL_GC_POP();
834830
return result;
835831
}

src/frontend.c

Lines changed: 12 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,39 @@ JL_DLLEXPORT void jl_set_parser(jl_parse_func_t parser)
1818
jl_current_parser = parser;
1919
}
2020

21-
JL_DLLEXPORT jl_value_t *jl_parse(jl_value_t *text, jl_value_t *filename,
22-
int start_pos, int rule)
21+
JL_DLLEXPORT jl_value_t *jl_parse(const char* text, size_t text_len,
22+
const char* filename, size_t filename_len,
23+
size_t offset, int rule)
2324
{
24-
return (*jl_current_parser)(text, filename, start_pos, rule);
25+
return (*jl_current_parser)(text, text_len, filename, filename_len, offset, rule);
2526
}
2627

2728
// C API
2829
// parse an entire string like a file, reading multiple expressions
29-
JL_DLLEXPORT jl_value_t *jl_parse_all(const char *str, size_t len,
30+
JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
3031
const char *filename, size_t filename_len)
3132
{
32-
jl_value_t *text = NULL;
33-
jl_value_t *filename_ = NULL;
34-
JL_GC_PUSH2(&text, &filename_);
35-
text = jl_pchar_to_string(str, len);
36-
filename_ = jl_pchar_to_string(filename, filename_len);
37-
jl_value_t *p = jl_parse(text, filename_, 1, JL_PARSE_TOPLEVEL);
38-
JL_GC_POP();
33+
jl_value_t *p = jl_parse(text, text_len, filename, filename_len,
34+
0, JL_PARSE_ALL);
3935
return jl_svecref(p, 0);
4036
}
4137

4238
// this is for parsing one expression out of a string, keeping track of
4339
// the current position.
44-
JL_DLLEXPORT jl_value_t *jl_parse_string(const char *str, size_t len,
40+
// FIXME: Add filename?
41+
JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
4542
int pos0, int greedy)
4643
{
47-
jl_value_t *text = NULL;
48-
jl_value_t *filename_ = NULL;
49-
JL_GC_PUSH2(&text, &filename_);
50-
text = jl_pchar_to_string(str, len);
51-
filename_ = jl_cstr_to_string("none");
52-
jl_value_t *result = jl_parse(text, filename_, pos0+1,
53-
greedy ? JL_PARSE_STATEMENT : JL_PARSE_ATOM);
54-
JL_GC_POP();
55-
return result;
44+
return jl_parse(text, text_len, "none", 4,
45+
pos0, greedy ? JL_PARSE_STATEMENT : JL_PARSE_ATOM);
5646
}
5747

5848
// deprecated
5949
JL_DLLEXPORT jl_value_t *jl_parse_input_line(const char *str, size_t len,
6050
const char *filename, size_t filename_len)
6151
{
6252
return jl_parse_all(str, len, filename, filename_len);
63-
}
53+
}
6454

6555
#ifdef __cplusplus
6656
}

src/julia.h

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,13 +1600,14 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t
16001600
typedef enum {
16011601
JL_PARSE_ATOM = 1,
16021602
JL_PARSE_STATEMENT = 2,
1603-
JL_PARSE_TOPLEVEL = 3,
1603+
JL_PARSE_ALL = 3,
16041604
} jl_parse_rule_t;
16051605

16061606
// parsing
1607-
JL_DLLEXPORT jl_value_t *jl_parse(jl_value_t *text, jl_value_t *filename,
1608-
int start_pos, int rule);
1609-
// TODO: Deprecate or convert to passing String to the next two?
1607+
JL_DLLEXPORT jl_value_t *jl_parse(const char* text, size_t text_len,
1608+
const char* filename, size_t filename_len,
1609+
size_t offset, int rule);
1610+
// Convenience functions
16101611
JL_DLLEXPORT jl_value_t *jl_parse_all(const char *str, size_t len,
16111612
const char *filename, size_t filename_len);
16121613
JL_DLLEXPORT jl_value_t *jl_parse_string(const char *str, size_t len,
@@ -1620,7 +1621,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
16201621
JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule);
16211622
JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
16221623
const char *file, int line);
1623-
// deprecated; use jl_parse
1624+
// deprecated; use jl_parse_all
16241625
JL_DLLEXPORT jl_value_t *jl_parse_input_line(const char *str, size_t len,
16251626
const char *filename, size_t filename_len);
16261627

src/julia_internal.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -639,11 +639,12 @@ jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs
639639
int jl_has_meta(jl_array_t *body, jl_sym_t *sym);
640640

641641
// Parser replacement
642-
typedef jl_value_t* (*jl_parse_func_t)(jl_value_t*, jl_value_t*, int, int);
642+
typedef jl_value_t* (*jl_parse_func_t)(const char*, size_t, const char*, size_t, size_t, int);
643643
JL_DLLEXPORT void jl_set_parser(jl_parse_func_t parser);
644644
// Builtin flisp parser
645-
JL_DLLEXPORT jl_value_t *jl_fl_parse(jl_value_t *text, jl_value_t *filename,
646-
int start_pos, int rule);
645+
JL_DLLEXPORT jl_value_t *jl_fl_parse(const char* text, size_t text_len,
646+
const char* filename, size_t filename_len,
647+
size_t offset, int rule);
647648

648649
//--------------------------------------------------
649650
// Backtraces

0 commit comments

Comments
 (0)