Skip to content

Commit 3434a2e

Browse files
committed
readuntil() for any delim type
Returns a string for a Char delim, otherwise an array. readuntil(s, uint8('\n')) provides a way to read a line without an encoding check. Streams are encouraged to provide an efficient readuntil(s, ::Uint8); then reading until ASCII delimiters is fast, leaving the encoding logic to higher-level layers in io.jl and string.jl. For #1792.
1 parent de27d83 commit 3434a2e

File tree

6 files changed

+35
-49
lines changed

6 files changed

+35
-49
lines changed

Diff for: base/io.jl

+20-4
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,12 @@ function read(s::IO, ::Type{Char})
122122
char(c)
123123
end
124124

125-
function readuntil(s::IO, delim)
125+
function readuntil(s::IO, delim::Char)
126+
if delim < 0x80
127+
data = readuntil(s, uint8(delim))
128+
enc = byte_string_classify(data)
129+
return (enc==1) ? ASCIIString(data) : UTF8String(data)
130+
end
126131
out = memio()
127132
while !eof(s)
128133
c = read(s, Char)
@@ -134,6 +139,18 @@ function readuntil(s::IO, delim)
134139
takebuf_string(out)
135140
end
136141

142+
function readuntil{T}(s::IO, delim::T)
143+
out = T[]
144+
while !eof(s)
145+
c = read(s, T)
146+
push(out, c)
147+
if c == delim
148+
break
149+
end
150+
end
151+
out
152+
end
153+
137154
readline(s::IO) = readuntil(s, '\n')
138155

139156
function readall(s::IO)
@@ -395,9 +412,8 @@ end
395412

396413
write(x) = write(OUTPUT_STREAM::IOStream, x)
397414

398-
function readuntil(s::IOStream, delim)
399-
# TODO: faster versions that avoid the encoding check
400-
ccall(:jl_readuntil, ByteString, (Ptr{Void}, Uint8), s.ios, delim)
415+
function readuntil(s::IOStream, delim::Uint8)
416+
ccall(:jl_readuntil, Array{Uint8,1}, (Ptr{Void}, Uint8), s.ios, delim)
401417
end
402418

403419
function readall(s::IOStream)

Diff for: base/iostring.jl

+2-3
Original file line numberDiff line numberDiff line change
@@ -196,11 +196,10 @@ function memchr(buf::IOString, delim)
196196
q = ccall(:memchr,Ptr{Uint8},(Ptr{Uint8},Int32,Int32),p,delim,nb_available(buf))
197197
nb = (q == C_NULL ? 0 : q-p+1)
198198
end
199-
function readuntil(io::IOString, delim)
199+
function readuntil(io::IOString, delim::Uint8)
200200
nb = memchr(io, delim)
201201
if nb == 0
202202
nb = nb_available(io)
203203
end
204-
readbytes(io,nb)
204+
read(io, Array(Uint8, nb))
205205
end
206-

Diff for: base/string.jl

+3-2
Original file line numberDiff line numberDiff line change
@@ -682,8 +682,9 @@ unescape_string(s::String) = sprint(length(s), print_unescaped, s)
682682

683683
## checking UTF-8 & ASCII validity ##
684684

685-
byte_string_classify(s::ByteString) =
686-
ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), s.data, length(s))
685+
byte_string_classify(data::Array{Uint8,1}) =
686+
ccall(:u8_isvalid, Int32, (Ptr{Uint8}, Int), data, length(data))
687+
byte_string_classify(s::ByteString) = byte_string_classify(s.data)
687688
# 0: neither valid ASCII nor UTF-8
688689
# 1: valid ASCII
689690
# 2: valid UTF-8

Diff for: extras/gzip.jl

+6-29
Original file line numberDiff line numberDiff line change
@@ -402,46 +402,23 @@ end
402402
readall(s::GZipStream) = readall(s, Z_BIG_BUFSIZE)
403403

404404
# TODO: Create a c-wrapper based on gzreadline
405-
function readuntil(s::GZipStream, delim)
406-
if delim == '\n'
407-
return readline(s)
408-
else
409-
buf = memio(GZ_LINE_BUFSIZE, false)
410-
c = read(s, Char)
411-
print(buf, c)
412-
while c != delim && !eof(s)
413-
try
414-
c = read(s, Char)
415-
print(buf, c)
416-
catch e
417-
if !isa(e, EOFError)
418-
throw(e)
419-
end
420-
end
421-
end
422-
check_eof(s)
423-
takebuf_string(buf)
424-
end
425-
end
426-
427-
428-
function readline(s::GZipStream)
405+
function readuntil(s::GZipStream, c::Uint8)
429406
buf = Array(Uint8, GZ_LINE_BUFSIZE)
430407
pos = 1
431408

432409
if gzgets(s, buf) == C_NULL # Throws an exception on error
433-
return ""
410+
return buf[1:0]
434411
end
435412

436413
while(true)
437414
# since gzgets didn't return C_NULL, there must be a \0 in the buffer
438415
eos = memchr(buf, '\0', pos)
439-
if eos == 1 || buf[eos-1] == '\n'
440-
return bytestring(buf[1:eos-1])
416+
if eos == 1 || buf[eos-1] == c
417+
return buf[1:eos-1]
441418
end
442419

443420
# If we're at the end of the file, return the string
444-
if eof(s) return bytestring(buf[1:eos-1]) end
421+
if eof(s) return buf[1:eos-1] end
445422

446423
# Otherwise, append to the end of the previous buffer
447424

@@ -454,7 +431,7 @@ function readline(s::GZipStream)
454431
if gzgets(s, pointer(buf)+pos-1, GZ_LINE_BUFSIZE) == C_NULL
455432
# eof(s); remove extra buffer space
456433
grow(buf, -GZ_LINE_BUFSIZE)
457-
return bytestring(buf)
434+
return buf
458435
end
459436
end
460437
end

Diff for: src/sys.c

+1-8
Original file line numberDiff line numberDiff line change
@@ -310,14 +310,7 @@ jl_value_t *jl_readuntil(ios_t *s, uint8_t delim)
310310
((char*)a->data)[n] = '\0';
311311
}
312312
}
313-
JL_GC_PUSH(&a);
314-
jl_struct_type_t* string_type = u8_isvalid(jl_array_data(a), jl_array_len(a)) == 1 ? // ASCII
315-
jl_ascii_string_type : jl_utf8_string_type;
316-
jl_value_t *str = alloc_2w();
317-
str->type = (jl_type_t*)string_type;
318-
jl_set_nth_field(str, 0, (jl_value_t*)a);
319-
JL_GC_POP();
320-
return str;
313+
return (jl_value_t*)a;
321314
}
322315

323316
void jl_free2(void *p, void *hint)

Diff for: test/perf2/perf2.jl

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@ srand(1776) # get more consistent times
1818

1919
require("$JULIA_HOME/../../examples/list.jl")
2020

21-
function listn1n2(n1::Int64,n2::Int64)
22-
l1 = Nil{Int64}()
21+
function listn1n2(n1::Int,n2::Int)
22+
l1 = Nil{Int}()
2323
for i=n2:-1:n1
24-
l1 = Cons{Int64}(i,l1)
24+
l1 = Cons{Int}(i,l1)
2525
end
2626
l1
2727
end

0 commit comments

Comments
 (0)