1
- # This file is a part of Julia . License is MIT: http://julialang.org/license
1
+ # This file is a part of StringEncodings.jl . License is MIT: http://julialang.org/license
2
2
3
3
module StringEncodings
4
4
import Base: close, eof, flush, read, readall, write, show
@@ -8,6 +8,7 @@ export StringEncoder, StringDecoder, encode, decode, encodings
8
8
export StringEncodingError, OutputBufferError, IConvError
9
9
export InvalidEncodingError, InvalidSequenceError, IncompleteSequenceError
10
10
11
+ include (" encodings.jl" )
11
12
12
13
abstract StringEncodingError
13
14
@@ -62,7 +63,7 @@ function iconv_close(cd::Ptr{Void})
62
63
end
63
64
end
64
65
65
- function iconv_open (tocode, fromcode)
66
+ function iconv_open (tocode:: ASCIIString , fromcode:: ASCIIString )
66
67
p = ccall ((:iconv_open , libiconv), Ptr{Void}, (Cstring, Cstring), tocode, fromcode)
67
68
if p != Ptr {Void} (- 1 )
68
69
return p
@@ -173,14 +174,16 @@ end
173
174
# # StringEncoder
174
175
175
176
"""
176
- StringEncoder(istream, to, from="UTF-8")
177
+ StringEncoder(ostream, to, from=enc"UTF-8")
177
178
178
179
Returns a new write-only I/O stream, which converts any text in the encoding `from`
179
180
written to it into text in the encoding `to` written to `ostream`. Calling `close` on the
180
181
stream is necessary to complete the encoding (but does not close `ostream`).
182
+
183
+ `to` and `from` can be specified either as a string or as an `Encoding` object.
181
184
"""
182
- function StringEncoder (ostream:: IO , to:: ASCIIString , from:: ASCIIString = " UTF-8" )
183
- cd = iconv_open (to, from)
185
+ function StringEncoder (ostream:: IO , to:: Encoding , from:: Encoding = enc " UTF-8" )
186
+ cd = iconv_open (ASCIIString (to), ASCIIString ( from) )
184
187
inbuf = Vector {UInt8} (BUFSIZE)
185
188
outbuf = Vector {UInt8} (BUFSIZE)
186
189
s = StringEncoder (ostream, cd, inbuf, outbuf,
@@ -190,6 +193,11 @@ function StringEncoder(ostream::IO, to::ASCIIString, from::ASCIIString="UTF-8")
190
193
s
191
194
end
192
195
196
# Convenience methods accepting encoding names as strings. Each string is wrapped in
# an `Encoding` and the call is forwarded to the method taking `Encoding` arguments.
StringEncoder(ostream::IO, to::AbstractString, from::Encoding=enc"UTF-8") =
    StringEncoder(ostream, Encoding(to), from)
StringEncoder(ostream::IO, to::AbstractString, from::AbstractString) =
    StringEncoder(ostream, Encoding(to), Encoding(from))
# Mixed case: `to` given as an `Encoding` and `from` as a string. Without this method
# the call raises a MethodError even though either argument may be a string.
StringEncoder(ostream::IO, to::Encoding, from::AbstractString) =
    StringEncoder(ostream, to, Encoding(from))
200
+
193
201
# Flush input buffer and convert it into output buffer
194
202
# Returns the number of bytes written to output buffer
195
203
function flush (s:: StringEncoder )
@@ -226,16 +234,18 @@ end
226
234
# # StringDecoder
227
235
228
236
"""
229
- StringDecoder(istream, from, to= "UTF-8")
237
+ StringDecoder(istream, from, to=enc"UTF-8")
230
238
231
239
Returns a new read-only I/O stream, which converts text in the encoding `from`
232
240
read from `istream` into text in the encoding `to`.
233
241
242
+ `to` and `from` can be specified either as a string or as an `Encoding` object.
243
+
234
244
Note that some implementations (notably the Windows one) may accept invalid sequences
235
245
in the input data without raising an error.
236
246
"""
237
- function StringDecoder (istream:: IO , from:: ASCIIString , to:: ASCIIString = " UTF-8" )
238
- cd = iconv_open (to, from)
247
+ function StringDecoder (istream:: IO , from:: Encoding , to:: Encoding = enc " UTF-8" )
248
+ cd = iconv_open (ASCIIString (to), ASCIIString ( from) )
239
249
inbuf = Vector {UInt8} (BUFSIZE)
240
250
outbuf = Vector {UInt8} (BUFSIZE)
241
251
s = StringDecoder (istream, cd, inbuf, outbuf,
@@ -245,6 +255,11 @@ function StringDecoder(istream::IO, from::ASCIIString, to::ASCIIString="UTF-8")
245
255
s
246
256
end
247
257
258
# Convenience methods accepting encoding names as strings. Each string is wrapped in
# an `Encoding` and the call is forwarded to the method taking `Encoding` arguments.
StringDecoder(istream::IO, from::AbstractString, to::Encoding=enc"UTF-8") =
    StringDecoder(istream, Encoding(from), to)
StringDecoder(istream::IO, from::AbstractString, to::AbstractString) =
    StringDecoder(istream, Encoding(from), Encoding(to))
# Mixed case: `from` given as an `Encoding` and `to` as a string. Without this method
# the call raises a MethodError even though either argument may be a string.
StringDecoder(istream::IO, from::Encoding, to::AbstractString) =
    StringDecoder(istream, from, Encoding(to))
262
+
248
263
# Fill input buffer and convert it into output buffer
249
264
# Returns the number of bytes written to output buffer
250
265
function fill_buffer! (s:: StringDecoder )
@@ -289,68 +304,67 @@ end
289
304
# # Convenience I/O functions
290
305
if isdefined(Base, :readstring)
    @doc """
        readstring(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readstring(s::IO, enc::Encoding) = readstring(StringDecoder(s, enc))
    # Encoding names given as strings are wrapped in an `Encoding` and forwarded, so
    # callers passing a plain string (as the former `ASCIIString` methods allowed) still work.
    Base.readstring(s::IO, enc::AbstractString) = readstring(s, Encoding(enc))
    Base.readstring(filename::AbstractString, enc::Encoding) = open(io->readstring(io, enc), filename)
    Base.readstring(filename::AbstractString, enc::AbstractString) = readstring(filename, Encoding(enc))
else # Compatibility with Julia 0.4
    @doc """
        readall(stream or filename, enc)

    Read the entire contents of an I/O stream or a file in encoding `enc` as a string.

    `enc` can be specified either as a string or as an `Encoding` object.
    """ ->
    Base.readall(s::IO, enc::Encoding) = readall(StringDecoder(s, enc))
    # Encoding names given as strings are wrapped in an `Encoding` and forwarded, so
    # callers passing a plain string (as the former `ASCIIString` methods allowed) still work.
    Base.readall(s::IO, enc::AbstractString) = readall(s, Encoding(enc))
    Base.readall(filename::AbstractString, enc::Encoding) = open(io->readall(io, enc), filename)
    Base.readall(filename::AbstractString, enc::AbstractString) = readall(filename, Encoding(enc))
end
307
322
308
323
309
324
# # Functions to encode/decode strings
310
325
311
- encoding_string (:: Type{ASCIIString} ) = " ASCII"
312
- encoding_string (:: Type{UTF8String} ) = " UTF-8"
313
- encoding_string (:: Type{UTF16String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-16LE" : " UTF-16BE"
314
- encoding_string (:: Type{UTF32String} ) = (ENDIAN_BOM == 0x04030201 ) ? " UTF-32LE" : " UTF-32BE"
315
-
316
326
"""
    decode([T,] a::Vector{UInt8}, enc)

Decode the bytes in `a`, interpreted as text in encoding `enc`, into a string of type `T`.
When `T` is omitted, a `UTF8String` is returned.

`enc` may be given either as a string or as an `Encoding` object.

Some implementations (notably the Windows one) may accept invalid sequences in the
input data without raising an error.
"""
function decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::Encoding)
    buf = IOBuffer(a)
    try
        # Convert from `enc` to the encoding associated with the string type `T`,
        # then build the string from the converted bytes.
        decoder = StringDecoder(buf, enc, encoding(T))
        converted = readbytes(decoder)
        return T(converted)
    finally
        # The input buffer is closed whether or not decoding succeeded.
        close(buf)
    end
end
333
345
334
- decode (a:: Vector{UInt8} , enc:: ASCIIString ) = decode (UTF8String, a, enc)
346
# An encoding name given as a string is wrapped in an `Encoding` before decoding.
decode{T<:AbstractString}(::Type{T}, a::Vector{UInt8}, enc::AbstractString) =
    decode(T, a, Encoding(enc))

# With no string type given, decode to `UTF8String`. A string `enc` is forwarded as-is
# and converted by the typed method above, so a separate `AbstractString` method that
# repeats the conversion is not needed.
decode(a::Vector{UInt8}, enc::Union{AbstractString, Encoding}) = decode(UTF8String, a, enc)
335
350
336
351
"""
    encode(s::AbstractString, enc)

Encode string `s` as an array of bytes representing text in encoding `enc`.

`enc` may be given either as a string or as an `Encoding` object.
"""
function encode(s::AbstractString, enc::Encoding)
    buf = IOBuffer()
    # Convert from the encoding associated with the type of `s` to `enc`.
    encoder = StringEncoder(buf, enc, encoding(typeof(s)))
    write(encoder, s)
    # Closing the encoder completes the encoding; it does not close `buf`.
    close(encoder)
    return takebuf_array(buf)
end

# An encoding name given as a string is wrapped in an `Encoding` first.
encode(s::AbstractString, enc::AbstractString) = encode(s, Encoding(enc))
349
366
350
- # # Function to list supported encodings
351
- include (" encodings.jl" )
352
-
353
- function test_encoding (enc)
367
+ function test_encoding (enc:: ASCIIString )
354
368
# We assume that an encoding is supported if it's possible to convert from it to UTF-8:
355
369
cd = ccall ((:iconv_open , libiconv), Ptr{Void}, (Cstring, Cstring), enc, " UTF-8" )
356
370
if cd == Ptr {Void} (- 1 )
0 commit comments