@@ -54,20 +54,24 @@ class SAS7BDATReader(BaseIterator):
54
54
with given number of lines.
55
55
encoding : string, defaults to None
56
56
String encoding.
57
- convert_text : bool, deafaults to True
57
+ convert_text : bool, defaults to True
58
58
If False, text variables are left as raw bytes.
59
+ convert_header_text : bool, defaults to True
60
+ If False, header text, including column names, is left as raw
61
+ bytes.
59
62
"""
60
63
61
64
def __init__ (self , path_or_buf , index = None , convert_dates = True ,
62
65
blank_missing = True , chunksize = None , encoding = None ,
63
- convert_text = True ):
66
+ convert_text = True , convert_header_text = True ):
64
67
65
68
self .index = index
66
69
self .convert_dates = convert_dates
67
70
self .blank_missing = blank_missing
68
71
self .chunksize = chunksize
69
72
self .encoding = encoding
70
73
self .convert_text = convert_text
74
+ self .convert_header_text = convert_header_text
71
75
72
76
self .compression = ""
73
77
self .column_names_strings = []
@@ -143,10 +147,14 @@ def _get_properties(self):
143
147
self .platform = "unknown"
144
148
145
149
buf = self ._read_bytes (const .dataset_offset , const .dataset_length )
146
- self .name = buf .rstrip (b'\x00 ' ).decode ()
150
+ self .name = buf .rstrip (b'\x00 ' )
151
+ if self .convert_header_text :
152
+ self .name = self .name .decode (self .encoding )
147
153
148
154
buf = self ._read_bytes (const .file_type_offset , const .file_type_length )
149
- self .file_type = buf .rstrip (b'\x00 ' ).decode ()
155
+ self .file_type = buf .rstrip (b'\x00 ' )
156
+ if self .convert_header_text :
157
+ self .file_type = self .file_type .decode (self .encoding )
150
158
151
159
# Timestamp is epoch 01/01/1960
152
160
epoch = pd .datetime (1960 , 1 , 1 )
@@ -173,25 +181,33 @@ def _get_properties(self):
173
181
174
182
buf = self ._read_bytes (const .sas_release_offset + total_align ,
175
183
const .sas_release_length )
176
- self .sas_release = buf .rstrip (b'\x00 ' ).decode ()
184
+ self .sas_release = buf .rstrip (b'\x00 ' )
185
+ if self .convert_header_text :
186
+ self .sas_release = self .sas_release .decode (self .encoding )
177
187
178
188
buf = self ._read_bytes (const .sas_server_type_offset + total_align ,
179
189
const .sas_server_type_length )
180
- self .server_type = buf .rstrip (b'\x00 ' ).decode ()
190
+ self .server_type = buf .rstrip (b'\x00 ' )
191
+ if self .convert_header_text :
192
+ self .server_type = self .server_type .decode (self .encoding )
181
193
182
194
buf = self ._read_bytes (const .os_version_number_offset + total_align ,
183
195
const .os_version_number_length )
184
- self .os_version = buf .rstrip (b'\x00 ' ).decode ()
196
+ self .os_version = buf .rstrip (b'\x00 ' )
197
+ if self .convert_header_text :
198
+ self .os_version = self .os_version .decode (self .encoding )
185
199
186
200
buf = self ._read_bytes (const .os_name_offset + total_align ,
187
201
const .os_name_length )
188
202
buf = buf .rstrip (b'\x00 ' )
189
203
if len (buf ) > 0 :
190
- self .os_name = buf .decode ()
204
+ self .os_name = buf .decode (self . encoding )
191
205
else :
192
206
buf = self ._read_bytes (const .os_maker_offset + total_align ,
193
207
const .os_maker_length )
194
- self .os_name = buf .rstrip (b'\x00 ' ).decode ()
208
+ self .os_name = buf .rstrip (b'\x00 ' )
209
+ if self .convert_header_text :
210
+ self .os_name = self .os_name .decode (self .encoding )
195
211
196
212
# Read a single float of the given width (4 or 8).
197
213
def _read_float (self , offset , width ):
@@ -383,8 +399,10 @@ def _process_columntext_subheader(self, offset, length):
383
399
text_block_size = self ._read_int (offset , const .text_block_size_length )
384
400
385
401
buf = self ._read_bytes (offset , text_block_size )
386
- self .column_names_strings .append (
387
- buf [0 :text_block_size ].rstrip (b"\x00 " ).decode (self .encoding ))
402
+ cname = buf [0 :text_block_size ].rstrip (b"\x00 " )
403
+ if self .convert_header_text :
404
+ cname = cname .decode (self .encoding )
405
+ self .column_names_strings .append (cname )
388
406
389
407
if len (self .column_names_strings ) == 1 :
390
408
column_name = self .column_names_strings [0 ]
0 commit comments