1
1
import io
2
2
import re
3
3
import zipfile
4
- from datetime import datetime , time , timedelta
4
+ from datetime import time , datetime , timedelta
5
5
6
6
from lxml import etree
7
7
from pyexcel_io ._compact import OrderedDict
11
11
WORK_BOOK = "xl/workbook.xml"
12
12
SHEET_MATCHER = "xl/worksheets/(work)?sheet([0-9]+)?.xml"
13
13
SHEET_INDEX_MATCHER = "xl/worksheets/(work)?sheet(([0-9]+)?).xml"
14
- XLSX_ROW_MATCH = re .compile (b ".*?(<row.*?<\/.*?row>).*?" , re .MULTILINE )
14
+ XLSX_ROW_MATCH = re .compile (rb ".*?(<row.*?<\/.*?row>).*?" , re .MULTILINE )
15
15
NUMBER_FMT_MATCHER = re .compile (
16
- b ".*?(<numFmts.*?<\/.*?numFmts>).*?" , re .MULTILINE
16
+ rb ".*?(<numFmts.*?<\/.*?numFmts>).*?" , re .MULTILINE
17
17
)
18
18
XFS_FMT_MATCHER = re .compile (
19
- b ".*?(<cellXfs.*?<\/.*?cellXfs>).*?" , re .MULTILINE
19
+ rb ".*?(<cellXfs.*?<\/.*?cellXfs>).*?" , re .MULTILINE
20
20
)
21
- SHEET_FMT_MATCHER = re .compile (b ".*?(<sheet .*?\/>).*?" , re .MULTILINE )
22
- DATE_1904_MATCHER = re .compile (b ".*?(<workbookPr.*?\/>).*?" , re .MULTILINE )
21
+ SHEET_FMT_MATCHER = re .compile (rb ".*?(<sheet .*?\/>).*?" , re .MULTILINE )
22
+ DATE_1904_MATCHER = re .compile (rb ".*?(<workbookPr.*?\/>).*?" , re .MULTILINE )
23
23
# "xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"
24
24
# But it is not used for now
25
25
X14AC_NAMESPACE = b'xmlns:x14ac="http://not.used.com/"'
@@ -140,8 +140,9 @@ def __extract_book_properties(self):
140
140
book_content = self .zip_file .open (WORK_BOOK ).read ()
141
141
return parse_book_properties (book_content )
142
142
143
- def __del__ (self ):
144
- self .zip_file .close ()
143
+ def close (self ):
144
+ if self .zip_file :
145
+ self .zip_file .close ()
145
146
146
147
def make_tables (self ):
147
148
sheet_files = find_sheets (self .zip_file .namelist ())
@@ -216,9 +217,9 @@ def parse_cell_type(cell):
216
217
cell_type = None
217
218
if cell .style_string :
218
219
date_time_flag = (
219
- re .match ("^\d+(\.\d+)?$" , cell .value )
220
+ re .match (r "^\d+(\.\d+)?$" , cell .value )
220
221
and re .match (".*[hsmdyY]" , cell .style_string )
221
- and not re .match (".*\[.*[dmhys].*\]" , cell .style_string )
222
+ and not re .match (r ".*\[.*[dmhys].*\]" , cell .style_string )
222
223
)
223
224
if cell .style_string in FORMATS :
224
225
cell_type = FORMATS [cell .style_string ]
@@ -227,7 +228,7 @@ def parse_cell_type(cell):
227
228
cell_type = "time"
228
229
else :
229
230
cell_type = "date"
230
- elif re .match ("^-?\d+(.\d+)?$" , cell .value ):
231
+ elif re .match (r "^-?\d+(.\d+)?$" , cell .value ):
231
232
cell_type = "float"
232
233
return cell_type
233
234
@@ -313,9 +314,12 @@ def parse_book_properties(book_content):
313
314
properties ["date1904" ] = value .lower ().strip () == "true"
314
315
else :
315
316
properties ["date1904" ] = False
316
- namespaces = {
317
- "r" : "http://schemas.openxmlformats.org/officeDocument/2006/relationships" # flake8: noqa
318
- }
317
+
318
+ ns = (
319
+ "http://schemas.openxmlformats.org/"
320
+ + "officeDocument/2006/relationships"
321
+ )
322
+ namespaces = {"r" : ns }
319
323
320
324
xlsx_header = u"<wrapper {0}>" .format (
321
325
" " .join ('xmlns:{0}="{1}"' .format (k , v ) for k , v in namespaces .items ())
0 commit comments