Skip to content

Commit 08eaf63

Browse files
author
Jason Ward
committed
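refs #51: No longer assuming that all docx files must have styles.xml. Optional parts (styles.xml, fontTable.xml, numbering.xml, comments.xml) are now read through a shared _extract_xml helper that returns None when the part is missing; the fontTable lookup path also changes from '/word/fontTable.xml' to 'word/fontTable.xml'.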
1 parent 1520e4d commit 08eaf63

File tree

1 file changed

+16
-13
lines changed

1 file changed

+16
-13
lines changed

pydocx/DocxParser.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,23 +46,24 @@ class DocxParser:
4646
__metaclass__ = ABCMeta
4747
pre_processor_class = PydocxPrePorcessor
4848

49+
def _extract_xml(self, f, xml_path):
50+
try:
51+
return f.read(xml_path)
52+
except KeyError:
53+
return None
54+
4955
def _build_data(self, path, *args, **kwargs):
5056
with ZipFile(path) as f:
57+
# These must be in the ZIP in order for the docx to be valid.
5158
self.document_text = f.read('word/document.xml')
52-
self.styles_text = f.read('word/styles.xml')
53-
try:
54-
self.fonts = f.read('/word/fontTable.xml')
55-
except KeyError:
56-
self.fonts = None
57-
try: # Only present if there are lists
58-
self.numbering_text = f.read('word/numbering.xml')
59-
except KeyError:
60-
self.numbering_text = None
61-
try: # Only present if there are comments
62-
self.comment_text = f.read('word/comments.xml')
63-
except KeyError:
64-
self.comment_text = None
6559
self.relationship_text = f.read('word/_rels/document.xml.rels')
60+
61+
# These are all optional.
62+
self.styles_text = self._extract_xml(f, 'word/styles.xml')
63+
self.fonts = self._extract_xml(f, 'word/fontTable.xml')
64+
self.numbering_text = self._extract_xml(f, 'word/numbering.xml')
65+
self.comment_text = self._extract_xml(f, 'word/comments.xml')
66+
6667
zipped_image_files = [
6768
e for e in f.infolist()
6869
if e.filename.startswith('word/media/')
@@ -79,6 +80,8 @@ def _build_data(self, path, *args, **kwargs):
7980
self.comment_root = parse_xml_from_string(self.comment_text)
8081

8182
def _parse_styles(self):
83+
if self.styles_text is None:
84+
return {}
8285
tree = parse_xml_from_string(self.styles_text)
8386
result = {}
8487
for style in find_all(tree, 'style'):

0 commit comments

Comments
 (0)