Merge pull request #62 from OpenScienceFramework/issue_62

jlward · jlward · commit 1d670bebcb0f · 2013-09-19T08:44:07.000-07:00
Create utility scripts for docx2html and docx2markdown
diff --git a/CHANGELOG b/CHANGELOG
@@ -1,6 +1,9 @@
 
 Changelog
 =========
+* 0.3.12
+    * Added command line support to convert from docx to either html or
+      markdown.
 * 0.3.11
     * The non breaking hyphen tag was not correctly being imported. This issue
       has been fixed.
diff --git a/README.rst b/README.rst
@@ -231,3 +231,8 @@ Optional Arguments
 ##################
 
 You can pass in `convert_root_level_upper_roman=True` to the parser and it will convert all root level upper roman lists to headings instead.
+
+Command Line Execution
+######################
+
+First, install pydocx by running `pip install pydocx`. Then run `pydocx --html path/to/file.docx path/to/output.html` to convert a document to HTML. To convert to markdown instead, replace `pydocx --html` with `pydocx --markdown`.
diff --git a/pydocx/__init__.py b/pydocx/__init__.py
@@ -1,3 +1,4 @@
+import sys
 from .parsers import Docx2Html, Docx2Markdown
 
 
@@ -9,3 +10,42 @@ def docx2markdown(path):
     return Docx2Markdown(path).parsed
 
 VERSION = '0.3.11'
+
+
+def main(argv=None):
+    """Convert a docx file to HTML or markdown from the command line.
+
+    Usage: ``pydocx (--html|--markdown) path/to/file.docx path/to/output``
+
+    :param argv: Argument list including the program name at index 0.
+        Defaults to ``sys.argv`` when omitted.
+
+    On a usage error a message is written to stderr and the process exits
+    with status 1, so shells and scripts can detect the failure.
+    """
+    if argv is None:
+        argv = sys.argv
+    try:
+        # Arguments beyond the third are ignored.
+        parser_to_use, path_to_docx, path_to_output = argv[1:4]
+    except ValueError:
+        sys.stderr.write(
+            'Must specify which parser as well as the file to convert '
+            'and the name of the resulting file.\n'
+        )
+        sys.exit(1)
+    parsers = {
+        '--html': Docx2Html,
+        '--markdown': Docx2Markdown,
+    }
+    parser_class = parsers.get(parser_to_use)
+    if parser_class is None:
+        sys.stderr.write('Only valid parsers are --html and --markdown\n')
+        sys.exit(1)
+    output = parser_class(path_to_docx).parsed
+    # The output is encoded explicitly, so the file is opened in binary mode.
+    with open(path_to_output, 'wb') as f:
+        f.write(output.encode('utf-8'))
+
+if __name__ == '__main__':
+    main()
diff --git a/setup.py b/setup.py
@@ -9,7 +9,6 @@
     from ez_setup import use_setuptools
     use_setuptools()
     from setuptools import setup, find_packages  # noqa
-
 rel_file = lambda *args: os.path.join(
     os.path.dirname(os.path.abspath(__file__)), *args)
 
@@ -55,4 +54,9 @@ def get_description():
         "Topic :: Text Processing :: Markup :: XML",
     ],
     long_description=get_description(),
+    entry_points={
+        'console_scripts': [
+            'pydocx = pydocx.__init__:main',
+        ],
+    },
 )