Skip to content

Added JSON Hot-Swapping Utility, Enabled for JSON-LD and RDFa #69

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions extruct/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
JSON-LD extractor
"""

import json
from extruct import utils
import re

import lxml.etree
Expand All @@ -29,10 +29,10 @@ def extract_items(self, document, *args, **kwargs):
def _extract_items(self, node):
script = node.xpath('string()')
try:
data = json.loads(script)
except ValueError:
data = utils.json_loads(script)
except utils.native_json_exc:
# sometimes JSON-decoding errors are due to leading HTML or JavaScript comments
data = json.loads(HTML_OR_JS_COMMENTLINE.sub('', script))
data = utils.json_loads(HTML_OR_JS_COMMENTLINE.sub('', script))
if isinstance(data, list):
return data
elif isinstance(data, dict):
Expand Down
4 changes: 2 additions & 2 deletions extruct/rdfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

Based on pyrdfa3 and rdflib
"""
import json
from extruct import utils
import logging
rdflib_logger = logging.getLogger('rdflib')
rdflib_logger.setLevel(logging.ERROR)
Expand Down Expand Up @@ -48,4 +48,4 @@ def extract_items(self, document, url, expanded=True, *args, **kwargs):

g = PyRdfa(options, base=url).graph_from_DOM(document, graph=Graph(), pgraph=Graph())
jsonld_string = g.serialize(format='json-ld', auto_compact=not expanded).decode('utf-8')
return json.loads(jsonld_string)
return utils.json_loads(jsonld_string)
57 changes: 57 additions & 0 deletions extruct/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
import sys


# Python 2's json module has no JSONDecodeError, so fall back to ValueError
# there; where JSONDecodeError does exist it is the class json.loads raises.
native_json_exc = getattr(json, 'JSONDecodeError', ValueError)


# Active decoder and the exception classes json_loads should convert.
_json_decoder = json.loads
_json_decoder_raises = tuple()  # Better not to catch built-in errors at all!


def set_json_decoder(loader_func=_json_decoder,
                     loader_raises=_json_decoder_raises):
    """
    Sets extruct's preferred JSON decoder function.

    You should provide a function that accepts strings, and returns decoded
    native objects, as loader_func.

    When your preferred decoder encounters non-JSON strings, or malformed JSON,
    typically it will raise Exceptions. Extruct expects json.JSONDecodeError
    in such cases. If your preferred decoder does something else (such as the
    ValueErrors raised by ujson), provide a tuple of all Exception classes
    raised on bad JSON or non-JSON as loader_raises. A single exception class
    or any other iterable of classes is also accepted and stored as a tuple.

    Calling this function with no arguments restores the defaults
    (stdlib json.loads, no converted exceptions).

    Raises TypeError if loader_func is not callable; in that case the
    currently configured decoder is left untouched.
    """
    global _json_decoder
    global _json_decoder_raises
    # Fail here rather than at the first decode attempt.
    if not callable(loader_func):
        raise TypeError(
            "loader_func must be callable, got {!r}".format(loader_func))
    # An `except` clause only accepts a class or a tuple of classes; a list
    # would otherwise blow up later, hiding the decoder's real error.
    if isinstance(loader_raises, type):
        loader_raises = (loader_raises,)
    else:
        loader_raises = tuple(loader_raises)
    _json_decoder = loader_func
    _json_decoder_raises = loader_raises


def json_loads(json_string):
    """
    Uses the preferred JSON decoder (default is stdlib json) to decode a string,
    converting any idiosyncratic exceptions to json.JSONDecodeError.

    Using this utility function allows one to swap in different decoders with
    utils.set_json_decoder, for example to use `ujson`, without requiring
    extruct to directly handle and support the weirdnesses of each third-party
    json library.

    Only exceptions listed via set_json_decoder's loader_raises are converted;
    anything else raised by the decoder propagates unchanged, as do errors
    that already are instances of native_json_exc.

    On Python 3 the converted error is a json.JSONDecodeError with pos=0 (the
    real error index is unknown) and the decoder's exception as __cause__.
    On Python 2 it is a ValueError whose message includes the decoder's error.
    """
    # No `global` statement needed: the module-level names are only read, and
    # they are looked up at call time, so a decoder swapped in later is used.
    try:
        return _json_decoder(json_string)
    except _json_decoder_raises as exc:
        if isinstance(exc, native_json_exc):
            raise
        # TODO: Deprecate with Python 2. Reason: Prefer exception chaining with `raise from`
        if sys.version_info < (3,):
            # Report the decoder's error, not the repr of a traceback object.
            raise ValueError("Error decoding document: {}".format(exc))
        error = json.JSONDecodeError(
            msg="Error decoding document (error index unknown, see preceding traceback)",
            doc=json_string,
            pos=0,
        )
        # Equivalent of `raise error from exc`, spelled so the module still
        # parses under Python 2.
        error.__cause__ = exc
        raise error.with_traceback(sys.exc_info()[2])
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def get_version():
packages=find_packages(exclude=['tests',]),
package_data={'extruct': ['VERSION']},
install_requires=['lxml', 'rdflib', 'rdflib-jsonld'],
tests_require=['ujson'],
extras_require={
'service': [
'bottle',
Expand Down
56 changes: 56 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# -*- coding: utf-8 -*-
import unittest
import json

import ujson
from extruct import utils


class NotAJSONDecodeError(ValueError):
    """Stand-in for a third-party decoder's own "bad JSON" exception type."""


class _json_shimmer(object):
    """Fake decoder wrapping json.loads that records whether it was called.

    Used to check that utils.json_loads dispatches to the decoder that was
    configured, and that the decoder's own error type gets converted.
    """

    def __init__(self):
        # Flipped to True as soon as decode() is entered.
        self.invoked = False

    def decode(self, json_string):
        """Decode json_string, failing in decoder-specific ways.

        Raises ValueError for the sentinel input 'FAIL WITH VALUEERROR' and
        NotAJSONDecodeError when the stdlib decoder rejects the input.
        """
        # Record the call up front so every exit path, including the
        # sentinel ValueError below, leaves the flag set.
        self.invoked = True
        if json_string == 'FAIL WITH VALUEERROR':
            raise ValueError("Yes sir")
        try:
            return json.loads(json_string)
        except utils.native_json_exc:
            raise NotAJSONDecodeError("This operation totally failed")


class TestJson(unittest.TestCase):
    """Tests for the swappable JSON decoder in extruct.utils."""

    def setUp(self):
        # The decoder is module-global state: start every test from the
        # defaults and restore them afterwards, so results do not depend on
        # test order and the swapped decoder cannot leak into other modules.
        utils.set_json_decoder()
        self.addCleanup(utils.set_json_decoder)

    def test_json_abstraction(self):
        # Use default decoder
        self.assertEqual(utils._json_decoder, json.loads)
        self.assertEqual(utils._json_decoder_raises, tuple())
        self.assertEqual(utils.json_loads('{}'), {})
        with self.assertRaises(utils.native_json_exc):  # ugh, Python 2
            utils.json_loads('{')
        # Set decoder, try again
        shimmer = _json_shimmer()
        utils.set_json_decoder(shimmer.decode, (NotAJSONDecodeError,))
        self.assertEqual(utils._json_decoder, shimmer.decode)
        self.assertEqual(utils._json_decoder_raises, (NotAJSONDecodeError,))
        self.assertEqual(utils.json_loads('{}'), {})
        # ensure utils.json_loads didn't call a stale reference to json.loads
        self.assertTrue(shimmer.invoked)
        # Specified exceptions should be converted to JSONDecodeErrors.
        with self.assertRaises(utils.native_json_exc):
            utils.json_loads('{')
        # Others should not.
        with self.assertRaises(ValueError):
            utils.json_loads('FAIL WITH VALUEERROR')

    def test_ujson(self):
        utils.set_json_decoder(ujson.loads, (ValueError,))
        self.assertEqual(utils._json_decoder, ujson.loads)
        self.assertEqual(utils._json_decoder_raises, (ValueError,))
        self.assertEqual(utils.json_loads('{"foo": "bar"}'), {'foo': 'bar'})
        # Malformed input must surface as the native decode error type.
        with self.assertRaises(utils.native_json_exc):
            utils.json_loads('{')
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ deps =
pytest
pytest-cov
mock
ujson

commands = py.test --cov-report=term --cov-report= --cov=extruct {posargs:extruct tests}