Skip to content

Upgrade to 2.6.0, add IDNA functions to library #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 25, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions ada_url/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from ada_url.ada_adapter import (
URL,
check_url,
idna,
idna_to_ascii,
idna_to_unicode,
join_url,
normalize_url,
parse_url,
@@ -10,6 +13,9 @@
__all__ = [
'URL',
'check_url',
'idna',
'idna_to_ascii',
'idna_to_unicode',
'join_url',
'normalize_url',
'parse_url',
219 changes: 118 additions & 101 deletions ada_url/ada.cpp

Large diffs are not rendered by default.

405 changes: 359 additions & 46 deletions ada_url/ada.h

Large diffs are not rendered by default.

76 changes: 66 additions & 10 deletions ada_url/ada_adapter.py
Original file line number Diff line number Diff line change
@@ -18,10 +18,10 @@
SET_ATTRIBUTES = frozenset(URL_ATTRIBUTES)


def _get_urlobj(constructor, *args):
urlobj = constructor(*args)
def _get_obj(constructor, destructor, *args):
obj = constructor(*args)

return ffi.gc(urlobj, lib.ada_free)
return ffi.gc(obj, destructor)


def _get_str(x):
@@ -79,11 +79,14 @@ def __init__(self, url, base=None):
url_bytes = url.encode('utf-8')

if base is None:
self.urlobj = _get_urlobj(lib.ada_parse, url_bytes, len(url_bytes))
self.urlobj = _get_obj(
lib.ada_parse, lib.ada_free, url_bytes, len(url_bytes)
)
else:
base_bytes = base.encode('utf-8')
self.urlobj = _get_urlobj(
self.urlobj = _get_obj(
lib.ada_parse_with_base,
lib.ada_free,
url_bytes,
len(url_bytes),
base_bytes,
@@ -168,7 +171,7 @@ def check_url(s):
except Exception:
return False

urlobj = _get_urlobj(lib.ada_parse, s_bytes, len(s_bytes))
urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes))
return lib.ada_is_valid(urlobj)


@@ -191,8 +194,13 @@ def join_url(base_url, s):
except Exception:
raise ValueError('Invalid URL') from None

urlobj = _get_urlobj(
lib.ada_parse_with_base, s_bytes, len(s_bytes), base_bytes, len(base_bytes)
urlobj = _get_obj(
lib.ada_parse_with_base,
lib.ada_free,
s_bytes,
len(s_bytes),
base_bytes,
len(base_bytes),
)
if not lib.ada_is_valid(urlobj):
raise ValueError('Invalid URL') from None
@@ -258,7 +266,7 @@ def parse_url(s, attributes=PARSE_ATTRIBUTES):
raise ValueError('Invalid URL') from None

ret = {}
urlobj = _get_urlobj(lib.ada_parse, s_bytes, len(s_bytes))
urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes))
if not lib.ada_is_valid(urlobj):
raise ValueError('Invalid URL') from None

@@ -295,7 +303,7 @@ def replace_url(s, **kwargs):
except Exception:
raise ValueError('Invalid URL') from None

urlobj = _get_urlobj(lib.ada_parse, s_bytes, len(s_bytes))
urlobj = _get_obj(lib.ada_parse, lib.ada_free, s_bytes, len(s_bytes))
if not lib.ada_is_valid(urlobj):
raise ValueError('Invalid URL') from None

@@ -315,3 +323,51 @@ def replace_url(s, **kwargs):
raise ValueError(f'Invalid value for {attr}') from None

return _get_str(lib.ada_get_href(urlobj))


class idna:
    """Process international domains according to the UTS #46 standard.

    :func:`idna.encode` implements the UTS #46 ``ToASCII`` operation.
    Its output is a Python ``bytes`` object.
    It is also available as :func:`idna_to_ascii`.

    .. code-block:: python

        >>> from ada_url import idna
        >>> idna.encode('meßagefactory.ca')
        b'xn--meagefactory-m9a.ca'

    :func:`idna.decode` implements the UTS #46 ``ToUnicode`` operation.
    Its output is a Python ``str`` object.
    It is also available as :func:`idna_to_unicode`.

    .. code-block:: python

        >>> from ada_url import idna
        >>> idna.decode('xn--meagefactory-m9a.ca')
        'meßagefactory.ca'

    Both functions accept either ``str`` or ``bytes`` objects as input.
    """

    @staticmethod
    def decode(s):
        """Apply ``ToUnicode`` to *s* and return the result as a ``str``.

        A ``str`` argument is first encoded as ASCII, so a ``str`` holding
        non-ASCII characters raises ``UnicodeEncodeError``; ``bytes`` are
        passed to the library unchanged.
        """
        raw = s.encode('ascii') if isinstance(s, str) else s

        # The owned string returned by the library is wrapped so that
        # ``lib.ada_free_owned_string`` is registered as its destructor.
        owned = _get_obj(
            lib.ada_idna_to_unicode, lib.ada_free_owned_string, raw, len(raw)
        )
        return _get_str(owned)

    @staticmethod
    def encode(s):
        """Apply ``ToASCII`` to *s* and return the result as ``bytes``.

        A ``str`` argument is first encoded as UTF-8; ``bytes`` are passed
        to the library unchanged. A zero-length result from the library is
        returned as ``b''``.
        """
        raw = s.encode('utf-8') if isinstance(s, str) else s

        # The owned string returned by the library is wrapped so that
        # ``lib.ada_free_owned_string`` is registered as its destructor.
        owned = _get_obj(
            lib.ada_idna_to_ascii, lib.ada_free_owned_string, raw, len(raw)
        )
        if not owned.length:
            return b''
        return ffi.string(owned.data, owned.length)


# Function-style aliases for the ``idna`` static methods, so callers can use
# ``idna_to_ascii(...)`` / ``idna_to_unicode(...)`` without going through the
# ``idna`` namespace class.
idna_to_ascii = idna.encode

idna_to_unicode = idna.decode
4 changes: 4 additions & 0 deletions ada_url/ada_c.h
Original file line number Diff line number Diff line change
@@ -97,4 +97,8 @@ bool ada_has_search(ada_url result);
// returns a pointer to the internal url_aggregator::url_components
const ada_url_components* ada_get_components(ada_url result);

// idna methods
ada_owned_string ada_idna_to_unicode(const char* input, size_t length);
ada_owned_string ada_idna_to_ascii(const char* input, size_t length);

#endif // ADA_C_H
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = ada-url
version = 1.0.1
version = 1.1.0
description = 'URL parser and manipulator based on the WHAT WG URL standard'
long_description = file: README.rst
long_description_content_type = text/x-rst
16 changes: 16 additions & 0 deletions tests/test_ada_url.py
Original file line number Diff line number Diff line change
@@ -3,6 +3,9 @@
from ada_url import (
URL,
check_url,
idna,
idna_to_ascii,
idna_to_unicode,
join_url,
normalize_url,
parse_url,
@@ -290,3 +293,16 @@ def test_replace_url_error(self):
with self.subTest(s=s, kwargs=kwargs):
with self.assertRaises(ValueError):
replace_url(s, **kwargs)

def test_idna_decode(self):
    """Both decode spellings map the punycode name to its Unicode form."""
    expected = 'meßagefactory.ca'

    # ``str`` input through the class-style entry point.
    self.assertEqual(idna.decode('xn--meagefactory-m9a.ca'), expected)
    # ``bytes`` input through the function-style alias.
    self.assertEqual(idna_to_unicode(b'xn--meagefactory-m9a.ca'), expected)

def test_idna_encode(self):
    """Both encode spellings map the Unicode name to its punycode form."""
    domain = 'meßagefactory.ca'
    expected = b'xn--meagefactory-m9a.ca'

    # ``str`` input through the class-style entry point.
    self.assertEqual(idna.encode(domain), expected)
    # UTF-8 ``bytes`` input through the function-style alias.
    self.assertEqual(idna_to_ascii(domain.encode('utf-8')), expected)