Skip to content

Swap http-parser for llhttp #56

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 20 commits into from
Mar 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ jobs:
__version__\s*=\s*(?:['"])([[:PEP440:]])(?:['"])

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
uses: actions/setup-python@v2
if: steps.release.outputs.version == 0
with:
python-version: ${{ matrix.python-version }}

- name: Test
if: steps.release.outputs.version == 0
run: |
pip install wheel
pip install -e .[test]
python setup.py test
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,5 @@ __pycache__/
/.pytest_cache
/.mypy_cache
/.vscode
.eggs
.venv
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "vendor/http-parser"]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like we no longer need the http-parser submodule, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi! Actually, it is still used to parse URLs, as llhttp doesn't have this feature!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

uparser.pxd is used only to extern the URL-parsing functions from http_parser.h, and url_parser.pyx contains the previous code related to URL parsing. I couldn't extern both llhttp.h and http_parser.h in the same file, as they have naming conflicts.

path = vendor/http-parser
url = https://github.com/nodejs/http-parser.git
[submodule "vendor/llhttp"]
path = vendor/llhttp
url = https://github.com/nodejs/llhttp.git
9 changes: 4 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,16 @@ release: compile test
python3 setup.py sdist upload


test:
python3 setup.py test
test: compile
python3 -m unittest -v

clean:
find $(ROOT)/httptools/parser -name '*.c' | xargs rm -f
find $(ROOT)/httptools/parser -name '*.html' | xargs rm -f

distclean:
distclean: clean
git --git-dir="$(ROOT)/vendor/http-parser/.git" clean -dfx
find $(ROOT)/httptools/parser -name '*.c' | xargs rm -f
find $(ROOT)/httptools/parser -name '*.html' | xargs rm -f
git --git-dir="$(ROOT)/vendor/llhttp/.git" clean -dfx


testinstalled:
Expand Down
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ The package is available on PyPI: `pip install httptools`.
# APIs

httptools contains two classes `httptools.HttpRequestParser`,
`httptools.HttpResponseParser` and a function for parsing URLs
`httptools.parse_url`. See unittests for examples.
`httptools.HttpResponseParser` (implemented on top of
[llhttp](https://github.com/nodejs/llhttp)) and a function for
parsing URLs, `httptools.parse_url` (backed by
[http-parser](https://github.com/nodejs/http-parser) for now).
See the unit tests for examples.


```python
Expand Down
3 changes: 2 additions & 1 deletion httptools/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .parser import * # NoQA
from .errors import * # NoQA
from .url_parser import * # NoQA

__all__ = parser.__all__ + errors.__all__ # NoQA
__all__ = parser.__all__ + errors.__all__ + url_parser.__all__ # NoQA
255 changes: 136 additions & 119 deletions httptools/parser/cparser.pxd
Original file line number Diff line number Diff line change
@@ -1,139 +1,156 @@
from libc.stdint cimport uint16_t, uint32_t, uint64_t
from libc.stdint cimport int32_t, uint8_t, uint16_t, uint64_t


cdef extern from "../../vendor/http-parser/http_parser.h":
ctypedef int (*http_data_cb) (http_parser*,
cdef extern from "llhttp.h":
struct llhttp__internal_s:
int32_t _index
void *_span_pos0
void *_span_cb0
int32_t error
const char *reason
const char *error_pos
void *data
void *_current
uint64_t content_length
uint8_t type
uint8_t method
uint8_t http_major
uint8_t http_minor
uint8_t header_state
uint16_t flags
uint8_t upgrade
uint16_t status_code
uint8_t finish
void *settings
ctypedef llhttp__internal_s llhttp__internal_t
ctypedef llhttp__internal_t llhttp_t

ctypedef int (*llhttp_data_cb) (llhttp_t*,
const char *at,
size_t length) except -1

ctypedef int (*http_cb) (http_parser*) except -1

struct http_parser:
unsigned int type
unsigned int flags
unsigned int state
unsigned int header_state
unsigned int index

uint32_t nread
uint64_t content_length

unsigned short http_major
unsigned short http_minor
unsigned int status_code
unsigned int method
unsigned int http_errno

unsigned int upgrade

void *data

struct http_parser_settings:
http_cb on_message_begin
http_data_cb on_url
http_data_cb on_status
http_data_cb on_header_field
http_data_cb on_header_value
http_cb on_headers_complete
http_data_cb on_body
http_cb on_message_complete
http_cb on_chunk_header
http_cb on_chunk_complete

enum http_parser_type:
ctypedef int (*llhttp_cb) (llhttp_t*) except -1

struct llhttp_settings_s:
llhttp_cb on_message_begin
llhttp_data_cb on_url
llhttp_data_cb on_status
llhttp_data_cb on_header_field
llhttp_data_cb on_header_value
llhttp_cb on_headers_complete
llhttp_data_cb on_body
llhttp_cb on_message_complete
llhttp_cb on_chunk_header
llhttp_cb on_chunk_complete
ctypedef llhttp_settings_s llhttp_settings_t

enum llhttp_type:
HTTP_BOTH,
HTTP_REQUEST,
HTTP_RESPONSE,
HTTP_BOTH
HTTP_RESPONSE
ctypedef llhttp_type llhttp_type_t

enum http_errno:
enum llhttp_errno:
HPE_OK,
HPE_CB_message_begin,
HPE_CB_url,
HPE_CB_header_field,
HPE_CB_header_value,
HPE_CB_headers_complete,
HPE_CB_body,
HPE_CB_message_complete,
HPE_CB_status,
HPE_CB_chunk_header,
HPE_CB_chunk_complete,
HPE_INVALID_EOF_STATE,
HPE_HEADER_OVERFLOW,
HPE_INTERNAL,
HPE_STRICT,
HPE_LF_EXPECTED,
HPE_UNEXPECTED_CONTENT_LENGTH,
HPE_CLOSED_CONNECTION,
HPE_INVALID_VERSION,
HPE_INVALID_STATUS,
HPE_INVALID_METHOD,
HPE_INVALID_URL,
HPE_INVALID_HOST,
HPE_INVALID_PORT,
HPE_INVALID_PATH,
HPE_INVALID_QUERY_STRING,
HPE_INVALID_FRAGMENT,
HPE_LF_EXPECTED,
HPE_INVALID_CONSTANT,
HPE_INVALID_VERSION,
HPE_INVALID_HEADER_TOKEN,
HPE_INVALID_CONTENT_LENGTH,
HPE_INVALID_CHUNK_SIZE,
HPE_INVALID_CONSTANT,
HPE_INVALID_INTERNAL_STATE,
HPE_STRICT,
HPE_INVALID_STATUS,
HPE_INVALID_EOF_STATE,
HPE_INVALID_TRANSFER_ENCODING,
HPE_CB_MESSAGE_BEGIN,
HPE_CB_HEADERS_COMPLETE,
HPE_CB_MESSAGE_COMPLETE,
HPE_CB_CHUNK_HEADER,
HPE_CB_CHUNK_COMPLETE,
HPE_PAUSED,
HPE_UNKNOWN
HPE_PAUSED_UPGRADE,
HPE_USER
ctypedef llhttp_errno llhttp_errno_t

enum flags:
F_CHUNKED,
enum llhttp_flags:
F_CONNECTION_KEEP_ALIVE,
F_CONNECTION_CLOSE,
F_CONNECTION_UPGRADE,
F_TRAILING,
F_CHUNKED,
F_UPGRADE,
F_SKIPBODY

enum http_method:
DELETE, GET, HEAD, POST, PUT, CONNECT, OPTIONS, TRACE, COPY,
LOCK, MKCOL, MOVE, PROPFIND, PROPPATCH, SEARCH, UNLOCK, BIND,
REBIND, UNBIND, ACL, REPORT, MKACTIVITY, CHECKOUT, MERGE,
MSEARCH, NOTIFY, SUBSCRIBE, UNSUBSCRIBE, PATCH, PURGE, MKCALENDAR,
LINK, UNLINK

void http_parser_init(http_parser *parser, http_parser_type type)

size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len)

int http_should_keep_alive(const http_parser *parser)

void http_parser_settings_init(http_parser_settings *settings)

const char *http_errno_name(http_errno err)
const char *http_errno_description(http_errno err)
const char *http_method_str(http_method m)

# URL Parser

enum http_parser_url_fields:
UF_SCHEMA = 0,
UF_HOST = 1,
UF_PORT = 2,
UF_PATH = 3,
UF_QUERY = 4,
UF_FRAGMENT = 5,
UF_USERINFO = 6,
UF_MAX = 7

struct http_parser_url_field_data:
uint16_t off
uint16_t len

struct http_parser_url:
uint16_t field_set
uint16_t port
http_parser_url_field_data[<int>UF_MAX] field_data

void http_parser_url_init(http_parser_url *u)

int http_parser_parse_url(const char *buf,
size_t buflen,
int is_connect,
http_parser_url *u)
F_CONTENT_LENGTH,
F_SKIPBODY,
F_TRAILING,
F_LENIENT,
F_TRANSFER_ENCODING
ctypedef llhttp_flags llhttp_flags_t

enum llhttp_method:
HTTP_DELETE,
HTTP_GET,
HTTP_HEAD,
HTTP_POST,
HTTP_PUT,
HTTP_CONNECT,
HTTP_OPTIONS,
HTTP_TRACE,
HTTP_COPY,
HTTP_LOCK,
HTTP_MKCOL,
HTTP_MOVE,
HTTP_PROPFIND,
HTTP_PROPPATCH,
HTTP_SEARCH,
HTTP_UNLOCK,
HTTP_BIND,
HTTP_REBIND,
HTTP_UNBIND,
HTTP_ACL,
HTTP_REPORT,
HTTP_MKACTIVITY,
HTTP_CHECKOUT,
HTTP_MERGE,
HTTP_MSEARCH,
HTTP_NOTIFY,
HTTP_SUBSCRIBE,
HTTP_UNSUBSCRIBE,
HTTP_PATCH,
HTTP_PURGE,
HTTP_MKCALENDAR,
HTTP_LINK,
HTTP_UNLINK,
HTTP_SOURCE,
HTTP_PRI,
HTTP_DESCRIBE,
HTTP_ANNOUNCE,
HTTP_SETUP,
HTTP_PLAY,
HTTP_PAUSE,
HTTP_TEARDOWN,
HTTP_GET_PARAMETER,
HTTP_SET_PARAMETER,
HTTP_REDIRECT,
HTTP_RECORD,
HTTP_FLUSH
ctypedef llhttp_method llhttp_method_t

void llhttp_init(llhttp_t* parser, llhttp_type_t type, const llhttp_settings_t* settings)

void llhttp_settings_init(llhttp_settings_t* settings)

llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len)

void llhttp_resume_after_upgrade(llhttp_t* parser)

int llhttp_should_keep_alive(const llhttp_t* parser)

const char* llhttp_get_error_pos(const llhttp_t* parser)
const char* llhttp_get_error_reason(const llhttp_t* parser)
const char* llhttp_method_name(llhttp_method_t method)

void llhttp_set_error_reason(llhttp_t* parser, const char* reason);
Loading