Skip to content

Fix segfault on trying to free an invalid pointer in tokenizer.c #14696

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/parser.pyx
Original file line number Diff line number Diff line change
@@ -184,6 +184,7 @@ cdef extern from "parser/tokenizer.h":

# error handling
char *warn_msg
int error;
char *error_msg

int skip_empty_lines
@@ -2010,7 +2011,7 @@ cdef raise_parser_error(object base, parser_t *parser):
Py_XDECREF(value)
raise old_exc
message = '%s. C error: ' % base
if parser.error_msg != NULL:
if parser.error != 0:
if PY3:
message += parser.error_msg.decode('utf-8')
else:
36 changes: 19 additions & 17 deletions pandas/src/parser/tokenizer.c
Original file line number Diff line number Diff line change
@@ -221,8 +221,6 @@ int parser_init(parser_t *self) {
self->word_starts = NULL;
self->line_start = NULL;
self->line_fields = NULL;
self->error_msg = NULL;
self->warn_msg = NULL;

// token stream
self->stream = (char*) malloc(STREAM_INIT_SIZE * sizeof(char));
@@ -271,7 +269,8 @@ int parser_init(parser_t *self) {

self->state = START_RECORD;

self->error_msg = NULL;
self->error = 0;
self->error_msg = (char*) malloc(200);
self->warn_msg = NULL;

self->commentchar = '\0';
@@ -400,7 +399,7 @@ static int push_char(parser_t *self, char c) {
if (self->stream_len >= self->stream_cap) {
TRACE(("push_char: ERROR!!! self->stream_len(%d) >= self->stream_cap(%d)\n",
self->stream_len, self->stream_cap))
self->error_msg = (char*) malloc(64);
self->error = 1;
sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n");
return PARSER_OUT_OF_MEMORY;
}
@@ -413,7 +412,7 @@ int P_INLINE end_field(parser_t *self) {
// self->numeric_field = 0;
if (self->words_len >= self->words_cap) {
TRACE(("end_field: ERROR!!! self->words_len(%zu) >= self->words_cap(%zu)\n", self->words_len, self->words_cap))
self->error_msg = (char*) malloc(64);
self->error = 1;
sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n");
return PARSER_OUT_OF_MEMORY;
}
@@ -509,7 +508,7 @@ static int end_line(parser_t *self) {

// file_lines is now the actual file line number (starting at 1)
if (self->error_bad_lines) {
self->error_msg = (char*) malloc(100);
self->error = 1;
sprintf(self->error_msg, "Expected %d fields in line %d, saw %d\n",
ex_fields, self->file_lines, fields);

@@ -533,7 +532,8 @@ static int end_line(parser_t *self) {

// might overrun the buffer when closing fields
if (make_stream_space(self, ex_fields - fields) < 0) {
self->error_msg = "out of memory";
self->error = 1;
sprintf(self->error_msg, "out of memory");
return -1;
}

@@ -549,10 +549,10 @@ static int end_line(parser_t *self) {

// good line, set new start point
if (self->lines >= self->lines_cap) {
TRACE(("end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n", self->lines, self->lines_cap)) \
self->error_msg = (char*) malloc(100); \
sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n"); \
return PARSER_OUT_OF_MEMORY; \
TRACE(("end_line: ERROR!!! self->lines(%zu) >= self->lines_cap(%zu)\n", self->lines, self->lines_cap));
self->error = 1;
sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n");
return PARSER_OUT_OF_MEMORY;
}
self->line_start[self->lines] = (self->line_start[self->lines - 1] +
fields);
@@ -606,12 +606,13 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
self->datalen = bytes_read;

if (status != REACHED_EOF && self->data == NULL) {
self->error_msg = (char*) malloc(200);

if (status == CALLING_READ_FAILED) {
self->error = 1;
sprintf(self->error_msg, ("Calling read(nbytes) on source failed. "
"Try engine='python'."));
} else {
self->error = 1;
sprintf(self->error_msg, "Unknown error in IO callback");
}
return -1;
@@ -635,7 +636,7 @@ static int parser_buffer_bytes(parser_t *self, size_t nbytes) {
TRACE(("PUSH_CHAR: Pushing %c, slen= %d, stream_cap=%zu, stream_len=%zu\n", c, slen, self->stream_cap, self->stream_len)) \
if (slen >= maxstreamsize) { \
TRACE(("PUSH_CHAR: ERROR!!! slen(%d) >= maxstreamsize(%d)\n", slen, maxstreamsize)) \
self->error_msg = (char*) malloc(100); \
self->error = 1; \
sprintf(self->error_msg, "Buffer overflow caught - possible malformed input file.\n"); \
return PARSER_OUT_OF_MEMORY; \
} \
@@ -737,7 +738,8 @@ int tokenize_bytes(parser_t *self, size_t line_limit)
start_lines = self->lines;

if (make_stream_space(self, self->datalen - self->datapos) < 0) {
self->error_msg = "out of memory";
self->error = 1;
sprintf(self->error_msg, "out of memory");
return -1;
}

@@ -1043,7 +1045,7 @@ int tokenize_bytes(parser_t *self, size_t line_limit)
PUSH_CHAR(c);
self->state = IN_FIELD;
} else {
self->error_msg = (char*) malloc(50);
self->error = 1;
sprintf(self->error_msg,
"delimiter expected after "
"quote in quote");
@@ -1150,13 +1152,13 @@ static int parser_handle_eof(parser_t *self) {

case ESCAPE_IN_QUOTED_FIELD:
case IN_QUOTED_FIELD:
self->error_msg = (char*)malloc(100);
self->error = 1;
sprintf(self->error_msg, "EOF inside string starting at line %d",
self->file_lines);
return -1;

case ESCAPED_CHAR:
self->error_msg = (char*)malloc(100);
self->error = 1;
sprintf(self->error_msg, "EOF following escape character");
return -1;

1 change: 1 addition & 0 deletions pandas/src/parser/tokenizer.h
Original file line number Diff line number Diff line change
@@ -211,6 +211,7 @@ typedef struct parser_t {

// error handling
char *warn_msg;
int error;
char *error_msg;

int skip_empty_lines;