Skip to content

Commit 6be12fc

Browse files
committed
bpo-46315: Use fopencookie() to avoid dup() in _PyTokenizer_FindEncodingFilename
1 parent 2bde682 commit 6be12fc

File tree

2 files changed

+76
-6
lines changed

2 files changed

+76
-6
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
``_PyTokenizer_FindEncodingFilename`` now uses ``fopencookie`` to avoid
2+
``dup`` on Emscripten and WASI.

Parser/tokenizer.c

+74-6
Original file line numberDiff line numberDiff line change
@@ -2072,6 +2072,79 @@ _PyTokenizer_Get(struct tok_state *tok,
20722072
return result;
20732073
}
20742074

2075+
#if defined(__wasi__) || defined(__EMSCRIPTEN__)
2076+
/* fdopen() with borrowed fd
2077+
2078+
WASI does not provide dup() and Emscripten's dup() emulation with open()
2079+
is slow. Implement fdopen() with fd borrowing on top of fopencookie().
2080+
*/
2081+
typedef union {
2082+
void *cookie;
2083+
int fd;
2084+
} borrowed;
2085+
2086+
static ssize_t
2087+
borrow_read(void *cookie, char *buf, size_t size)
2088+
{
2089+
borrowed b;
2090+
b.cookie = cookie;
2091+
return read(b.fd, (void *)buf, size);
2092+
}
2093+
2094+
static ssize_t
2095+
borrow_write(void *cookie, const char *buf, size_t size)
2096+
{
2097+
errno = ENOTSUP;
2098+
return -1;
2099+
}
2100+
2101+
static int
2102+
borrow_seek(void *cookie, off_t *off, int whence)
2103+
{
2104+
borrowed b;
2105+
b.cookie = cookie;
2106+
off_t pos;
2107+
pos = lseek(b.fd, *off, whence);
2108+
if (pos == (off_t)-1) {
2109+
return -1;
2110+
} else {
2111+
*off = pos;
2112+
return 0;
2113+
}
2114+
}
2115+
2116+
static int
2117+
borrow_close(void *cookie)
2118+
{
2119+
// does not close(fd)
2120+
return 0;
2121+
}
2122+
2123+
static FILE *
2124+
fdopen_borrow(int fd, const char *mode) {
2125+
// only reading is supported
2126+
if (strcmp(mode, "r") != 0) {
2127+
return NULL;
2128+
}
2129+
cookie_io_functions_t cookie_io = {
2130+
borrow_read, borrow_write, borrow_seek, borrow_close
2131+
};
2132+
// cookie is just the fd
2133+
borrowed b;
2134+
b.fd = fd;
2135+
return fopencookie(b.cookie, "r", cookie_io);
2136+
}
2137+
#else
2138+
static FILE *
2139+
fdopen_borrow(int fd, const char *mode) {
2140+
fd = _Py_dup(fd);
2141+
if (fd < 0) {
2142+
return NULL;
2143+
}
2144+
return fdopen(fd, mode);
2145+
}
2146+
#endif
2147+
20752148
/* Get the encoding of a Python file. Check for the coding cookie and check if
20762149
the file starts with a BOM.
20772150
@@ -2091,12 +2164,7 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename)
20912164
const char *p_end = NULL;
20922165
char *encoding = NULL;
20932166

2094-
fd = _Py_dup(fd);
2095-
if (fd < 0) {
2096-
return NULL;
2097-
}
2098-
2099-
fp = fdopen(fd, "r");
2167+
fp = fdopen_borrow(fd, "r");
21002168
if (fp == NULL) {
21012169
return NULL;
21022170
}

0 commit comments

Comments
 (0)