Skip to content

Commit 930bb16

Browse files
drmoose, bstaletic, Skylion007, henryiii, and rwgk
authored
Call PySys_SetArgv when initializing interpreter. (#2341)
* Call PySys_SetArgv when initializing interpreter.
* Document argc/argv parameters in initialize_interpreter.
* Remove manual memory management from set_interpreter_argv in favor of smart pointers.
* Use size_t for indexers in set_interpreter_argv.
* Minimize macros for flow control in set_interpreter_argv.
* Fix 'unused variable' warning on Py2
* whitespace
* Define wide_char_arg_deleter outside set_interpreter_argv.
* Do sys.path workaround in C++ rather than eval.
* Factor out wchar conversion to a separate function.
* Restore widened_argv variable declaration.
* Fix undeclared widened_arg variable on some paths.
* Use delete[] to match new wchar_t[].
* Fix compiler errors
* Use PY_VERSION_HEX for a cleaner CVE-2008-5983 mode check.
* Fix typo
* Use explicit type for deleter so delete[] works cross-compiler.
* Always use PySys_SetArgvEx because pybind11 doesn't support pythons that don't include it.
* Remove pointless ternary operator.
* Use unique_ptr.reset instead of a second initialization.
* Rename add_program_dir_to_path parameter to clarify intent.
* Add defined() check before evaluating HAVE_BROKEN_MBSTOWCS.
* Apply clang-tidy fixes
* Pre-commit
* refactor: use const for set_interpreter_argv
* Try to fix const issue and allocate vector properly
* fix: copy strings on Python 2
* Applying clang-format-diff relative to master.

  The only manual change is an added empty line between pybind11 and system `#include`s.

  ```
  git diff -U0 --no-color master | python3 $HOME/clone/llvm-project/clang/tools/clang-format/clang-format-diff.py -p1 -style=file -i
  ```

Co-authored-by: Boris Staletic <[email protected]>
Co-authored-by: Aaron Gokaslan <[email protected]>
Co-authored-by: Henry Schreiner <[email protected]>
Co-authored-by: Ralf W. Grosse-Kunstleve <[email protected]>
1 parent 503ff2a commit 930bb16

File tree

3 files changed

+122
-8
lines changed

3 files changed

+122
-8
lines changed

include/pybind11/embed.h

Lines changed: 93 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
#include "pybind11.h"
1313
#include "eval.h"
1414

15+
#include <memory>
16+
#include <vector>
17+
1518
#if defined(PYPY_VERSION)
1619
# error Embedding the interpreter is not supported with PyPy
1720
#endif
@@ -83,29 +86,106 @@ struct embedded_module {
8386
}
8487
};
8588

89+
/// Deleter for the wide strings produced by `widen_chars`. The release routine is
/// chosen at compile time so that it matches the allocator `widen_chars` uses for
/// the Python version being built against.
struct wide_char_arg_deleter {
    void operator()(wchar_t *ptr) const {
#if PY_VERSION_HEX < 0x030500f0
        // Older Pythons: the buffer was allocated with `new wchar_t[]`.
        delete[] ptr;
#else
        // The buffer came from Py_DecodeLocale and must go back through PyMem_RawFree.
        // API docs: https://docs.python.org/3/c-api/sys.html#c.Py_DecodeLocale
        PyMem_RawFree(ptr);
#endif
    }
};
99+
100+
/// Convert a multibyte C string into a newly allocated, null-terminated wide string.
///
/// Returns `nullptr` when `safe_arg` is null or cannot be converted. A non-null
/// result is owned by the caller and must be released the way `wide_char_arg_deleter`
/// does it: `PyMem_RawFree` when it came from `Py_DecodeLocale` (Python >= 3.5),
/// `delete[]` otherwise.
inline wchar_t *widen_chars(const char *safe_arg) {
    if (safe_arg == nullptr) {
        // Nothing to convert; report it the same way as a conversion failure.
        return nullptr;
    }
#if PY_VERSION_HEX >= 0x030500f0
    wchar_t *widened_arg = Py_DecodeLocale(safe_arg, nullptr);
#else
    wchar_t *widened_arg = nullptr;
#    if defined(HAVE_BROKEN_MBSTOWCS) && HAVE_BROKEN_MBSTOWCS
    // The length query form of mbstowcs is not usable here, so size the buffer by
    // the byte length, which is never smaller than the number of wide characters.
    size_t count = strlen(safe_arg);
#    else
    size_t count = mbstowcs(nullptr, safe_arg, 0);
#    endif
    if (count != static_cast<size_t>(-1)) {
        widened_arg = new wchar_t[count + 1];
        // The conversion itself can still fail (notably when `count` came from
        // strlen and the input was never validated). Do not hand back a partially
        // written, possibly unterminated buffer in that case.
        if (mbstowcs(widened_arg, safe_arg, count + 1) == static_cast<size_t>(-1)) {
            delete[] widened_arg;
            widened_arg = nullptr;
        }
    }
#endif
    return widened_arg;
}
117+
118+
/// Python 2.x/3.x-compatible version of `PySys_SetArgv`
119+
inline void set_interpreter_argv(int argc, const char *const *argv, bool add_program_dir_to_path) {
120+
// Before it was special-cased in python 3.8, passing an empty or null argv
121+
// caused a segfault, so we have to reimplement the special case ourselves.
122+
bool special_case = (argv == nullptr || argc <= 0);
123+
124+
const char *const empty_argv[]{"\0"};
125+
const char *const *safe_argv = special_case ? empty_argv : argv;
126+
if (special_case)
127+
argc = 1;
128+
129+
auto argv_size = static_cast<size_t>(argc);
130+
#if PY_MAJOR_VERSION >= 3
131+
// SetArgv* on python 3 takes wchar_t, so we have to convert.
132+
std::unique_ptr<wchar_t *[]> widened_argv(new wchar_t *[argv_size]);
133+
std::vector<std::unique_ptr<wchar_t[], wide_char_arg_deleter>> widened_argv_entries;
134+
widened_argv_entries.reserve(argv_size);
135+
for (size_t ii = 0; ii < argv_size; ++ii) {
136+
widened_argv_entries.emplace_back(widen_chars(safe_argv[ii]));
137+
if (!widened_argv_entries.back()) {
138+
// A null here indicates a character-encoding failure or the python
139+
// interpreter out of memory. Give up.
140+
return;
141+
}
142+
widened_argv[ii] = widened_argv_entries.back().get();
143+
}
144+
145+
auto pysys_argv = widened_argv.get();
146+
#else
147+
// python 2.x
148+
std::vector<std::string> strings{safe_argv, safe_argv + argv_size};
149+
std::vector<char *> char_strings{argv_size};
150+
for (std::size_t i = 0; i < argv_size; ++i)
151+
char_strings[i] = &strings[i][0];
152+
char **pysys_argv = char_strings.data();
153+
#endif
154+
155+
PySys_SetArgvEx(argc, pysys_argv, static_cast<int>(add_program_dir_to_path));
156+
}
157+
86158
PYBIND11_NAMESPACE_END(detail)
87159

88160
/** \rst
89161
Initialize the Python interpreter. No other pybind11 or CPython API functions can be
90162
called before this is done; with the exception of `PYBIND11_EMBEDDED_MODULE`. The
91-
optional parameter can be used to skip the registration of signal handlers (see the
92-
`Python documentation`_ for details). Calling this function again after the interpreter
93-
has already been initialized is a fatal error.
163+
optional `init_signal_handlers` parameter can be used to skip the registration of
164+
signal handlers (see the `Python documentation`_ for details). Calling this function
165+
again after the interpreter has already been initialized is a fatal error.
94166
95167
If initializing the Python interpreter fails, then the program is terminated. (This
96168
is controlled by the CPython runtime and is an exception to pybind11's normal behavior
97169
of throwing exceptions on errors.)
98170
171+
The remaining optional parameters, `argc`, `argv`, and `add_program_dir_to_path` are
172+
used to populate ``sys.argv`` and ``sys.path``.
173+
See the |PySys_SetArgvEx documentation|_ for details.
174+
99175
.. _Python documentation: https://docs.python.org/3/c-api/init.html#c.Py_InitializeEx
176+
.. |PySys_SetArgvEx documentation| replace:: ``PySys_SetArgvEx`` documentation
177+
.. _PySys_SetArgvEx documentation: https://docs.python.org/3/c-api/init.html#c.PySys_SetArgvEx
100178
\endrst */
101-
inline void initialize_interpreter(bool init_signal_handlers = true) {
179+
inline void initialize_interpreter(bool init_signal_handlers = true,
180+
int argc = 0,
181+
const char *const *argv = nullptr,
182+
bool add_program_dir_to_path = true) {
102183
if (Py_IsInitialized() != 0)
103184
pybind11_fail("The interpreter is already running");
104185

105186
Py_InitializeEx(init_signal_handlers ? 1 : 0);
106187

107-
// Make .py files in the working directory available by default
108-
module_::import("sys").attr("path").cast<list>().append(".");
188+
detail::set_interpreter_argv(argc, argv, add_program_dir_to_path);
109189
}
110190

111191
/** \rst
@@ -167,6 +247,8 @@ inline void finalize_interpreter() {
167247
Scope guard version of `initialize_interpreter` and `finalize_interpreter`.
168248
This is a move-only guard and only a single instance can exist.
169249
250+
See `initialize_interpreter` for a discussion of its constructor arguments.
251+
170252
.. code-block:: cpp
171253
172254
#include <pybind11/embed.h>
@@ -178,8 +260,11 @@ inline void finalize_interpreter() {
178260
\endrst */
179261
class scoped_interpreter {
180262
public:
181-
scoped_interpreter(bool init_signal_handlers = true) {
182-
initialize_interpreter(init_signal_handlers);
263+
scoped_interpreter(bool init_signal_handlers = true,
264+
int argc = 0,
265+
const char *const *argv = nullptr,
266+
bool add_program_dir_to_path = true) {
267+
initialize_interpreter(init_signal_handlers, argc, argv, add_program_dir_to_path);
183268
}
184269

185270
scoped_interpreter(const scoped_interpreter &) = delete;

tests/test_embed/test_interpreter.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ class Widget {
2323

2424
std::string the_message() const { return message; }
2525
virtual int the_answer() const = 0;
26+
virtual std::string argv0() const = 0;
2627

2728
private:
2829
std::string message;
@@ -32,6 +33,7 @@ class PyWidget final : public Widget {
3233
using Widget::Widget;
3334

3435
int the_answer() const override { PYBIND11_OVERRIDE_PURE(int, Widget, the_answer); }
36+
std::string argv0() const override { PYBIND11_OVERRIDE_PURE(std::string, Widget, argv0); }
3537
};
3638

3739
PYBIND11_EMBEDDED_MODULE(widget_module, m) {
@@ -299,3 +301,25 @@ TEST_CASE("Reload module from file") {
299301
result = module_.attr("test")().cast<int>();
300302
REQUIRE(result == 2);
301303
}
304+
305+
TEST_CASE("sys.argv gets initialized properly") {
    // Shut down the running interpreter so the scoped interpreters below can be
    // created; it is started again at the end of the test.
    py::finalize_interpreter();

    // Default construction (no argc/argv): sys.argv[0] is expected to be empty.
    {
        py::scoped_interpreter interp_without_args;
        auto test_module = py::module::import("test_interpreter");
        auto derived = test_module.attr("DerivedWidget")("The question");
        const auto &widget = derived.cast<const Widget &>();
        REQUIRE(widget.argv0().empty());
    }

    // Explicit argv: sys.argv[0] must report it even though the caller's buffer
    // is freed right after the interpreter has been constructed.
    {
        char *args[] = {strdup("a.out")};
        py::scoped_interpreter interp_with_args(true, 1, args);
        free(args[0]);
        auto test_module = py::module::import("test_interpreter");
        auto derived = test_module.attr("DerivedWidget")("The question");
        const auto &widget = derived.cast<const Widget &>();
        REQUIRE(widget.argv0() == "a.out");
    }

    py::initialize_interpreter();
}

tests/test_embed/test_interpreter.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# -*- coding: utf-8 -*-
2+
import sys
3+
24
from widget_module import Widget
35

46

@@ -8,3 +10,6 @@ def __init__(self, message):
810

911
def the_answer(self):
1012
return 42
13+
14+
def argv0(self):
15+
return sys.argv[0]

0 commit comments

Comments
 (0)