From 6d840686b1485df964b08d0f9bb92cbf567f4eff Mon Sep 17 00:00:00 2001 From: Niels Dossche <7771979+nielsdos@users.noreply.github.com> Date: Thu, 18 May 2023 15:56:12 +0200 Subject: [PATCH 1/3] Fix GH-11240: PHP crashes on big file with array inside MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The crash happens because there's a stack overflow in the recursive SCC algorithm. Fix this by transforming it into an iterative implementation of the same algorithm. We manually keep the recursion stack now. I tested the correctness by running the CI test suite using both the old implementation and the new implementation and letting the test fail if the SCC values differ. The tests passed without failure. For the test case of OP, I benchmarked the performance: With this patch Time (mean ± σ): 645.9 ms ± 7.4 ms [User: 603.7 ms, System: 40.7 ms] Range (min … max): 634.0 ms … 659.3 ms 10 runs Without this patch Time (mean ± σ): 755.3 ms ± 18.1 ms [User: 698.8 ms, System: 55.2 ms] Range (min … max): 737.6 ms … 784.4 ms 10 runs We can see an improvement in performance as well because the function call overhead and control transfer overhead is eliminated now. --- Zend/Optimizer/zend_inference.c | 110 ++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 27 deletions(-) diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c index 91a142a9f863f..1a467db6d01c9 100644 --- a/Zend/Optimizer/zend_inference.c +++ b/Zend/Optimizer/zend_inference.c @@ -74,12 +74,34 @@ } \ } while (0) +#ifdef SYM_RANGE +# define STORE_CURRENT_SYM recursion_stack[recursion_stack_idx].current_sym = sym_p +#else +# define STORE_CURRENT_SYM +#endif + #define CHECK_SCC_VAR(var2) \ do { \ if (!ssa->vars[var2].no_val) { \ if (dfs[var2] < 0) { \ - zend_ssa_check_scc_var(op_array, ssa, var2, index, dfs, root, stack); \ + /* Need to come back from "recursion" */ \ + recursion_stack[recursion_stack_idx].var = var; \ + recursion_stack[recursion_stack_idx].current_use = use; \ + recursion_stack[recursion_stack_idx].current_phi = p; \ + STORE_CURRENT_SYM; \ + recursion_stack_idx++; \ + /* "Recurse" on next var */ \ + var = var2; \ + use = ssa->vars[var].use_chain; \ + p = ssa->vars[var].phi_use_chain; \ + sym_p = ssa->vars[var].sym_use_chain; \ + dfs[var] = *index; \ + (*index)++; \ + root[var] = var; \ + goto recurse; \ } \ + /* Because the use, p, sym_p variables will remain the same from the last iteration, + * this will correctly execute after coming back from the "recursion". */ \ if (ssa->vars[var2].scc < 0 && dfs[root[var]] >= dfs[root[var2]]) { \ root[var] = root[var2]; \ } \ @@ -146,21 +168,25 @@ } while (0) -#define FOR_EACH_VAR_USAGE(_var, MACRO) \ +#define FOR_EACH_VAR_USAGE_EX(_var, MACRO) \ do { \ - zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \ - int use = ssa->vars[_var].use_chain; \ while (use >= 0) { \ FOR_EACH_DEFINED_VAR(use, MACRO); \ use = zend_ssa_next_use(ssa->ops, _var, use); \ } \ - p = ssa->vars[_var].phi_use_chain; \ while (p) { \ MACRO(p->ssa_var); \ p = zend_ssa_next_use_phi(ssa, _var, p); \ } \ } while (0) +#define FOR_EACH_VAR_USAGE(_var, MACRO) \ + do { \ + int use = ssa->vars[_var].use_chain; \ + zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \ + FOR_EACH_VAR_USAGE_EX(_var, MACRO); \ + } while (0) + static inline bool add_will_overflow(zend_long a, zend_long b) { return (b > 0 && a > ZEND_LONG_MAX - b) || (b < 0 && a < ZEND_LONG_MIN - b); @@ -172,41 +198,71 @@ static inline bool sub_will_overflow(zend_long a, zend_long b) { } #endif -static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */ -{ +typedef struct { + int var; + int current_use; + const zend_ssa_phi *current_phi; #ifdef SYM_RANGE - zend_ssa_phi *p; + const zend_ssa_phi *current_sym; #endif +} scc_recursion_entry; + +static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */ +{ + ALLOCA_FLAG(recursion_stack_use_heap); + /* Manual recursion stack that tracks to which variable the recursion should "return" to. + * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again. + * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */ + scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap); + uint32_t recursion_stack_idx = 1; dfs[var] = *index; (*index)++; root[var] = var; - FOR_EACH_VAR_USAGE(var, CHECK_SCC_VAR); + recursion_stack[0].var = var; + recursion_stack[0].current_use = ssa->vars[var].use_chain; + recursion_stack[0].current_phi = ssa->vars[var].phi_use_chain; +#ifdef SYM_RANGE + recursion_stack[0].current_sym = ssa->vars[var].sym_use_chain; +#endif + do { + recursion_stack_idx--; + var = recursion_stack[recursion_stack_idx].var; + int use = recursion_stack[recursion_stack_idx].current_use; + const zend_ssa_phi *p = recursion_stack[recursion_stack_idx].current_phi; +#ifdef SYM_RANGE + const zend_ssa_phi *sym_p = recursion_stack[recursion_stack_idx].current_sym; +#endif + +recurse:; + FOR_EACH_VAR_USAGE_EX(var, CHECK_SCC_VAR); #ifdef SYM_RANGE - /* Process symbolic control-flow constraints */ - p = ssa->vars[var].sym_use_chain; - while (p) { - CHECK_SCC_VAR(p->ssa_var); - p = p->sym_use_chain; - } + /* Process symbolic control-flow constraints */ + while (sym_p) { + CHECK_SCC_VAR(sym_p->ssa_var); + sym_p = sym_p->sym_use_chain; + } #endif - if (root[var] == var) { - ssa->vars[var].scc = ssa->sccs; - while (stack->len > 0) { - int var2 = zend_worklist_stack_peek(stack); - if (dfs[var2] <= dfs[var]) { - break; + if (root[var] == var) { + ssa->vars[var].scc = ssa->sccs; + while (stack->len > 0) { + int var2 = zend_worklist_stack_peek(stack); + if (dfs[var2] <= dfs[var]) { + break; + } + zend_worklist_stack_pop(stack); + ssa->vars[var2].scc = ssa->sccs; } - zend_worklist_stack_pop(stack); - ssa->vars[var2].scc = ssa->sccs; + ssa->sccs++; + } else { + zend_worklist_stack_push(stack, var); } - ssa->sccs++; - } else { - zend_worklist_stack_push(stack, var); - } + } while (recursion_stack_idx > 0); + + free_alloca(recursion_stack, recursion_stack_use_heap); } /* }}} */ From 4fe7c126d966f818ca18a6b005f5850866ba79a0 Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Fri, 26 May 2023 20:13:29 +0200 Subject: [PATCH 2/3] Pass in p and use in macro --- Zend/Optimizer/zend_inference.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c index 1a467db6d01c9..8ffe99f54043e 100644 --- a/Zend/Optimizer/zend_inference.c +++ b/Zend/Optimizer/zend_inference.c @@ -168,15 +168,15 @@ } while (0) -#define FOR_EACH_VAR_USAGE_EX(_var, MACRO) \ +#define FOR_EACH_VAR_USAGE_EX(_var, _use, _p, MACRO) \ do { \ - while (use >= 0) { \ - FOR_EACH_DEFINED_VAR(use, MACRO); \ - use = zend_ssa_next_use(ssa->ops, _var, use); \ + while (_use >= 0) { \ + FOR_EACH_DEFINED_VAR(_use, MACRO); \ + _use = zend_ssa_next_use(ssa->ops, _var, _use); \ } \ - while (p) { \ - MACRO(p->ssa_var); \ - p = zend_ssa_next_use_phi(ssa, _var, p); \ + while (_p) { \ + MACRO(_p->ssa_var); \ + _p = zend_ssa_next_use_phi(ssa, _var, _p); \ } \ } while (0) @@ -184,7 +184,7 @@ do { \ int use = ssa->vars[_var].use_chain; \ zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \ - FOR_EACH_VAR_USAGE_EX(_var, MACRO); \ + FOR_EACH_VAR_USAGE_EX(_var, use, p, MACRO); \ } while (0) static inline bool add_will_overflow(zend_long a, zend_long b) { @@ -236,7 +236,7 @@ static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, #endif recurse:; - FOR_EACH_VAR_USAGE_EX(var, CHECK_SCC_VAR); + FOR_EACH_VAR_USAGE_EX(var, use, p, CHECK_SCC_VAR); #ifdef SYM_RANGE /* Process symbolic control-flow constraints */ From fd4a0dbcbd84091d4526001f6b07725a80db866c Mon Sep 17 00:00:00 2001 From: nielsdos <7771979+nielsdos@users.noreply.github.com> Date: Fri, 26 May 2023 20:13:38 +0200 Subject: [PATCH 3/3] Allocate recursion_stack once --- Zend/Optimizer/zend_inference.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c index 8ffe99f54043e..7061bb50f85e2 100644 --- a/Zend/Optimizer/zend_inference.c +++ b/Zend/Optimizer/zend_inference.c @@ -207,13 +207,8 @@ typedef struct { #endif } scc_recursion_entry; -static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */ +static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack, scc_recursion_entry *recursion_stack) /* {{{ */ { - ALLOCA_FLAG(recursion_stack_use_heap); - /* Manual recursion stack that tracks to which variable the recursion should "return" to. - * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again. - * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */ - scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap); uint32_t recursion_stack_idx = 1; dfs[var] = *index; @@ -261,8 +256,6 @@ recurse:; zend_worklist_stack_push(stack, var); } } while (recursion_stack_idx > 0); - - free_alloca(recursion_stack, recursion_stack_use_heap); } /* }}} */ @@ -274,6 +267,12 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /* ALLOCA_FLAG(dfs_use_heap) ALLOCA_FLAG(root_use_heap) ALLOCA_FLAG(stack_use_heap) + ALLOCA_FLAG(recursion_stack_use_heap) + + /* Manual recursion stack that tracks to which variable the recursion should "return" to. + * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again. + * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */ + scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap); dfs = do_alloca(sizeof(int) * ssa->vars_count, dfs_use_heap); memset(dfs, -1, sizeof(int) * ssa->vars_count); @@ -283,7 +282,7 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /* /* Find SCCs using Tarjan's algorithm. */ for (j = 0; j < ssa->vars_count; j++) { if (!ssa->vars[j].no_val && dfs[j] < 0) { - zend_ssa_check_scc_var(op_array, ssa, j, &index, dfs, root, &stack); + zend_ssa_check_scc_var(op_array, ssa, j, &index, dfs, root, &stack, recursion_stack); } } @@ -307,6 +306,7 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /* ZEND_WORKLIST_STACK_FREE_ALLOCA(&stack, stack_use_heap); free_alloca(root, root_use_heap); free_alloca(dfs, dfs_use_heap); + free_alloca(recursion_stack, recursion_stack_use_heap); return SUCCESS; }