From 6d840686b1485df964b08d0f9bb92cbf567f4eff Mon Sep 17 00:00:00 2001
From: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
Date: Thu, 18 May 2023 15:56:12 +0200
Subject: [PATCH 1/3] Fix GH-11240: PHP crashes on big file with array inside
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The crash happens because there's a stack overflow in the recursive SCC
algorithm. Fix this by transforming it into an iterative implementation
of the same algorithm. We manually keep the recursion stack now.

I tested the correctness by running the CI test suite using both the old
implementation and the new implementation and letting the test fail if
the SCC values differ. The tests passed without failure.

For the test case of OP, I benchmarked the performance:
With this patch
  Time (mean ± σ):     645.9 ms ±   7.4 ms    [User: 603.7 ms, System: 40.7 ms]
  Range (min … max):   634.0 ms … 659.3 ms    10 runs
Without this patch
  Time (mean ± σ):     755.3 ms ±  18.1 ms    [User: 698.8 ms, System: 55.2 ms]
  Range (min … max):   737.6 ms … 784.4 ms    10 runs

We can see an improvement in performance as well because the function
call overhead and control transfer overhead is eliminated now.
---
 Zend/Optimizer/zend_inference.c | 110 ++++++++++++++++++++++++--------
 1 file changed, 83 insertions(+), 27 deletions(-)

diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c
index 91a142a9f863f..1a467db6d01c9 100644
--- a/Zend/Optimizer/zend_inference.c
+++ b/Zend/Optimizer/zend_inference.c
@@ -74,12 +74,34 @@
 	} \
 } while (0)
 
+#ifdef SYM_RANGE
+# define STORE_CURRENT_SYM recursion_stack[recursion_stack_idx].current_sym = sym_p
+#else
+# define STORE_CURRENT_SYM
+#endif
+
 #define CHECK_SCC_VAR(var2) \
 	do { \
 		if (!ssa->vars[var2].no_val) { \
 			if (dfs[var2] < 0) { \
-				zend_ssa_check_scc_var(op_array, ssa, var2, index, dfs, root, stack); \
+				/* Need to come back from "recursion" */ \
+				recursion_stack[recursion_stack_idx].var = var; \
+				recursion_stack[recursion_stack_idx].current_use = use; \
+				recursion_stack[recursion_stack_idx].current_phi = p; \
+				STORE_CURRENT_SYM; \
+				recursion_stack_idx++; \
+				/* "Recurse" on next var */ \
+				var = var2; \
+				use = ssa->vars[var].use_chain; \
+				p = ssa->vars[var].phi_use_chain; \
+				sym_p = ssa->vars[var].sym_use_chain; \
+				dfs[var] = *index; \
+				(*index)++; \
+				root[var] = var; \
+				goto recurse; \
 			} \
+			/* Because the use, p, sym_p variables will remain the same from the last iteration,
+			 * this will correctly execute after coming back from the "recursion". */ \
 			if (ssa->vars[var2].scc < 0 && dfs[root[var]] >= dfs[root[var2]]) { \
 				root[var] = root[var2]; \
 			} \
@@ -146,21 +168,25 @@
 	} while (0)
 
 
-#define FOR_EACH_VAR_USAGE(_var, MACRO) \
+#define FOR_EACH_VAR_USAGE_EX(_var, MACRO) \
 	do { \
-		zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \
-		int use = ssa->vars[_var].use_chain; \
 		while (use >= 0) { \
 			FOR_EACH_DEFINED_VAR(use, MACRO); \
 			use = zend_ssa_next_use(ssa->ops, _var, use); \
 		} \
-		p = ssa->vars[_var].phi_use_chain; \
 		while (p) { \
 			MACRO(p->ssa_var); \
 			p = zend_ssa_next_use_phi(ssa, _var, p); \
 		} \
 	} while (0)
 
+#define FOR_EACH_VAR_USAGE(_var, MACRO) \
+	do { \
+		int use = ssa->vars[_var].use_chain; \
+		zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \
+		FOR_EACH_VAR_USAGE_EX(_var, MACRO); \
+	} while (0)
+
 static inline bool add_will_overflow(zend_long a, zend_long b) {
 	return (b > 0 && a > ZEND_LONG_MAX - b)
 		|| (b < 0 && a < ZEND_LONG_MIN - b);
@@ -172,41 +198,71 @@ static inline bool sub_will_overflow(zend_long a, zend_long b) {
 }
 #endif
 
-static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */
-{
+typedef struct {
+	int var;
+	int current_use;
+	const zend_ssa_phi *current_phi;
 #ifdef SYM_RANGE
-	zend_ssa_phi *p;
+	const zend_ssa_phi *current_sym;
 #endif
+} scc_recursion_entry;
+
+static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */
+{
+	ALLOCA_FLAG(recursion_stack_use_heap);
+	/* Manual recursion stack that tracks to which variable the recursion should "return" to.
+	 * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again.
+	 * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */
+	scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap);
+	uint32_t recursion_stack_idx = 1;
 
 	dfs[var] = *index;
 	(*index)++;
 	root[var] = var;
 
-	FOR_EACH_VAR_USAGE(var, CHECK_SCC_VAR);
+	recursion_stack[0].var = var;
+	recursion_stack[0].current_use = ssa->vars[var].use_chain;
+	recursion_stack[0].current_phi = ssa->vars[var].phi_use_chain;
+#ifdef SYM_RANGE
+	recursion_stack[0].current_sym = ssa->vars[var].sym_use_chain;
+#endif
+	do {
+		recursion_stack_idx--;
+		var = recursion_stack[recursion_stack_idx].var;
+		int use = recursion_stack[recursion_stack_idx].current_use;
+		const zend_ssa_phi *p = recursion_stack[recursion_stack_idx].current_phi;
+#ifdef SYM_RANGE
+		const zend_ssa_phi *sym_p = recursion_stack[recursion_stack_idx].current_sym;
+#endif
+
+recurse:;
+		FOR_EACH_VAR_USAGE_EX(var, CHECK_SCC_VAR);
 
 #ifdef SYM_RANGE
-	/* Process symbolic control-flow constraints */
-	p = ssa->vars[var].sym_use_chain;
-	while (p) {
-		CHECK_SCC_VAR(p->ssa_var);
-		p = p->sym_use_chain;
-	}
+		/* Process symbolic control-flow constraints */
+		while (sym_p) {
+			CHECK_SCC_VAR(sym_p->ssa_var);
+			sym_p = sym_p->sym_use_chain;
+		}
 #endif
 
-	if (root[var] == var) {
-		ssa->vars[var].scc = ssa->sccs;
-		while (stack->len > 0) {
-			int var2 = zend_worklist_stack_peek(stack);
-			if (dfs[var2] <= dfs[var]) {
-				break;
+		if (root[var] == var) {
+			ssa->vars[var].scc = ssa->sccs;
+			while (stack->len > 0) {
+				int var2 = zend_worklist_stack_peek(stack);
+				if (dfs[var2] <= dfs[var]) {
+					break;
+				}
+				zend_worklist_stack_pop(stack);
+				ssa->vars[var2].scc = ssa->sccs;
 			}
-			zend_worklist_stack_pop(stack);
-			ssa->vars[var2].scc = ssa->sccs;
+			ssa->sccs++;
+		} else {
+			zend_worklist_stack_push(stack, var);
 		}
-		ssa->sccs++;
-	} else {
-		zend_worklist_stack_push(stack, var);
-	}
+	} while (recursion_stack_idx > 0);
+
+	free_alloca(recursion_stack, recursion_stack_use_heap);
 }
 /* }}} */
 

From 4fe7c126d966f818ca18a6b005f5850866ba79a0 Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 26 May 2023 20:13:29 +0200
Subject: [PATCH 2/3] Pass in p and use in macro

---
 Zend/Optimizer/zend_inference.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c
index 1a467db6d01c9..8ffe99f54043e 100644
--- a/Zend/Optimizer/zend_inference.c
+++ b/Zend/Optimizer/zend_inference.c
@@ -168,15 +168,15 @@
 	} while (0)
 
 
-#define FOR_EACH_VAR_USAGE_EX(_var, MACRO) \
+#define FOR_EACH_VAR_USAGE_EX(_var, _use, _p, MACRO) \
 	do { \
-		while (use >= 0) { \
-			FOR_EACH_DEFINED_VAR(use, MACRO); \
-			use = zend_ssa_next_use(ssa->ops, _var, use); \
+		while (_use >= 0) { \
+			FOR_EACH_DEFINED_VAR(_use, MACRO); \
+			_use = zend_ssa_next_use(ssa->ops, _var, _use); \
 		} \
-		while (p) { \
-			MACRO(p->ssa_var); \
-			p = zend_ssa_next_use_phi(ssa, _var, p); \
+		while (_p) { \
+			MACRO(_p->ssa_var); \
+			_p = zend_ssa_next_use_phi(ssa, _var, _p); \
 		} \
 	} while (0)
 
@@ -184,7 +184,7 @@
 	do { \
 		int use = ssa->vars[_var].use_chain; \
 		zend_ssa_phi *p = ssa->vars[_var].phi_use_chain; \
-		FOR_EACH_VAR_USAGE_EX(_var, MACRO); \
+		FOR_EACH_VAR_USAGE_EX(_var, use, p, MACRO); \
 	} while (0)
 
 static inline bool add_will_overflow(zend_long a, zend_long b) {
@@ -236,7 +236,7 @@ static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa,
 #endif
 
 recurse:;
-		FOR_EACH_VAR_USAGE_EX(var, CHECK_SCC_VAR);
+		FOR_EACH_VAR_USAGE_EX(var, use, p, CHECK_SCC_VAR);
 
 #ifdef SYM_RANGE
 		/* Process symbolic control-flow constraints */

From fd4a0dbcbd84091d4526001f6b07725a80db866c Mon Sep 17 00:00:00 2001
From: nielsdos <7771979+nielsdos@users.noreply.github.com>
Date: Fri, 26 May 2023 20:13:38 +0200
Subject: [PATCH 3/3] Allocate recursion_stack once

---
 Zend/Optimizer/zend_inference.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/Zend/Optimizer/zend_inference.c b/Zend/Optimizer/zend_inference.c
index 8ffe99f54043e..7061bb50f85e2 100644
--- a/Zend/Optimizer/zend_inference.c
+++ b/Zend/Optimizer/zend_inference.c
@@ -207,13 +207,8 @@ typedef struct {
 #endif
 } scc_recursion_entry;
 
-static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack) /* {{{ */
+static void zend_ssa_check_scc_var(const zend_op_array *op_array, zend_ssa *ssa, int var, int *index, int *dfs, int *root, zend_worklist_stack *stack, scc_recursion_entry *recursion_stack) /* {{{ */
 {
-	ALLOCA_FLAG(recursion_stack_use_heap);
-	/* Manual recursion stack that tracks to which variable the recursion should "return" to.
-	 * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again.
-	 * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */
-	scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap);
 	uint32_t recursion_stack_idx = 1;
 
 	dfs[var] = *index;
@@ -261,8 +256,6 @@ recurse:;
 			zend_worklist_stack_push(stack, var);
 		}
 	} while (recursion_stack_idx > 0);
-
-	free_alloca(recursion_stack, recursion_stack_use_heap);
 }
 /* }}} */
 
@@ -274,6 +267,12 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /*
 	ALLOCA_FLAG(dfs_use_heap)
 	ALLOCA_FLAG(root_use_heap)
 	ALLOCA_FLAG(stack_use_heap)
+	ALLOCA_FLAG(recursion_stack_use_heap)
+
+	/* Manual recursion stack that tracks to which variable the recursion should "return" to.
+	 * The number of elements cannot be greater than the variable count because every time a variable is visited it is marked and won't be visited again.
+	 * Each recursion consists of three/four stack elements: (variable, current use chain iterator, current phi node iterator, optionally current sym use chain iterator). */
+	scc_recursion_entry *recursion_stack = do_alloca(sizeof(scc_recursion_entry) * ssa->vars_count, recursion_stack_use_heap);
 
 	dfs = do_alloca(sizeof(int) * ssa->vars_count, dfs_use_heap);
 	memset(dfs, -1, sizeof(int) * ssa->vars_count);
@@ -283,7 +282,7 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /*
 	/* Find SCCs using Tarjan's algorithm. */
 	for (j = 0; j < ssa->vars_count; j++) {
 		if (!ssa->vars[j].no_val && dfs[j] < 0) {
-			zend_ssa_check_scc_var(op_array, ssa, j, &index, dfs, root, &stack);
+			zend_ssa_check_scc_var(op_array, ssa, j, &index, dfs, root, &stack, recursion_stack);
 		}
 	}
 
@@ -307,6 +306,7 @@ ZEND_API int zend_ssa_find_sccs(const zend_op_array *op_array, zend_ssa *ssa) /*
 	ZEND_WORKLIST_STACK_FREE_ALLOCA(&stack, stack_use_heap);
 	free_alloca(root, root_use_heap);
 	free_alloca(dfs, dfs_use_heap);
+	free_alloca(recursion_stack, recursion_stack_use_heap);
 
 	return SUCCESS;
 }