From 1d5c66bb5016fcd63dc1bcf901b4d7affdaa8e2f Mon Sep 17 00:00:00 2001
From: mimrock
Date: Fri, 8 Aug 2014 16:45:57 +0200
Subject: [PATCH 1/3] Replace LCS algorithm with Hirschberg's, which runs in linear space

---
 src/Differ.php | 97 ++++++++++++++++++++++++++++++--------------------
 1 file changed, 58 insertions(+), 39 deletions(-)

diff --git a/src/Differ.php b/src/Differ.php
index 0fb3f1ec..005c1114 100644
--- a/src/Differ.php
+++ b/src/Differ.php
@@ -248,56 +248,75 @@ public function diffToArray($from, $to)
     }
 
     /**
-     * Calculates the longest common subsequence of two arrays.
+     * Calculates the longest common subsequences of two strings.
      *
-     * @param  array $from
-     * @param  array $to
+     * The method uses Hirschberg's algorithm that runs in linear space and
+     * quadratic time.
+     *
+     * @param array $from
+     * @param array $to
      * @return array
      */
     private function longestCommonSubsequence(array $from, array $to)
     {
-        $common     = array();
-        $matrix     = array();
-        $fromLength = count($from);
-        $toLength   = count($to);
+        $cfrom = count($from);
+        $cto = count($to);
 
-        for ($i = 0; $i <= $fromLength; ++$i) {
-            $matrix[$i][0] = 0;
+        if ($cfrom === 0) {
+            return array();
         }
-
-        for ($j = 0; $j <= $toLength; ++$j) {
-            $matrix[0][$j] = 0;
-        }
-
-        for ($i = 1; $i <= $fromLength; ++$i) {
-            for ($j = 1; $j <= $toLength; ++$j) {
-                $matrix[$i][$j] = max(
-                    $matrix[$i-1][$j],
-                    $matrix[$i][$j-1],
-                    $from[$i-1] === $to[$j-1] ? $matrix[$i-1][$j-1] + 1 : 0
-                );
+        elseif ($cfrom === 1) {
+            if (in_array($from[0], $to, true)) {
+                return array($from[0]);
             }
-        }
-
-        $i = $fromLength;
-        $j = $toLength;
-
-        while ($i > 0 && $j > 0) {
-            if ($from[$i-1] === $to[$j-1]) {
-                array_unshift($common, $from[$i-1]);
-                --$i;
-                --$j;
+            else {
+                return array();
             }
-
-            else if ($matrix[$i][$j-1] > $matrix[$i-1][$j]) {
-                --$j;
+        }
+        else {
+            $i = (int)($cfrom / 2);
+            $fromStart = array_slice($from, 0, $i);
+            $fromEnd = array_slice($from, $i);
+            $llB = $this->longestCommonSubsequenceLength($fromStart, $to);
+            $llE = $this->longestCommonSubsequenceLength(array_reverse($fromEnd), array_reverse($to));
+            $jmax = 0;
+            $max = 0;
+            for($j = 0; $j <= $cto; $j++) {
+                $m = $llB[$j] + $llE[$cto - $j];
+                if ($m >= $max) {
+                    $max = $m;
+                    $jmax = $j;
+                }
             }
+            $toStart = array_slice($to, 0, $jmax);
+            $toEnd = array_slice($to, $jmax);
+            return array_merge($this->longestCommonSubsequence($fromStart, $toStart), $this->longestCommonSubsequence($fromEnd, $toEnd));
+        }
+    }
 
-            else {
-                --$i;
+    /**
+     * Returns, for every prefix of $to, the length of its LCS with $from.
+     *
+     * @param array $from
+     * @param array $to
+     * @return array
+     */
+    private function longestCommonSubsequenceLength(array $from, array $to)
+    {
+        $curr = array_fill(0, count($to) + 1, 0);
+        $cfrom = count($from);
+        $cto = count($to);
+        for($i = 0; $i < $cfrom; $i++) {
+            $prev = $curr;
+            for ($j = 0; $j < $cto; $j++) {
+                if ($from[$i] === $to[$j]) {
+                    $curr[$j + 1] = $prev[$j] + 1;
+                }
+                else {
+                    $curr[$j + 1] = max($curr[$j], $prev[$j + 1]);
+                }
             }
         }
-
-        return $common;
+        return $curr;
     }
-}
+}
\ No newline at end of file

From 9966c445a4fd9076bfb08240ea046c128fc53d43 Mon Sep 17 00:00:00 2001
From: mimrock
Date: Fri, 8 Aug 2014 16:57:57 +0200
Subject: [PATCH 2/3] Add missing newline

---
 src/Differ.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Differ.php b/src/Differ.php
index 005c1114..e934827f 100644
--- a/src/Differ.php
+++ b/src/Differ.php
@@ -319,4 +319,4 @@ private function longestCommonSubsequenceLength(array $from, array $to)
         }
         return $curr;
     }
-}
\ No newline at end of file
+}

From 21b29ce0f414d2714edf63efd195b56c2e67521b Mon Sep 17 00:00:00 2001
From: mimrock
Date: Fri, 8 Aug 2014 17:13:11 +0200
Subject: [PATCH 3/3] Fix typo in docs

---
 src/Differ.php | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Differ.php b/src/Differ.php
index e934827f..0fab1a35 100644
--- a/src/Differ.php
+++ b/src/Differ.php
@@ -248,7 +248,7 @@ public function diffToArray($from, $to)
     }
 
     /**
-     * Calculates the longest common subsequences of two strings.
+     * Calculates the longest common subsequence of two arrays.
      *
      * The method uses Hirschberg's algorithm that runs in linear space and
      * quadratic time.