From 3ac9cb4c9e5611c9a64627d1f69424a895a12386 Mon Sep 17 00:00:00 2001 From: James Gould Date: Sun, 8 Feb 2015 23:00:46 -0700 Subject: [PATCH 1/4] Adding Diff Trimmed Lines I extended the LineDiff.tokenize function to add the ability to ignore leading and trailing spaces when using TrimmedLineDiff, and added diffTrimmedLines to the public-facing functions. My changes should not affect any previous functionality. --- diff.js | 18 +++++++++++++----- index.html | 1 + 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/diff.js b/diff.js index 84b3fa39..8bb65b88 100644 --- a/diff.js +++ b/diff.js @@ -15,6 +15,7 @@ * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.6927 */ (function(global, undefined) { + var JsDiff = (function() { /*jshint maxparams: 5*/ /*istanbul ignore next*/ @@ -114,8 +115,8 @@ return done([{ value: newString, added: true }]); } - newString = this.tokenize(newString); - oldString = this.tokenize(oldString); + newString = this.tokenize(newString, !!self.ignoreTrim); + oldString = this.tokenize(oldString, !!self.ignoreTrim); var newLen = newString.length, oldLen = oldString.length; var maxEditLength = newLen + oldLen; @@ -174,7 +175,7 @@ editLength++; } - // Performs the length of edit iteration. Is a bit fugly as this has to support the + // Performs the length of edit iteration. Is a bit fugly as this has to support the // sync and async mode which is never fun. Loops over execEditLength until a value // is produced. var editLength = 1; @@ -256,7 +257,11 @@ }; var LineDiff = new Diff(); - LineDiff.tokenize = function(value) { + + var TrimmedLineDiff = new Diff(); + TrimmedLineDiff.ignoreTrim = true; + + LineDiff.tokenize = TrimmedLineDiff.tokenize = function(value, ignoreTrim) { var retLines = [], lines = value.split(/^/m); @@ -268,13 +273,14 @@ if (line === '\n' && lastLine && lastLine[lastLine.length - 1] === '\r') { retLines[retLines.length - 1] += '\n'; } else if (line) { - retLines.push(line); + retLines.push(ignoreTrim ? 
line.trim() + '\n' : line); } } return retLines; }; + var SentenceDiff = new Diff(); SentenceDiff.tokenize = function (value) { return removeEmpty(value.split(/(\S.+?[.!?])(?=\s+|$)/)); @@ -344,6 +350,8 @@ diffWords: function(oldStr, newStr, callback) { return WordDiff.diff(oldStr, newStr, callback); }, diffWordsWithSpace: function(oldStr, newStr, callback) { return WordWithSpaceDiff.diff(oldStr, newStr, callback); }, diffLines: function(oldStr, newStr, callback) { return LineDiff.diff(oldStr, newStr, callback); }, + diffTrimmedLines: function(oldStr, newStr, callback) { return TrimmedLineDiff.diff(oldStr, newStr, callback); }, + diffSentences: function(oldStr, newStr, callback) { return SentenceDiff.diff(oldStr, newStr, callback); }, diffCss: function(oldStr, newStr, callback) { return CssDiff.diff(oldStr, newStr, callback); }, diff --git a/index.html b/index.html index 33bb403b..a6dff732 100644 --- a/index.html +++ b/index.html @@ -11,6 +11,7 @@

Diff

+ From 1892d7796392dd269542d3e29760769661d5db55 Mon Sep 17 00:00:00 2001 From: James Gould Date: Tue, 10 Feb 2015 21:07:23 -0700 Subject: [PATCH 2/4] Documentation and tests for TrimmedLineDiff Refactoring to avoid changing tokenize API, adding documentation to README.md, and adding 4 tests for TrimmedLineDiff to diffTest.js. I also found a bug in how TrimmedLineDiff handled windows new lines while adding the test, as well as changed TrimmedLineDiff so it doesn't add a newline character ('\n') if it is the last line. --- README.md | 5 ++++- diff.js | 26 +++++++++++++++++++------- test/diffTest.js | 30 ++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 9566b129..1eb2d3d0 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,10 @@ or Returns a list of change objects (See below). +* `JsDiff.TrimmedLineDiff(oldStr, newStr[, callback])` - diffs two blocks of text, comparing line by line, ignoring leading and trailing whitespace. + + Returns a list of change objects (See below). + * `JsDiff.diffSentences(oldStr, newStr[, callback])` - diffs two blocks of text, comparing sentence by sentence. Returns a list of change objects (See below). @@ -166,4 +170,3 @@ OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/kpdecker/jsdiff/trend.png)](https://bitdeli.com/free "Bitdeli Badge") - diff --git a/diff.js b/diff.js index 8bb65b88..7c5572dd 100644 --- a/diff.js +++ b/diff.js @@ -115,8 +115,8 @@ return done([{ value: newString, added: true }]); } - newString = this.tokenize(newString, !!self.ignoreTrim); - oldString = this.tokenize(oldString, !!self.ignoreTrim); + newString = this.tokenize(newString); + oldString = this.tokenize(oldString); var newLen = newString.length, oldLen = oldString.length; var maxEditLength = newLen + oldLen; @@ -261,19 +261,31 @@ var TrimmedLineDiff = new Diff(); TrimmedLineDiff.ignoreTrim = true; - LineDiff.tokenize = TrimmedLineDiff.tokenize = function(value, ignoreTrim) { + LineDiff.tokenize = TrimmedLineDiff.tokenize = function(value) { var retLines = [], lines = value.split(/^/m); - for(var i = 0; i < lines.length; i++) { var line = lines[i], lastLine = lines[i - 1]; // Merge lines that may contain windows new lines - if (line === '\n' && lastLine && lastLine[lastLine.length - 1] === '\r') { - retLines[retLines.length - 1] += '\n'; + if (line === '\n' && lastLine && lastLine[lastLine.length - 1]) { + if(this.ignoreTrim){ + retLines[retLines.length - 1] = retLines[retLines.length - 1].slice(0,-1) + '\r\n'; + } + else{ + retLines[retLines.length - 1] += '\n'; + } } else if (line) { - retLines.push(ignoreTrim ? line.trim() + '\n' : line); + if(this.ignoreTrim){ + + line = line.trim(); + //add a newline unless this is the last line. 
+ if(!(i + 1 === lines.length)){ + line += '\n'; + } + } + retLines.push(line); } } diff --git a/test/diffTest.js b/test/diffTest.js index cfc01f37..e36244c8 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -200,6 +200,36 @@ describe('#diffLines', function() { }); }); +// Line Diff +describe('#TrimmedLineDiff', function() { + it('should diff lines', function() { + var diffResult = diff.diffTrimmedLines( + 'line\nold value\nline', + 'line\nnew value\nline'); + diff.convertChangesToXML(diffResult).should.equal('line\nnew value\nold value\nline'); + }); + it('should the same lines in diff', function() { + var diffResult = diff.diffTrimmedLines( + 'line\nvalue\nline', + 'line\nvalue\nline'); + diff.convertChangesToXML(diffResult).should.equal('line\nvalue\nline'); + }); + + it('should ignore shorespace', function() { + var diffResult = diff.diffTrimmedLines( + 'line\nvalue \nline', + 'line\nvalue\nline'); + diff.convertChangesToXML(diffResult).should.equal('line\nvalue\nline'); + }); + + it('should handle windows line endings', function() { + var diffResult = diff.diffTrimmedLines( + 'line\r\nold value \r\nline', + 'line\r\nnew value\r\nline'); + diff.convertChangesToXML(diffResult).should.equal('line\r\nnew value\r\nold value\r\nline'); + }); +}); + describe('#diffJson', function() { it('should accept objects', function() { diff.diffJson( From b8cd87f7531818d7d5f3f54941bd95855b25b474 Mon Sep 17 00:00:00 2001 From: James Gould Date: Thu, 12 Feb 2015 10:00:47 -0700 Subject: [PATCH 3/4] Fixing windows new line merge and readme I updated the Readme with the Function rather than the Object name. In the windows new line merge, I added a check for '\n', as well as added a comment. The reason I've extended the windows new line merge functionality is to handle any situations where '\n\n' may come up. We want '\r\n' instead. 
--- README.md | 2 +- diff.js | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1eb2d3d0..0f62251f 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ or Returns a list of change objects (See below). -* `JsDiff.TrimmedLineDiff(oldStr, newStr[, callback])` - diffs two blocks of text, comparing line by line, ignoring leading and trailing whitespace. +* `JsDiff.diffTrimmedLines(oldStr, newStr[, callback])` - diffs two blocks of text, comparing line by line, ignoring leading and trailing whitespace. Returns a list of change objects (See below). diff --git a/diff.js b/diff.js index 7c5572dd..bc72c692 100644 --- a/diff.js +++ b/diff.js @@ -266,11 +266,14 @@ lines = value.split(/^/m); for(var i = 0; i < lines.length; i++) { var line = lines[i], - lastLine = lines[i - 1]; + lastLine = lines[i - 1], + lastLineLastChar = lastLine ? lastLine[lastLine.length - 1] : ''; // Merge lines that may contain windows new lines - if (line === '\n' && lastLine && lastLine[lastLine.length - 1]) { - if(this.ignoreTrim){ + if (line === '\n' && lastLine && + (lastLineLastChar === '\r' || lastLineLastChar === '\n')) { + if(this.ignoreTrim || lastLineLastChar === '\n'){ + //to avoid merging to \n\n, remove \n and add \r\n. retLines[retLines.length - 1] = retLines[retLines.length - 1].slice(0,-1) + '\r\n'; } else{ @@ -278,7 +281,6 @@ } } else if (line) { if(this.ignoreTrim){ - line = line.trim(); //add a newline unless this is the last line. if(!(i + 1 === lines.length)){ From cb0c827ecd69db514970b0f3dcc72119019cef02 Mon Sep 17 00:00:00 2001 From: James Gould Date: Wed, 18 Feb 2015 20:36:40 -0700 Subject: [PATCH 4/4] Style, nitpick, and word choice Fixing everything discussed in the pull request. 
Also noticed the comment for the Trimmed Line Diff tests just said Line Diff, and fixed the `if (` spacing style further down in diff.js --- diff.js | 26 ++++++++++++-------------- test/diffTest.js | 6 +++--- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/diff.js b/diff.js index bc72c692..8f9426fa 100644 --- a/diff.js +++ b/diff.js @@ -270,20 +270,18 @@ lastLineLastChar = lastLine ? lastLine[lastLine.length - 1] : ''; // Merge lines that may contain windows new lines - if (line === '\n' && lastLine && - (lastLineLastChar === '\r' || lastLineLastChar === '\n')) { - if(this.ignoreTrim || lastLineLastChar === '\n'){ + if (line === '\n' && (lastLineLastChar === '\r' || lastLineLastChar === '\n')) { + if (this.ignoreTrim || lastLineLastChar === '\n'){ //to avoid merging to \n\n, remove \n and add \r\n. retLines[retLines.length - 1] = retLines[retLines.length - 1].slice(0,-1) + '\r\n'; - } - else{ + } else { retLines[retLines.length - 1] += '\n'; } } else if (line) { - if(this.ignoreTrim){ + if (this.ignoreTrim) { line = line.trim(); //add a newline unless this is the last line. 
- if(!(i + 1 === lines.length)){ + if (i < lines.length - 1) { line += '\n'; } } @@ -469,7 +467,7 @@ addEOFNL = false; for (var i = (diffstr[0][0]==='I'?4:0); i < diffstr.length; i++) { - if(diffstr[i][0] === '@') { + if (diffstr[i][0] === '@') { var meh = diffstr[i].split(/@@ -(\d+),(\d+) \+(\d+),(\d+) @@/); diff.unshift({ start:meh[3], @@ -478,17 +476,17 @@ newlength:meh[4], newlines:[] }); - } else if(diffstr[i][0] === '+') { + } else if (diffstr[i][0] === '+') { diff[0].newlines.push(diffstr[i].substr(1)); - } else if(diffstr[i][0] === '-') { + } else if (diffstr[i][0] === '-') { diff[0].oldlines.push(diffstr[i].substr(1)); - } else if(diffstr[i][0] === ' ') { + } else if (diffstr[i][0] === ' ') { diff[0].newlines.push(diffstr[i].substr(1)); diff[0].oldlines.push(diffstr[i].substr(1)); - } else if(diffstr[i][0] === '\\') { + } else if (diffstr[i][0] === '\\') { if (diffstr[i-1][0] === '+') { remEOFNL = true; - } else if(diffstr[i-1][0] === '-') { + } else if (diffstr[i-1][0] === '-') { addEOFNL = true; } } @@ -498,7 +496,7 @@ for (var i = diff.length - 1; i >= 0; i--) { var d = diff[i]; for (var j = 0; j < d.oldlength; j++) { - if(str[d.start-1+j] !== d.oldlines[j]) { + if (str[d.start-1+j] !== d.oldlines[j]) { return false; } } diff --git a/test/diffTest.js b/test/diffTest.js index e36244c8..f52d2852 100644 --- a/test/diffTest.js +++ b/test/diffTest.js @@ -185,7 +185,7 @@ describe('#diffLines', function() { diff.convertChangesToXML(diffResult).should.equal('line\nvalue\nline'); }); - it('should handle shorespace', function() { + it('should handle leading and trailing whitespace', function() { var diffResult = diff.diffLines( 'line\nvalue \nline', 'line\nvalue\nline'); @@ -200,7 +200,7 @@ describe('#diffLines', function() { }); }); -// Line Diff +// Trimmed Line Diff describe('#TrimmedLineDiff', function() { it('should diff lines', function() { var diffResult = diff.diffTrimmedLines( @@ -215,7 +215,7 @@ describe('#TrimmedLineDiff', function() { 
diff.convertChangesToXML(diffResult).should.equal('line\nvalue\nline'); }); - it('should ignore shorespace', function() { + it('should ignore leading and trailing whitespace', function() { var diffResult = diff.diffTrimmedLines( 'line\nvalue \nline', 'line\nvalue\nline');