Skip to content

Commit fe261ae

Browse files
Prefer to order deletions before insertions when the edit cost is the same, like the Myers diff paper does (#439)
* Add test for Unix diff-style ordering of deletions & insertions * Tweak base.js to prefer extending paths that have previously done more deletions * Remove now-redundant code to flip the order of immediately consecutive add and remove operations * Fix existing tests * add release notes
1 parent 97c676d commit fe261ae

File tree

5 files changed

+28
-23
lines changed

5 files changed

+28
-23
lines changed

Diff for: release-notes.md

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
[Commits](https://github.com/kpdecker/jsdiff/compare/master...v6.0.0-staging)
66

77
- [#435](https://github.com/kpdecker/jsdiff/pull/435) Fix `parsePatch` handling of control characters. `parsePatch` used to interpret various unusual control characters - namely vertical tabs, form feeds, lone carriage returns without a line feed, and EBCDIC NELs - as line breaks when parsing a patch file. This was inconsistent with the behavior of both JsDiff's own `diffLines` method and also the Unix `diff` and `patch` utils, which all simply treat those control characters as ordinary characters. The result of this discrepancy was that some well-formed patches - produced either by `diff` or by JsDiff itself and handled properly by the `patch` util - would be wrongly parsed by `parsePatch`, with the effect that it would disregard the remainder of a hunk after encountering one of these control characters.
8+
- [#439](https://github.com/kpdecker/jsdiff/pull/439) Prefer diffs that order deletions before insertions. When faced with a choice between two diffs with an equal total edit distance, the Myers diff algorithm generally prefers one that does deletions before insertions rather than insertions before deletions. For instance, when diffing `abcd` against `acbd`, it will prefer a diff that says to delete the `b` and then insert a new `b` after the `c`, over a diff that says to insert a `c` before the `b` and then delete the existing `c`. JsDiff deviated from the published Myers algorithm in a way that led to it having the opposite preference in many cases, including that example. This is now fixed, meaning diffs output by JsDiff will more accurately reflect what the published Myers diff algorithm would output.
89

910
## Development
1011

Diff for: src/diff/base.js

+1-12
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,7 @@ Diff.prototype = {
7171
// Select the diagonal that we want to branch from. We select the prior
7272
// path whose position in the old string is the farthest from the origin
7373
// and does not pass the bounds of the diff graph
74-
// TODO: Remove the `+ 1` here to make behavior match Myers algorithm
75-
// and prefer to order removals before insertions.
76-
if (!canRemove || (canAdd && removePath.oldPos + 1 < addPath.oldPos)) {
74+
if (!canRemove || (canAdd && removePath.oldPos < addPath.oldPos)) {
7775
basePath = self.addToPath(addPath, true, undefined, 0);
7876
} else {
7977
basePath = self.addToPath(removePath, undefined, true, 1);
@@ -223,15 +221,6 @@ function buildValues(diff, lastComponent, newString, oldString, useLongestToken)
223221
} else {
224222
component.value = diff.join(oldString.slice(oldPos, oldPos + component.count));
225223
oldPos += component.count;
226-
227-
// Reverse add and remove so removes are output first to match common convention
228-
// The diffing algorithm is tied to add then remove output and this is the simplest
229-
// route to get the desired output with minimal overhead.
230-
if (componentPos && components[componentPos - 1].added) {
231-
let tmp = components[componentPos - 1];
232-
components[componentPos - 1] = components[componentPos];
233-
components[componentPos] = tmp;
234-
}
235224
}
236225
}
237226

Diff for: test/diff/array.js

+9-9
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,27 @@ describe('diff/array', function() {
1010
console.log(diffResult);
1111
expect(diffResult).to.deep.equals([
1212
{count: 1, value: [a]},
13-
{count: 1, value: [c], removed: undefined, added: true},
14-
{count: 1, value: [b]},
15-
{count: 1, value: [c], removed: true, added: undefined}
13+
{count: 1, value: [b], removed: true, added: undefined},
14+
{count: 1, value: [c]},
15+
{count: 1, value: [b], removed: undefined, added: true}
1616
]);
1717
});
1818
it('should diff falsey values', function() {
1919
const a = false;
2020
const b = 0;
2121
const c = '';
2222
// Example sequences from Myers 1986
23-
const arrayA = [c, b, a, b, a, c];
24-
const arrayB = [a, b, c, a, b, b, a];
23+
const arrayA = [a, b, c, a, b, b, a];
24+
const arrayB = [c, b, a, b, a, c];
2525
const diffResult = diffArrays(arrayA, arrayB);
2626
expect(diffResult).to.deep.equals([
27-
{count: 2, value: [a, b], removed: undefined, added: true},
27+
{count: 2, value: [a, b], removed: true, added: undefined},
2828
{count: 1, value: [c]},
29-
{count: 1, value: [b], removed: true, added: undefined},
30-
{count: 2, value: [a, b]},
3129
{count: 1, value: [b], removed: undefined, added: true},
30+
{count: 2, value: [a, b]},
31+
{count: 1, value: [b], removed: true, added: undefined},
3232
{count: 1, value: [a]},
33-
{count: 1, value: [c], removed: true, added: undefined}
33+
{count: 1, value: [c], removed: undefined, added: true}
3434
]);
3535
});
3636
describe('anti-aliasing', function() {

Diff for: test/diff/line.js

+15
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,21 @@ describe('diff/line', function() {
4747
expect(convertChangesToXML(diffResult)).to.equal('<del>line\n\nold value \n\nline</del>');
4848
});
4949

50+
it('Should prefer to do deletions before insertions, like Unix diff does', function() {
51+
const diffResult = diffLines('a\nb\nc\nd\n', 'a\nc\nb\nd\n');
52+
53+
// There are two possible diffs with equal edit distance here; either we
54+
// can delete the "b" and insert it again later, or we can insert a "c"
55+
// before the "b" and then delete the original "c" later.
56+
// For consistency with the convention of other diff tools, we want to
57+
// prefer the diff where we delete and then later insert over the one
58+
// where we insert and then later delete.
59+
expect(convertChangesToXML(diffResult)).to.equal('a\n<del>b\n</del>c\n<ins>b\n</ins>d\n');
60+
61+
const diffResult2 = diffLines('a\nc\nb\nd\n', 'a\nb\nc\nd\n');
62+
expect(convertChangesToXML(diffResult2)).to.equal('a\n<del>c\n</del>b\n<ins>c\n</ins>d\n');
63+
});
64+
5065
describe('given options.maxEditLength', function() {
5166
it('terminates early', function() {
5267
const diffResult = diffLines(

Diff for: test/diff/word.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ describe('WordDiff', function() {
181181

182182
it('should diff multiple whitespace values', function() {
183183
const diffResult = diffWordsWithSpace('New Value ', 'New ValueMoreData ');
184-
expect(convertChangesToXML(diffResult)).to.equal('New<ins> ValueMoreData</ins> <del>Value </del>');
184+
expect(convertChangesToXML(diffResult)).to.equal('New<del> Value</del> <ins>ValueMoreData </ins>');
185185
});
186186

187187
it('should inserts values in parenthesis', function() {
@@ -220,7 +220,7 @@ describe('WordDiff', function() {
220220

221221
it('should perform async operations', function(done) {
222222
diffWordsWithSpace('New Value ', 'New ValueMoreData ', function(err, diffResult) {
223-
expect(convertChangesToXML(diffResult)).to.equal('New<ins> ValueMoreData</ins> <del>Value </del>');
223+
expect(convertChangesToXML(diffResult)).to.equal('New<del> Value</del> <ins>ValueMoreData </ins>');
224224
done();
225225
});
226226
});

0 commit comments

Comments
 (0)