Skip to content
This repository has been archived by the owner on Jun 26, 2020. It is now read-only.

Commit

Permalink
Merge pull request #274 from ckeditor/t/269
Browse files Browse the repository at this point in the history
Other: Optimized `diff()` function to use `fastDiff()` function internally for large data sets. Closes #269.
  • Loading branch information
Reinmar authored Feb 13, 2019
2 parents 54b8108 + a99c734 commit ee9bed0
Show file tree
Hide file tree
Showing 10 changed files with 792 additions and 214 deletions.
21 changes: 20 additions & 1 deletion src/diff.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
* @module utils/diff
*/

import fastDiff from '../src/fastdiff';

// The following code is based on the "O(NP) Sequence Comparison Algorithm"
// by Sun Wu, Udi Manber, Gene Myers, Webb Miller.

Expand All @@ -16,6 +18,11 @@
*
* diff( 'aba', 'acca' ); // [ 'equal', 'insert', 'insert', 'delete', 'equal' ]
*
* This function is based on the "O(NP) Sequence Comparison Algorithm" by Sun Wu, Udi Manber, Gene Myers, Webb Miller.
* Unfortunately, while it gives the most precise results, it is too complex for longer strings/arrays (above 200 items).
* Therefore, `diff()` automatically switches to {@link module:utils/fastdiff~fastDiff `fastDiff()`} when detecting
* such a scenario. The return formats of both functions are identical.
*
* @param {Array|String} a Input array or string.
* @param {Array|String} b Output array or string.
* @param {Function} [cmp] Optional function used to compare array values, by default === is used.
Expand All @@ -27,11 +34,19 @@ export default function diff( a, b, cmp ) {
return a === b;
};

const aLength = a.length;
const bLength = b.length;

// Perform `fastDiff` for longer strings/arrays (see #269).
if ( aLength > 200 || bLength > 200 || aLength + bLength > 300 ) {
return diff.fastDiff( a, b, cmp, true );
}

// Temporary action type statics.
let _insert, _delete;

// Swapped the arrays to use the shorter one as the first one.
if ( b.length < a.length ) {
if ( bLength < aLength ) {
const tmp = a;

a = b;
Expand Down Expand Up @@ -117,3 +132,7 @@ export default function diff( a, b, cmp ) {
// We remove the first item that represents the action for the injected nulls.
return es[ delta ].slice( 1 );
}

// Store the API in static property to easily overwrite it in tests.
// Too bad dependency injection does not work in Webpack + ES 6 (const) + Babel.
diff.fastDiff = fastDiff;
225 changes: 160 additions & 65 deletions src/fastdiff.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
*/

/**
* Finds position of the first and last change in the given strings and generates set of changes. Set of changes
* can be applied to the input text in order to transform it into the output text, for example:
* Finds positions of the first and last change in the given string/array and generates a set of changes:
*
* fastDiff( '12a', '12xyza' );
* // [ { index: 2, type: 'insert', values: [ 'x', 'y', 'z' ] } ]
Expand All @@ -20,17 +19,25 @@
* fastDiff( '12xyza', '12a' );
* // [ { index: 2, type: 'delete', howMany: 3 } ]
*
* fastDiff( '12aa', '12a' );
* fastDiff( [ '1', '2', 'a', 'a' ], [ '1', '2', 'a' ] );
* // [ { index: 3, type: 'delete', howMany: 1 } ]
*
* fastDiff( '12abc3', '2ab' );
* fastDiff( [ '1', '2', 'a', 'b', 'c', '3' ], [ '2', 'a', 'b' ] );
* // [ { index: 0, type: 'insert', values: [ '2', 'a', 'b' ] }, { index: 3, type: 'delete', howMany: 6 } ]
*
* Using returned results you can modify `oldText` to transform it into `newText`:
* Passed arrays can contain any type of data; however, to compare them correctly, a custom comparator function
* should be passed as the third parameter:
*
* let input = '12abc3';
* const output = '2ab';
* const changes = fastDiff( input, output );
* fastDiff( [ { value: 1 }, { value: 2 } ], [ { value: 1 }, { value: 3 } ], ( a, b ) => {
* return a.value === b.value;
* } );
* // [ { index: 1, type: 'insert', values: [ { value: 3 } ] }, { index: 2, type: 'delete', howMany: 1 } ]
*
* The resulting set of changes can be applied to the input in order to transform it into the output, for example:
*
* let input = '12abc3';
* const output = '2ab';
* const changes = fastDiff( input, output );
*
* changes.forEach( change => {
* if ( change.type == 'insert' ) {
Expand All @@ -40,101 +47,156 @@
* }
* } );
*
* input === output; // -> true
* // input equals output now
*
* or in case of arrays:
*
* let input = [ '1', '2', 'a', 'b', 'c', '3' ];
* const output = [ '2', 'a', 'b' ];
* const changes = fastDiff( input, output );
*
* changes.forEach( change => {
* if ( change.type == 'insert' ) {
* input = input.slice( 0, change.index ).concat( change.values, input.slice( change.index ) );
* } else if ( change.type == 'delete' ) {
* input = input.slice( 0, change.index ).concat( input.slice( change.index + change.howMany ) );
* }
* } );
*
* // input equals output now
*
* The output format of this function is compatible with {@link module:utils/difftochanges~diffToChanges} output format.
* By passing `true` as the fourth parameter (`atomicChanges`) the output of this function will become compatible with
* the {@link module:utils/diff~diff `diff()`} function:
*
* @param {String} oldText Input string.
* @param {String} newText Input string.
* fastDiff( '12a', '12xyza' );
* // [ 'equal', 'equal', 'insert', 'insert', 'insert', 'equal' ]
*
* The default output format of this function is compatible with the output format of
* {@link module:utils/difftochanges~diffToChanges `diffToChanges()`}. The `diffToChanges()` input format is, in turn,
* compatible with the output of {@link module:utils/diff~diff `diff()`}:
*
* const a = '1234';
* const b = '12xyz34';
*
* // Both calls will return the same results (grouped changes format).
* fastDiff( a, b );
* diffToChanges( diff( a, b ) );
*
* // Again, both calls will return the same results (atomic changes format).
* fastDiff( a, b, null, true );
* diff( a, b );
*
*
* @param {Array|String} a Input array or string.
* @param {Array|String} b Input array or string.
* @param {Function} [cmp] Optional function used to compare array values, by default `===` (strict equal operator) is used.
* @param {Boolean} [atomicChanges=false] Whether an array of `insert|delete|equal` operations should
* be returned instead of changes set. This makes this function compatible with {@link module:utils/diff~diff `diff()`}.
* @returns {Array} Array of changes.
*/
export default function fastDiff( oldText, newText ) {
// Check if both texts are equal.
if ( oldText === newText ) {
return [];
export default function fastDiff( a, b, cmp, atomicChanges = false ) {
// Set the comparator function.
cmp = cmp || function( a, b ) {
return a === b;
};

// Transform text or any iterable into arrays for easier, consistent processing.
if ( !Array.isArray( a ) ) {
a = Array.from( a );
}

if ( !Array.isArray( b ) ) {
b = Array.from( b );
}

const changeIndexes = findChangeBoundaryIndexes( oldText, newText );
// Find first and last change.
const changeIndexes = findChangeBoundaryIndexes( a, b, cmp );

return changeIndexesToChanges( newText, changeIndexes );
// Transform into changes array.
return atomicChanges ? changeIndexesToAtomicChanges( changeIndexes, b.length ) : changeIndexesToChanges( b, changeIndexes );
}

// Finds position of the first and last change in the given strings. For example:
// Finds position of the first and last change in the given arrays. For example:
//
// const indexes = findChangeBoundaryIndexes( '1234', '13424' );
// const indexes = findChangeBoundaryIndexes( [ '1', '2', '3', '4' ], [ '1', '3', '4', '2', '4' ] );
// console.log( indexes ); // { firstIndex: 1, lastIndexOld: 3, lastIndexNew: 4 }
//
// The above indexes means that in `oldText` modified part is `1[23]4` and in the `newText` it is `1[342]4`.
// Based on such indexes, array with `insert`/`delete` operations which allows transforming
// old text to the new one can be generated.
//
// It is expected that `oldText` and `newText` are different.
// The above indexes mean that in the first array the modified part is `1[23]4` and in the second array it is `1[342]4`.
// Based on such indexes, array with `insert`/`delete` operations which allows transforming first value into the second one
// can be generated.
//
// @param {String} oldText
// @param {String} newText
// @param {Array} arr1
// @param {Array} arr2
// @param {Function} cmp Comparator function.
// @returns {Object}
// @returns {Number} return.firstIndex Index of the first change in both strings (always the same for both).
// @returns {Number} result.lastIndexOld Index of the last common character in `oldText` string.
// @returns {Number} result.lastIndexNew Index of the last common character in `newText` string.
function findChangeBoundaryIndexes( oldText, newText ) {
// Find the first difference between texts.
const firstIndex = findFirstDifferenceIndex( oldText, newText );

// Remove the common part of texts and reverse them to make it simpler to find the last difference between texts.
const oldTextReversed = cutAndReverse( oldText, firstIndex );
const newTextReversed = cutAndReverse( newText, firstIndex );

// Find the first difference between reversed texts.
// It should be treated as "how many characters from the end the last difference occurred".
// @returns {Number} result.firstIndex Index of the first change in both values (always the same for both).
// @returns {Number} result.lastIndexOld Index of the last common value in `arr1`.
// @returns {Number} result.lastIndexNew Index of the last common value in `arr2`.
function findChangeBoundaryIndexes( arr1, arr2, cmp ) {
// Find the first difference between passed values.
const firstIndex = findFirstDifferenceIndex( arr1, arr2, cmp );

// If arrays are equal return -1 indexes object.
if ( firstIndex === -1 ) {
return { firstIndex: -1, lastIndexOld: -1, lastIndexNew: -1 };
}

// Remove the common part of each value and reverse them to make it simpler to find the last difference between them.
const oldArrayReversed = cutAndReverse( arr1, firstIndex );
const newArrayReversed = cutAndReverse( arr2, firstIndex );

// Find the first difference between reversed values.
// It should be treated as "how many elements from the end the last difference occurred".
//
// For example:
//
// initial -> after cut -> reversed:
// oldText: '321ba' -> '21ba' -> 'ab12'
// newText: '31xba' -> '1xba' -> 'abx1'
// lastIndex: -> 2
// initial -> after cut -> reversed:
// oldValue: '321ba' -> '21ba' -> 'ab12'
// newValue: '31xba' -> '1xba' -> 'abx1'
// lastIndex: -> 2
//
// So the last change occurred two characters from the end of the texts.
const lastIndex = findFirstDifferenceIndex( oldTextReversed, newTextReversed );
// So the last change occurred two characters from the end of the arrays.
const lastIndex = findFirstDifferenceIndex( oldArrayReversed, newArrayReversed, cmp );

// Use `lastIndex` to calculate proper offset, starting from the beginning (`lastIndex` kind of starts from the end).
const lastIndexOld = oldText.length - lastIndex;
const lastIndexNew = newText.length - lastIndex;
const lastIndexOld = arr1.length - lastIndex;
const lastIndexNew = arr2.length - lastIndex;

return { firstIndex, lastIndexOld, lastIndexNew };
}

// Returns a first index on which `oldText` and `newText` differ.
// Returns the first index at which the given arrays differ. If both arrays are the same, -1 is returned.
//
// @param {String} oldText
// @param {String} newText
// @param {Array} arr1
// @param {Array} arr2
// @param {Function} cmp Comparator function.
// @returns {Number}
function findFirstDifferenceIndex( oldText, newText ) {
for ( let i = 0; i < Math.max( oldText.length, newText.length ); i++ ) {
if ( oldText[ i ] !== newText[ i ] ) {
function findFirstDifferenceIndex( arr1, arr2, cmp ) {
for ( let i = 0; i < Math.max( arr1.length, arr2.length ); i++ ) {
if ( arr1[ i ] === undefined || arr2[ i ] === undefined || !cmp( arr1[ i ], arr2[ i ] ) ) {
return i;
}
}
// No "backup" return cause we assume that `oldText` and `newText` differ. This means that they either have a
// difference or they have a different lengths. This means that the `if` condition will always be met eventually.

return -1; // Return -1 if arrays are equal.
}

// Removes `howMany` characters from the given `text` string starting from the beginning, then reverses and returns it.
// Returns a copy of the given array with `howMany` elements removed starting from the beginning and in reversed order.
//
// @param {String} text Text to be processed.
// @param {Number} howMany How many characters from text beginning to cut.
// @returns {String} Shortened and reversed text.
function cutAndReverse( text, howMany ) {
return text.substring( howMany ).split( '' ).reverse().join( '' );
// @param {Array} arr Array to be processed.
// @param {Number} howMany How many elements from array beginning to remove.
// @returns {Array} Shortened and reversed array.
function cutAndReverse( arr, howMany ) {
return arr.slice( howMany ).reverse();
}

// Generates changes array based on change indexes from `findChangeBoundaryIndexes` function. This function will
// generate array with 0 (no changes), 1 (deletion or insertion) or 2 records (insertion and deletion).
//
// @param {String} newText New text for which change indexes were calculated.
// @param {Array} newArray New array for which change indexes were calculated.
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @returns {Array.<Object>} Array of changes compatible with {@link module:utils/difftochanges~diffToChanges} format.
function changeIndexesToChanges( newText, changeIndexes ) {
function changeIndexesToChanges( newArray, changeIndexes ) {
const result = [];
const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;

Expand All @@ -145,7 +207,7 @@ function changeIndexesToChanges( newText, changeIndexes ) {
result.push( {
index: firstIndex,
type: 'insert',
values: newText.substring( firstIndex, lastIndexNew ).split( '' )
values: newArray.slice( firstIndex, lastIndexNew )
} );
}

Expand All @@ -159,3 +221,36 @@ function changeIndexesToChanges( newText, changeIndexes ) {

return result;
}

// Builds a flat list of `equal|insert|delete` operations out of the change indexes computed by
// the `findChangeBoundaryIndexes` function.
//
// The list is made of up to four consecutive runs, always in this order: the untouched head (`equal`),
// the inserted items (`insert`), the removed items (`delete`) and the untouched tail (`equal`).
// A run is skipped entirely when its length is not positive.
//
// @param {Object} changeIndexes Change indexes object from `findChangeBoundaryIndexes` function.
// @param {Number} newLength Length of the new array on which `findChangeBoundaryIndexes` calculated change indexes.
// @returns {Array.<String>} Array of changes compatible with {@link module:utils/diff~diff} format.
function changeIndexesToAtomicChanges( changeIndexes, newLength ) {
	const { firstIndex, lastIndexOld, lastIndexNew } = changeIndexes;

	// `firstIndex` equal to -1 signals that no difference was found, so every item of the new array is `equal`.
	if ( firstIndex === -1 ) {
		return Array( newLength ).fill( 'equal' );
	}

	// Each entry describes one run: the operation name and how many times it repeats.
	const runs = [
		[ 'equal', firstIndex ],
		[ 'insert', lastIndexNew - firstIndex ],
		[ 'delete', lastIndexOld - firstIndex ],
		[ 'equal', newLength - lastIndexNew ]
	];

	let operations = [];

	for ( const [ operation, howMany ] of runs ) {
		if ( howMany > 0 ) {
			operations = operations.concat( Array( howMany ).fill( operation ) );
		}
	}

	return operations;
}
46 changes: 46 additions & 0 deletions tests/_utils-tests/longtext.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/**
* @license Copyright (c) 2003-2019, CKSource - Frederico Knabben. All rights reserved.
* For licensing, see LICENSE.md.
*/

import getLongText from '../../tests/_utils/longtext';

describe( 'utils', () => {
	describe( 'getLongText', () => {
		// Asserts that the text generated for the requested length has exactly that length.
		const expectGeneratedLength = length => {
			expect( getLongText( length ).length ).to.equal( length );
		};

		// Returns the given string with its characters in reversed order.
		const reverseText = text => text.split( '' ).reverse().join( '' );

		// Asserts that `candidate` differs from `text` and is its character-by-character reversal.
		const expectReversedPair = ( text, candidate ) => {
			expect( text ).to.not.equal( candidate );
			expect( text ).to.equal( reverseText( candidate ) );
		};

		it( 'should return text with 0 length', () => {
			expectGeneratedLength( 0 );
		} );

		it( 'should return text with 553 length', () => {
			expectGeneratedLength( 553 );
		} );

		it( 'should return text with 1500 length', () => {
			expectGeneratedLength( 1500 );
		} );

		it( 'should return text with 4000 length', () => {
			expectGeneratedLength( 4000 );
		} );

		it( 'should return different text with fromStart=false', () => {
			const defaultText = getLongText( 100 );
			const otherText = getLongText( 100, false );

			expect( defaultText ).to.not.equal( otherText );
		} );

		// NOTE(review): the third argument presumably toggles reversing of the generated text - confirm against `getLongText`.
		it( 'should return reversed text', () => {
			const plain = getLongText( 100 );
			const reversed = getLongText( 100, true, true );

			expectReversedPair( plain, reversed );
		} );

		it( 'should return reversed text (with fromStart=false)', () => {
			const plain = getLongText( 150, false );
			const reversed = getLongText( 150, false, true );

			expectReversedPair( plain, reversed );
		} );
	} );
} );
Loading

0 comments on commit ee9bed0

Please sign in to comment.