254 lines
12 KiB
JavaScript
254 lines
12 KiB
JavaScript
export default class Diff {
|
|
diff(oldStr, newStr,
|
|
// Type below is not accurate/complete - see above for full possibilities - but it compiles
|
|
options = {}) {
|
|
let callback;
|
|
if (typeof options === 'function') {
|
|
callback = options;
|
|
options = {};
|
|
}
|
|
else if ('callback' in options) {
|
|
callback = options.callback;
|
|
}
|
|
// Allow subclasses to massage the input prior to running
|
|
const oldString = this.castInput(oldStr, options);
|
|
const newString = this.castInput(newStr, options);
|
|
const oldTokens = this.removeEmpty(this.tokenize(oldString, options));
|
|
const newTokens = this.removeEmpty(this.tokenize(newString, options));
|
|
return this.diffWithOptionsObj(oldTokens, newTokens, options, callback);
|
|
}
|
|
diffWithOptionsObj(oldTokens, newTokens, options, callback) {
|
|
var _a;
|
|
const done = (value) => {
|
|
value = this.postProcess(value, options);
|
|
if (callback) {
|
|
setTimeout(function () { callback(value); }, 0);
|
|
return undefined;
|
|
}
|
|
else {
|
|
return value;
|
|
}
|
|
};
|
|
const newLen = newTokens.length, oldLen = oldTokens.length;
|
|
let editLength = 1;
|
|
let maxEditLength = newLen + oldLen;
|
|
if (options.maxEditLength != null) {
|
|
maxEditLength = Math.min(maxEditLength, options.maxEditLength);
|
|
}
|
|
const maxExecutionTime = (_a = options.timeout) !== null && _a !== void 0 ? _a : Infinity;
|
|
const abortAfterTimestamp = Date.now() + maxExecutionTime;
|
|
const bestPath = [{ oldPos: -1, lastComponent: undefined }];
|
|
// Seed editLength = 0, i.e. the content starts with the same values
|
|
let newPos = this.extractCommon(bestPath[0], newTokens, oldTokens, 0, options);
|
|
if (bestPath[0].oldPos + 1 >= oldLen && newPos + 1 >= newLen) {
|
|
// Identity per the equality and tokenizer
|
|
return done(this.buildValues(bestPath[0].lastComponent, newTokens, oldTokens));
|
|
}
|
|
// Once we hit the right edge of the edit graph on some diagonal k, we can
|
|
// definitely reach the end of the edit graph in no more than k edits, so
|
|
// there's no point in considering any moves to diagonal k+1 any more (from
|
|
// which we're guaranteed to need at least k+1 more edits).
|
|
// Similarly, once we've reached the bottom of the edit graph, there's no
|
|
// point considering moves to lower diagonals.
|
|
// We record this fact by setting minDiagonalToConsider and
|
|
// maxDiagonalToConsider to some finite value once we've hit the edge of
|
|
// the edit graph.
|
|
// This optimization is not faithful to the original algorithm presented in
|
|
// Myers's paper, which instead pointlessly extends D-paths off the end of
|
|
// the edit graph - see page 7 of Myers's paper which notes this point
|
|
// explicitly and illustrates it with a diagram. This has major performance
|
|
// implications for some common scenarios. For instance, to compute a diff
|
|
// where the new text simply appends d characters on the end of the
|
|
// original text of length n, the true Myers algorithm will take O(n+d^2)
|
|
// time while this optimization needs only O(n+d) time.
|
|
let minDiagonalToConsider = -Infinity, maxDiagonalToConsider = Infinity;
|
|
// Main worker method. checks all permutations of a given edit length for acceptance.
|
|
const execEditLength = () => {
|
|
for (let diagonalPath = Math.max(minDiagonalToConsider, -editLength); diagonalPath <= Math.min(maxDiagonalToConsider, editLength); diagonalPath += 2) {
|
|
let basePath;
|
|
const removePath = bestPath[diagonalPath - 1], addPath = bestPath[diagonalPath + 1];
|
|
if (removePath) {
|
|
// No one else is going to attempt to use this value, clear it
|
|
// @ts-expect-error - perf optimisation. This type-violating value will never be read.
|
|
bestPath[diagonalPath - 1] = undefined;
|
|
}
|
|
let canAdd = false;
|
|
if (addPath) {
|
|
// what newPos will be after we do an insertion:
|
|
const addPathNewPos = addPath.oldPos - diagonalPath;
|
|
canAdd = addPath && 0 <= addPathNewPos && addPathNewPos < newLen;
|
|
}
|
|
const canRemove = removePath && removePath.oldPos + 1 < oldLen;
|
|
if (!canAdd && !canRemove) {
|
|
// If this path is a terminal then prune
|
|
// @ts-expect-error - perf optimisation. This type-violating value will never be read.
|
|
bestPath[diagonalPath] = undefined;
|
|
continue;
|
|
}
|
|
// Select the diagonal that we want to branch from. We select the prior
|
|
// path whose position in the old string is the farthest from the origin
|
|
// and does not pass the bounds of the diff graph
|
|
if (!canRemove || (canAdd && removePath.oldPos < addPath.oldPos)) {
|
|
basePath = this.addToPath(addPath, true, false, 0, options);
|
|
}
|
|
else {
|
|
basePath = this.addToPath(removePath, false, true, 1, options);
|
|
}
|
|
newPos = this.extractCommon(basePath, newTokens, oldTokens, diagonalPath, options);
|
|
if (basePath.oldPos + 1 >= oldLen && newPos + 1 >= newLen) {
|
|
// If we have hit the end of both strings, then we are done
|
|
return done(this.buildValues(basePath.lastComponent, newTokens, oldTokens)) || true;
|
|
}
|
|
else {
|
|
bestPath[diagonalPath] = basePath;
|
|
if (basePath.oldPos + 1 >= oldLen) {
|
|
maxDiagonalToConsider = Math.min(maxDiagonalToConsider, diagonalPath - 1);
|
|
}
|
|
if (newPos + 1 >= newLen) {
|
|
minDiagonalToConsider = Math.max(minDiagonalToConsider, diagonalPath + 1);
|
|
}
|
|
}
|
|
}
|
|
editLength++;
|
|
};
|
|
// Performs the length of edit iteration. Is a bit fugly as this has to support the
|
|
// sync and async mode which is never fun. Loops over execEditLength until a value
|
|
// is produced, or until the edit length exceeds options.maxEditLength (if given),
|
|
// in which case it will return undefined.
|
|
if (callback) {
|
|
(function exec() {
|
|
setTimeout(function () {
|
|
if (editLength > maxEditLength || Date.now() > abortAfterTimestamp) {
|
|
return callback(undefined);
|
|
}
|
|
if (!execEditLength()) {
|
|
exec();
|
|
}
|
|
}, 0);
|
|
}());
|
|
}
|
|
else {
|
|
while (editLength <= maxEditLength && Date.now() <= abortAfterTimestamp) {
|
|
const ret = execEditLength();
|
|
if (ret) {
|
|
return ret;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
addToPath(path, added, removed, oldPosInc, options) {
|
|
const last = path.lastComponent;
|
|
if (last && !options.oneChangePerToken && last.added === added && last.removed === removed) {
|
|
return {
|
|
oldPos: path.oldPos + oldPosInc,
|
|
lastComponent: { count: last.count + 1, added: added, removed: removed, previousComponent: last.previousComponent }
|
|
};
|
|
}
|
|
else {
|
|
return {
|
|
oldPos: path.oldPos + oldPosInc,
|
|
lastComponent: { count: 1, added: added, removed: removed, previousComponent: last }
|
|
};
|
|
}
|
|
}
|
|
extractCommon(basePath, newTokens, oldTokens, diagonalPath, options) {
|
|
const newLen = newTokens.length, oldLen = oldTokens.length;
|
|
let oldPos = basePath.oldPos, newPos = oldPos - diagonalPath, commonCount = 0;
|
|
while (newPos + 1 < newLen && oldPos + 1 < oldLen && this.equals(oldTokens[oldPos + 1], newTokens[newPos + 1], options)) {
|
|
newPos++;
|
|
oldPos++;
|
|
commonCount++;
|
|
if (options.oneChangePerToken) {
|
|
basePath.lastComponent = { count: 1, previousComponent: basePath.lastComponent, added: false, removed: false };
|
|
}
|
|
}
|
|
if (commonCount && !options.oneChangePerToken) {
|
|
basePath.lastComponent = { count: commonCount, previousComponent: basePath.lastComponent, added: false, removed: false };
|
|
}
|
|
basePath.oldPos = oldPos;
|
|
return newPos;
|
|
}
|
|
equals(left, right, options) {
|
|
if (options.comparator) {
|
|
return options.comparator(left, right);
|
|
}
|
|
else {
|
|
return left === right
|
|
|| (!!options.ignoreCase && left.toLowerCase() === right.toLowerCase());
|
|
}
|
|
}
|
|
removeEmpty(array) {
|
|
const ret = [];
|
|
for (let i = 0; i < array.length; i++) {
|
|
if (array[i]) {
|
|
ret.push(array[i]);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
castInput(value, options) {
|
|
return value;
|
|
}
|
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
tokenize(value, options) {
|
|
return Array.from(value);
|
|
}
|
|
join(chars) {
|
|
// Assumes ValueT is string, which is the case for most subclasses.
|
|
// When it's false, e.g. in diffArrays, this method needs to be overridden (e.g. with a no-op)
|
|
// Yes, the casts are verbose and ugly, because this pattern - of having the base class SORT OF
|
|
// assume tokens and values are strings, but not completely - is weird and janky.
|
|
return chars.join('');
|
|
}
|
|
postProcess(changeObjects,
|
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
options) {
|
|
return changeObjects;
|
|
}
|
|
get useLongestToken() {
|
|
return false;
|
|
}
|
|
buildValues(lastComponent, newTokens, oldTokens) {
|
|
// First we convert our linked list of components in reverse order to an
|
|
// array in the right order:
|
|
const components = [];
|
|
let nextComponent;
|
|
while (lastComponent) {
|
|
components.push(lastComponent);
|
|
nextComponent = lastComponent.previousComponent;
|
|
delete lastComponent.previousComponent;
|
|
lastComponent = nextComponent;
|
|
}
|
|
components.reverse();
|
|
const componentLen = components.length;
|
|
let componentPos = 0, newPos = 0, oldPos = 0;
|
|
for (; componentPos < componentLen; componentPos++) {
|
|
const component = components[componentPos];
|
|
if (!component.removed) {
|
|
if (!component.added && this.useLongestToken) {
|
|
let value = newTokens.slice(newPos, newPos + component.count);
|
|
value = value.map(function (value, i) {
|
|
const oldValue = oldTokens[oldPos + i];
|
|
return oldValue.length > value.length ? oldValue : value;
|
|
});
|
|
component.value = this.join(value);
|
|
}
|
|
else {
|
|
component.value = this.join(newTokens.slice(newPos, newPos + component.count));
|
|
}
|
|
newPos += component.count;
|
|
// Common case
|
|
if (!component.added) {
|
|
oldPos += component.count;
|
|
}
|
|
}
|
|
else {
|
|
component.value = this.join(oldTokens.slice(oldPos, oldPos + component.count));
|
|
oldPos += component.count;
|
|
}
|
|
}
|
|
return components;
|
|
}
|
|
}
|