/* * Copyright 2008 Google Inc. All Rights Reserved. * Author: [email protected] (Neil Fraser) * Author: [email protected] (Matthaeus G. Chajdas) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Diff Match and Patch * http://code.google.com/p/google-diff-match-patch/ */ using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; using System.Web; namespace DiffMatchPatch { internal static class CompatibilityExtensions { // JScript splice function public static List Splice(this List input, int start, int count, params T[] objects) { List deletedRange = input.GetRange(start, count); input.RemoveRange(start, count); input.InsertRange(start, objects); return deletedRange; } // Java substring function public static string JavaSubstring(this string s, int begin, int end) { return s.Substring(begin, end - begin); } } /**- * The data structure representing a diff is a List of Diff objects: * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), * Diff(Operation.EQUAL, " world.")} * which means: delete "Hello", add "Goodbye" and keep " world." */ public enum Operation { DELETE, INSERT, EQUAL } /** * Class representing one diff operation. */ public class Diff { public Operation operation; // One of: INSERT, DELETE or EQUAL. public string text; // The text associated with this diff operation. /** * Constructor. Initializes the diff with the provided values. * @param operation One of INSERT, DELETE or EQUAL. * @param text The text being applied. */ public Diff(Operation operation, string text) { // Construct a diff with the specified operation and text. this.operation = operation; this.text = text; } /** * Display a human-readable version of this Diff. * @return text version. */ public override string ToString() { string prettyText = this.text.Replace('\n', '\u00b6'); return "Diff(" + this.operation + ",\"" + prettyText + "\")"; } /** * Is this Diff equivalent to another Diff? * @param d Another Diff to compare against. * @return true or false. */ public override bool Equals(Object obj) { // If parameter is null return false. if (obj == null) { return false; } // If parameter cannot be cast to Diff return false. Diff p = obj as Diff; if ((System.Object)p == null) { return false; } // Return true if the fields match. return p.operation == this.operation && p.text == this.text; } public bool Equals(Diff obj) { // If parameter is null return false. if (obj == null) { return false; } // Return true if the fields match. return obj.operation == this.operation && obj.text == this.text; } public override int GetHashCode() { return text.GetHashCode() ^ operation.GetHashCode(); } } /** * Class representing one patch operation. */ public class Patch { public List diffs = new List(); public int start1; public int start2; public int length1; public int length2; /** * Emmulate GNU diff's format. * Header: @@ -382,8 +481,9 @@ * Indicies are printed as 1-based, not 0-based. * @return The GNU diff string. */ public override string ToString() { string coords1, coords2; if (this.length1 == 0) { coords1 = this.start1 + ",0"; } else if (this.length1 == 1) { coords1 = Convert.ToString(this.start1 + 1); } else { coords1 = (this.start1 + 1) + "," + this.length1; } if (this.length2 == 0) { coords2 = this.start2 + ",0"; } else if (this.length2 == 1) { coords2 = Convert.ToString(this.start2 + 1); } else { coords2 = (this.start2 + 1) + "," + this.length2; } StringBuilder text = new StringBuilder(); text.Append("@@ -").Append(coords1).Append(" +").Append(coords2) .Append(" @@\n"); // Escape the body of the patch with %xx notation. foreach (Diff aDiff in this.diffs) { switch (aDiff.operation) { case Operation.INSERT: text.Append('+'); break; case Operation.DELETE: text.Append('-'); break; case Operation.EQUAL: text.Append(' '); break; } text.Append(HttpUtility.UrlEncode(aDiff.text, new UTF8Encoding()).Replace('+', ' ')).Append("\n"); } return diff_match_patch.unescapeForEncodeUriCompatability( text.ToString()); } } /** * Class containing the diff, match and patch methods. * Also Contains the behaviour settings. */ public class diff_match_patch { // Defaults. // Set these on your diff_match_patch instance to override the defaults. // Number of seconds to map a diff before giving up (0 for infinity). public float Diff_Timeout = 1.0f; // Cost of an empty edit operation in terms of edit characters. public short Diff_EditCost = 4; // At what point is no match declared (0.0 = perfection, 1.0 = very loose). public float Match_Threshold = 0.5f; // How far to search for a match (0 = exact location, 1000+ = broad match). // A match this many characters away from the expected location will add // 1.0 to the score (0.0 is a perfect match). public int Match_Distance = 1000; // When deleting a large block of text (over ~64 characters), how close // do the contents have to be to match the expected contents. (0.0 = // perfection, 1.0 = very loose). Note that Match_Threshold controls // how closely the end points of a delete need to match. public float Patch_DeleteThreshold = 0.5f; // Chunk size for context length. public short Patch_Margin = 4; // The number of bits in an int. private short Match_MaxBits = 32; // DIFF FUNCTIONS /** * Find the differences between two texts. * Run a faster, slightly less optimal diff. * This method allows the 'checklines' of diff_main() to be optional. * Most of the time checklines is wanted, so default to true. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @return List of Diff objects. */ public List diff_main(string text1, string text2) { return diff_main(text1, text2, true); } /** * Find the differences between two texts. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a * line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff. * @return List of Diff objects. */ public List diff_main(string text1, string text2, bool checklines) { // Set a deadline by which time the diff must be complete. DateTime deadline; if (this.Diff_Timeout diff_main(string text1, string text2, bool checklines, DateTime deadline) { // Check for null inputs not needed since null can't be passed in C#. // Check for equality (speedup). List diffs; if (text1 == text2) { diffs = new List(); if (text1.Length != 0) { diffs.Add(new Diff(Operation.EQUAL, text1)); } return diffs; } // Trim off common prefix (speedup). int commonlength = diff_commonPrefix(text1, text2); string commonprefix = text1.Substring(0, commonlength); text1 = text1.Substring(commonlength); text2 = text2.Substring(commonlength); // Trim off common suffix (speedup). commonlength = diff_commonSuffix(text1, text2); string commonsuffix = text1.Substring(text1.Length - commonlength); text1 = text1.Substring(0, text1.Length - commonlength); text2 = text2.Substring(0, text2.Length - commonlength); // Compute the diff on the middle block. diffs = diff_compute(text1, text2, checklines, deadline); // Restore the prefix and suffix. if (commonprefix.Length != 0) { diffs.Insert(0, (new Diff(Operation.EQUAL, commonprefix))); } if (commonsuffix.Length != 0) { diffs.Add(new Diff(Operation.EQUAL, commonsuffix)); } diff_cleanupMerge(diffs); return diffs; } /** * Find the differences between two texts. Assumes that the texts do not * have any common prefix or suffix. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param checklines Speedup flag. If false, then don't run a * line-level diff first to identify the changed areas. * If true, then run a faster slightly less optimal diff. * @param deadline Time when the diff should be complete by. * @return List of Diff objects. */ private List diff_compute(string text1, string text2, bool checklines, DateTime deadline) { List diffs = new List(); if (text1.Length == 0) { // Just add some text (speedup). diffs.Add(new Diff(Operation.INSERT, text2)); return diffs; } if (text2.Length == 0) { // Just delete some text (speedup). diffs.Add(new Diff(Operation.DELETE, text1)); return diffs; } string longtext = text1.Length > text2.Length ? text1 : text2; string shorttext = text1.Length > text2.Length ? text2 : text1; int i = longtext.IndexOf(shorttext, StringComparison.Ordinal); if (i != -1) { // Shorter text is inside the longer text (speedup). Operation op = (text1.Length > text2.Length) ? Operation.DELETE : Operation.INSERT; diffs.Add(new Diff(op, longtext.Substring(0, i))); diffs.Add(new Diff(Operation.EQUAL, shorttext)); diffs.Add(new Diff(op, longtext.Substring(i + shorttext.Length))); return diffs; } if (shorttext.Length == 1) { // Single character string. // After the previous speedup, the character can't be an equality. diffs.Add(new Diff(Operation.DELETE, text1)); diffs.Add(new Diff(Operation.INSERT, text2)); return diffs; } // Check to see if the problem can be split in two. string[] hm = diff_halfMatch(text1, text2); if (hm != null) { // A half-match was found, sort out the return data. string text1_a = hm[0]; string text1_b = hm[1]; string text2_a = hm[2]; string text2_b = hm[3]; string mid_common = hm[4]; // Send both pairs off for separate processing. List diffs_a = diff_main(text1_a, text2_a, checklines, deadline); List diffs_b = diff_main(text1_b, text2_b, checklines, deadline); // Merge the results. diffs = diffs_a; diffs.Add(new Diff(Operation.EQUAL, mid_common)); diffs.AddRange(diffs_b); return diffs; } if (checklines && text1.Length > 100 && text2.Length > 100) { return diff_lineMode(text1, text2, deadline); } return diff_bisect(text1, text2, deadline); } /** * Do a quick line-level diff on both strings, then rediff the parts for * greater accuracy. * This speedup can produce non-minimal diffs. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param deadline Time when the diff should be complete by. * @return List of Diff objects. */ private List diff_lineMode(string text1, string text2, DateTime deadline) { // Scan the text on a line-by-line basis first. Object[] b = diff_linesToChars(text1, text2); text1 = (string)b[0]; text2 = (string)b[1]; List linearray = (List)b[2]; List diffs = diff_main(text1, text2, false, deadline); // Convert the diff back to original text. diff_charsToLines(diffs, linearray); // Eliminate freak matches (e.g. blank lines) diff_cleanupSemantic(diffs); // Rediff any replacement blocks, this time character-by-character. // Add a dummy entry at the end. diffs.Add(new Diff(Operation.EQUAL, string.Empty)); int pointer = 0; int count_delete = 0; int count_insert = 0; string text_delete = string.Empty; string text_insert = string.Empty; while (pointer = 1 && count_insert >= 1) { // Delete the offending records and add the merged ones. diffs.RemoveRange(pointer - count_delete - count_insert, count_delete + count_insert); pointer = pointer - count_delete - count_insert; List a = this.diff_main(text_delete, text_insert, false, deadline); diffs.InsertRange(pointer, a); pointer = pointer + a.Count; } count_insert = 0; count_delete = 0; text_delete = string.Empty; text_insert = string.Empty; break; } pointer++; } diffs.RemoveAt(diffs.Count - 1); // Remove the dummy entry at the end. return diffs; } /** * Find the 'middle snake' of a diff, split the problem in two * and return the recursively constructed diff. * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param deadline Time at which to bail if not yet complete. * @return List of Diff objects. */ protected List diff_bisect(string text1, string text2, DateTime deadline) { // Cache the text lengths to prevent multiple calls. int text1_length = text1.Length; int text2_length = text2.Length; int max_d = (text1_length + text2_length + 1) / 2; int v_offset = max_d; int v_length = 2 * max_d; int[] v1 = new int[v_length]; int[] v2 = new int[v_length]; for (int x = 0; x deadline) { break; } // Walk the front path one step. for (int k1 = -d + k1start; k1 text1_length) { // Ran off the right of the graph. k1end += 2; } else if (y1 > text2_length) { // Ran off the bottom of the graph. k1start += 2; } else if (front) { int k2_offset = v_offset + delta - k1; if (k2_offset >= 0 && k2_offset = x2) { // Overlap detected. return diff_bisectSplit(text1, text2, x1, y1, deadline); } } } } // Walk the reverse path one step. for (int k2 = -d + k2start; k2 text1_length) { // Ran off the left of the graph. k2end += 2; } else if (y2 > text2_length) { // Ran off the top of the graph. k2start += 2; } else if (!front) { int k1_offset = v_offset + delta - k2; if (k1_offset >= 0 && k1_offset = x2) { // Overlap detected. return diff_bisectSplit(text1, text2, x1, y1, deadline); } } } } } // Diff took too long and hit the deadline or // number of diffs equals number of characters, no commonality at all. List diffs = new List(); diffs.Add(new Diff(Operation.DELETE, text1)); diffs.Add(new Diff(Operation.INSERT, text2)); return diffs; } /** * Given the location of the 'middle snake', split the diff in two parts * and recurse. * @param text1 Old string to be diffed. * @param text2 New string to be diffed. * @param x Index of split point in text1. * @param y Index of split point in text2. * @param deadline Time at which to bail if not yet complete. * @return LinkedList of Diff objects. */ private List diff_bisectSplit(string text1, string text2, int x, int y, DateTime deadline) { string text1a = text1.Substring(0, x); string text2a = text2.Substring(0, y); string text1b = text1.Substring(x); string text2b = text2.Substring(y); // Compute both diffs serially. List diffs = diff_main(text1a, text2a, false, deadline); List diffsb = diff_main(text1b, text2b, false, deadline); diffs.AddRange(diffsb); return diffs; } /** * Split two texts into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text1 First string. * @param text2 Second string. * @return Three element Object array, containing the encoded text1, the * encoded text2 and the List of unique strings. The zeroth element * of the List of unique strings is intentionally blank. */ protected Object[] diff_linesToChars(string text1, string text2) { List lineArray = new List(); Dictionary lineHash = new Dictionary(); // e.g. linearray[4] == "Hello\n" // e.g. linehash.get("Hello\n") == 4 // "\x00" is a valid character, but various debuggers don't like it. // So we'll insert a junk entry to avoid generating a null character. lineArray.Add(string.Empty); string chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); string chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); return new Object[] { chars1, chars2, lineArray }; } /** * Split a text into a list of strings. Reduce the texts to a string of * hashes where each Unicode character represents one line. * @param text String to encode. * @param lineArray List of unique strings. * @param lineHash Map of strings to indices. * @return Encoded string. */ private string diff_linesToCharsMunge(string text, List lineArray, Dictionary lineHash) { int lineStart = 0; int lineEnd = -1; string line; StringBuilder chars = new StringBuilder(); // Walk the text, pulling out a Substring for each line. // text.split('\n') would would temporarily double our memory footprint. // Modifying text would create many large strings to garbage collect. while (lineEnd diffs, List lineArray) { StringBuilder text; foreach (Diff diff in diffs) { text = new StringBuilder(); for (int y = 0; y text2_length) { text1 = text1.Substring(text1_length - text2_length); } else if (text1_length text2.Length ? text1 : text2; string shorttext = text1.Length > text2.Length ? text2 : text1; if (longtext.Length hm2[4].Length ? hm1 : hm2; } // A half-match was found, sort out the return data. if (text1.Length > text2.Length) { return hm; //return new string[]{hm[0], hm[1], hm[2], hm[3], hm[4]}; } else { return new string[] { hm[2], hm[3], hm[0], hm[1], hm[4] }; } } /** * Does a Substring of shorttext exist within longtext such that the * Substring is at least half the length of longtext? * @param longtext Longer string. * @param shorttext Shorter string. * @param i Start index of quarter length Substring within longtext. * @return Five element string array, containing the prefix of longtext, the * suffix of longtext, the prefix of shorttext, the suffix of shorttext * and the common middle. Or null if there was no match. */ private string[] diff_halfMatchI(string longtext, string shorttext, int i) { // Start with a 1/4 length Substring at position i as a seed. string seed = longtext.Substring(i, longtext.Length / 4); int j = -1; string best_common = string.Empty; string best_longtext_a = string.Empty, best_longtext_b = string.Empty; string best_shorttext_a = string.Empty, best_shorttext_b = string.Empty; while (j = longtext.Length) { return new string[]{best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common}; } else { return null; } } /** * Reduce the number of edits by eliminating semantically trivial * equalities. * @param diffs List of Diff objects. */ public void diff_cleanupSemantic(List diffs) { bool changes = false; // Stack of indices where equalities are found. Stack equalities = new Stack(); // Always equal to equalities[equalitiesLength-1][1] string lastequality = null; int pointer = 0; // Index of current position. // Number of characters that changed prior to the equality. int length_insertions1 = 0; int length_deletions1 = 0; // Number of characters that changed after the equality. int length_insertions2 = 0; int length_deletions2 = 0; while (pointer 0) { equalities.Pop(); } pointer = equalities.Count > 0 ? equalities.Peek() : -1; length_insertions1 = 0; // Reset the counters. length_deletions1 = 0; length_insertions2 = 0; length_deletions2 = 0; lastequality = null; changes = true; } } pointer++; } // Normalize the diff. if (changes) { diff_cleanupMerge(diffs); } diff_cleanupSemanticLossless(diffs); // Find any overlaps between deletions and insertions. // e.g: abcxxxxxxdef // -> abcxxxdef // e.g: xxxabcdefxxx // -> defxxxabc // Only extract an overlap if it is as big as the edit ahead or behind it. pointer = 1; while (pointer = overlap_length2) { if (overlap_length1 >= deletion.Length / 2.0 || overlap_length1 >= insertion.Length / 2.0) { // Overlap found. // Insert an equality and trim the surrounding edits. diffs.Insert(pointer, new Diff(Operation.EQUAL, insertion.Substring(0, overlap_length1))); diffs[pointer - 1].text = deletion.Substring(0, deletion.Length - overlap_length1); diffs[pointer + 1].text = insertion.Substring(overlap_length1); pointer++; } } else { if (overlap_length2 >= deletion.Length / 2.0 || overlap_length2 >= insertion.Length / 2.0) { // Reverse overlap found. // Insert an equality and swap and trim the surrounding edits. diffs.Insert(pointer, new Diff(Operation.EQUAL, deletion.Substring(0, overlap_length2))); diffs[pointer - 1].operation = Operation.INSERT; diffs[pointer - 1].text = insertion.Substring(0, insertion.Length - overlap_length2); diffs[pointer + 1].operation = Operation.DELETE; diffs[pointer + 1].text = deletion.Substring(overlap_length2); pointer++; } } pointer++; } pointer++; } } /** * Look for single edits surrounded on both sides by equalities * which can be shifted sideways to align the edit to a word boundary. * e.g: The cat came. -> The cat came. * @param diffs List of Diff objects. */ public void diff_cleanupSemanticLossless(List diffs) { int pointer = 1; // Intentionally ignore the first and last element (don't need checking). while (pointer 0) { string commonString = edit.Substring(edit.Length - commonOffset); equality1 = equality1.Substring(0, equality1.Length - commonOffset); edit = commonString + edit.Substring(0, edit.Length - commonOffset); equality2 = commonString + equality2; } // Second, step character by character right, // looking for the best fit. string bestEquality1 = equality1; string bestEdit = edit; string bestEquality2 = equality2; int bestScore = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2); while (edit.Length != 0 && equality2.Length != 0 && edit[0] == equality2[0]) { equality1 += edit[0]; edit = edit.Substring(1) + equality2[0]; equality2 = equality2.Substring(1); int score = diff_cleanupSemanticScore(equality1, edit) + diff_cleanupSemanticScore(edit, equality2); // The >= encourages trailing rather than leading whitespace on // edits. if (score >= bestScore) { bestScore = score; bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; } } if (diffs[pointer - 1].text != bestEquality1) { // We have an improvement, save it back to the diff. if (bestEquality1.Length != 0) { diffs[pointer - 1].text = bestEquality1; } else { diffs.RemoveAt(pointer - 1); pointer--; } diffs[pointer].text = bestEdit; if (bestEquality2.Length != 0) { diffs[pointer + 1].text = bestEquality2; } else { diffs.RemoveAt(pointer + 1); pointer--; } } } pointer++; } } /** * Given two strings, comAdde a score representing whether the internal * boundary falls on logical boundaries. * Scores range from 6 (best) to 0 (worst). * @param one First string. * @param two Second string. * @return The score. */ private int diff_cleanupSemanticScore(string one, string two) { if (one.Length == 0 || two.Length == 0) { // Edges are the best. return 6; } // Each port of this function behaves slightly differently due to // subtle differences in each language's definition of things like // 'whitespace'. Since this function's purpose is largely cosmetic, // the choice has been made to use each language's native features // rather than force total conformity. char char1 = one[one.Length - 1]; char char2 = two[0]; bool nonAlphaNumeric1 = !Char.IsLetterOrDigit(char1); bool nonAlphaNumeric2 = !Char.IsLetterOrDigit(char2); bool whitespace1 = nonAlphaNumeric1 && Char.IsWhiteSpace(char1); bool whitespace2 = nonAlphaNumeric2 && Char.IsWhiteSpace(char2); bool lineBreak1 = whitespace1 && Char.IsControl(char1); bool lineBreak2 = whitespace2 && Char.IsControl(char2); bool blankLine1 = lineBreak1 && BLANKLINEEND.IsMatch(one); bool blankLine2 = lineBreak2 && BLANKLINESTART.IsMatch(two); if (blankLine1 || blankLine2) { // Five points for blank lines. return 5; } else if (lineBreak1 || lineBreak2) { // Four points for line breaks. return 4; } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { // Three points for end of sentences. return 3; } else if (whitespace1 || whitespace2) { // Two points for whitespace. return 2; } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { // One point for non-alphanumeric. return 1; } return 0; } // Define some regex patterns for matching boundaries. private Regex BLANKLINEEND = new Regex("\\n\\r?\\n\\Z"); private Regex BLANKLINESTART = new Regex("\\A\\r?\\n\\r?\\n"); /** * Reduce the number of edits by eliminating operationally trivial * equalities. * @param diffs List of Diff objects. */ public void diff_cleanupEfficiency(List diffs) { bool changes = false; // Stack of indices where equalities are found. Stack equalities = new Stack(); // Always equal to equalities[equalitiesLength-1][1] string lastequality = string.Empty; int pointer = 0; // Index of current position. // Is there an insertion operation before the last equality. bool pre_ins = false; // Is there a deletion operation before the last equality. bool pre_del = false; // Is there an insertion operation after the last equality. bool post_ins = false; // Is there a deletion operation after the last equality. bool post_del = false; while (pointer ABXYCD * AXCD * ABXC * AXCD * ABXC */ if ((lastequality.Length != 0) && ((pre_ins && pre_del && post_ins && post_del) || ((lastequality.Length 0) { equalities.Pop(); } pointer = equalities.Count > 0 ? equalities.Peek() : -1; post_ins = post_del = false; } changes = true; } } pointer++; } if (changes) { diff_cleanupMerge(diffs); } } /** * Reorder and merge like edit sections. Merge equalities. * Any edit section can move as long as it doesn't cross an equality. * @param diffs List of Diff objects. */ public void diff_cleanupMerge(List diffs) { // Add a dummy entry at the end. diffs.Add(new Diff(Operation.EQUAL, string.Empty)); int pointer = 0; int count_delete = 0; int count_insert = 0; string text_delete = string.Empty; string text_insert = string.Empty; int commonlength; while (pointer 1) { if (count_delete != 0 && count_insert != 0) { // Factor out any common prefixies. commonlength = this.diff_commonPrefix(text_insert, text_delete); if (commonlength != 0) { if ((pointer - count_delete - count_insert) > 0 && diffs[pointer - count_delete - count_insert - 1].operation == Operation.EQUAL) { diffs[pointer - count_delete - count_insert - 1].text += text_insert.Substring(0, commonlength); } else { diffs.Insert(0, new Diff(Operation.EQUAL, text_insert.Substring(0, commonlength))); pointer++; } text_insert = text_insert.Substring(commonlength); text_delete = text_delete.Substring(commonlength); } // Factor out any common suffixies. commonlength = this.diff_commonSuffix(text_insert, text_delete); if (commonlength != 0) { diffs[pointer].text = text_insert.Substring(text_insert.Length - commonlength) + diffs[pointer].text; text_insert = text_insert.Substring(0, text_insert.Length - commonlength); text_delete = text_delete.Substring(0, text_delete.Length - commonlength); } } // Delete the offending records and add the merged ones. if (count_delete == 0) { diffs.Splice(pointer - count_insert, count_delete + count_insert, new Diff(Operation.INSERT, text_insert)); } else if (count_insert == 0) { diffs.Splice(pointer - count_delete, count_delete + count_insert, new Diff(Operation.DELETE, text_delete)); } else { diffs.Splice(pointer - count_delete - count_insert, count_delete + count_insert, new Diff(Operation.DELETE, text_delete), new Diff(Operation.INSERT, text_insert)); } pointer = pointer - count_delete - count_insert + (count_delete != 0 ? 1 : 0) + (count_insert != 0 ? 1 : 0) + 1; } else if (pointer != 0 && diffs[pointer - 1].operation == Operation.EQUAL) { // Merge this equality with the previous one. diffs[pointer - 1].text += diffs[pointer].text; diffs.RemoveAt(pointer); } else { pointer++; } count_insert = 0; count_delete = 0; text_delete = string.Empty; text_insert = string.Empty; break; } } if (diffs[diffs.Count - 1].text.Length == 0) { diffs.RemoveAt(diffs.Count - 1); // Remove the dummy entry at the end. } // Second pass: look for single edits surrounded on both sides by // equalities which can be shifted sideways to eliminate an equality. // e.g: ABAC -> ABAC bool changes = false; pointer = 1; // Intentionally ignore the first and last element (don't need checking). while (pointer 1, 5->8 * @param diffs List of Diff objects. * @param loc Location within text1. * @return Location within text2. */ public int diff_xIndex(List diffs, int loc) { int chars1 = 0; int chars2 = 0; int last_chars1 = 0; int last_chars2 = 0; Diff lastDiff = null; foreach (Diff aDiff in diffs) { if (aDiff.operation != Operation.INSERT) { // Equality or deletion. chars1 += aDiff.text.Length; } if (aDiff.operation != Operation.DELETE) { // Equality or insertion. chars2 += aDiff.text.Length; } if (chars1 > loc) { // Overshot the location. lastDiff = aDiff; break; } last_chars1 = chars1; last_chars2 = chars2; } if (lastDiff != null && lastDiff.operation == Operation.DELETE) { // The location was deleted. return last_chars2; } // Add the remaining character length. return last_chars2 + (loc - last_chars1); } /** * Convert a Diff list into a pretty HTML report. * @param diffs List of Diff objects. * @return HTML representation. */ public string diff_prettyHtml(List diffs) { StringBuilder html = new StringBuilder(); foreach (Diff aDiff in diffs) { string text = aDiff.text.Replace("&", "&").Replace("", ">").Replace("\n", "¶
"); switch (aDiff.operation) { case Operation.INSERT: html.Append("").Append(text) .Append(""); break; case Operation.DELETE: html.Append("").Append(text) .Append(""); break; case Operation.EQUAL: html.Append("").Append(text).Append(""); break; } } return html.ToString(); } /** * Compute and return the source text (all equalities and deletions). * @param diffs List of Diff objects. * @return Source text. */ public string diff_text1(List diffs) { StringBuilder text = new StringBuilder(); foreach (Diff aDiff in diffs) { if (aDiff.operation != Operation.INSERT) { text.Append(aDiff.text); } } return text.ToString(); } /** * Compute and return the destination text (all equalities and insertions). * @param diffs List of Diff objects. * @return Destination text. */ public string diff_text2(List diffs) { StringBuilder text = new StringBuilder(); foreach (Diff aDiff in diffs) { if (aDiff.operation != Operation.DELETE) { text.Append(aDiff.text); } } return text.ToString(); } /** * Compute the Levenshtein distance; the number of inserted, deleted or * substituted characters. * @param diffs List of Diff objects. * @return Number of changes. */ public int diff_levenshtein(List diffs) { int levenshtein = 0; int insertions = 0; int deletions = 0; foreach (Diff aDiff in diffs) { switch (aDiff.operation) { case Operation.INSERT: insertions += aDiff.text.Length; break; case Operation.DELETE: deletions += aDiff.text.Length; break; case Operation.EQUAL: // A deletion and an insertion is one substitution. levenshtein += Math.Max(insertions, deletions); insertions = 0; deletions = 0; break; } } levenshtein += Math.Max(insertions, deletions); return levenshtein; } /** * Crush the diff into an encoded string which describes the operations * required to transform text1 into text2. * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. * Operations are tab-separated. Inserted text is escaped using %xx * notation. * @param diffs Array of Diff objects. * @return Delta text. */ public string diff_toDelta(List diffs) { StringBuilder text = new StringBuilder(); foreach (Diff aDiff in diffs) { switch (aDiff.operation) { case Operation.INSERT: text.Append("+").Append(HttpUtility.UrlEncode(aDiff.text, new UTF8Encoding()).Replace('+', ' ')).Append("\t"); break; case Operation.DELETE: text.Append("-").Append(aDiff.text.Length).Append("\t"); break; case Operation.EQUAL: text.Append("=").Append(aDiff.text.Length).Append("\t"); break; } } string delta = text.ToString(); if (delta.Length != 0) { // Strip off trailing tab character. delta = delta.Substring(0, delta.Length - 1); delta = unescapeForEncodeUriCompatability(delta); } return delta; } /** * Given the original text1, and an encoded string which describes the * operations required to transform text1 into text2, comAdde the full diff. * @param text1 Source string for the diff. * @param delta Delta text. * @return Array of Diff objects or null if invalid. * @throws ArgumentException If invalid input. */ public List diff_fromDelta(string text1, string delta) { List diffs = new List(); int pointer = 0; // Cursor in text1 string[] tokens = delta.Split(new string[] { "\t" }, StringSplitOptions.None); foreach (string token in tokens) { if (token.Length == 0) { // Blank tokens are ok (from a trailing \t). continue; } // Each token begins with a one character parameter which specifies the // operation of this token (delete, insert, equality). string param = token.Substring(1); switch (token[0]) { case '+': // decode would change all "+" to " " param = param.Replace("+", "%2b"); param = HttpUtility.UrlDecode(param, new UTF8Encoding(false, true)); //} catch (UnsupportedEncodingException e) { // // Not likely on modern system. // throw new Error("This system does not support UTF-8.", e); //} catch (IllegalArgumentException e) { // // Malformed URI sequence. // throw new IllegalArgumentException( // "Illegal escape in diff_fromDelta: " + param, e); //} diffs.Add(new Diff(Operation.INSERT, param)); break; case '-': // Fall through. case '=': int n; try { n = Convert.ToInt32(param); } catch (FormatException e) { throw new ArgumentException( "Invalid number in diff_fromDelta: " + param, e); } if (n s = match_alphabet(pattern); // Highest score beyond which we give up. double score_threshold = Match_Threshold; // Is there a nearby exact match? (speedup) int best_loc = text.IndexOf(pattern, loc, StringComparison.Ordinal); if (best_loc != -1) { score_threshold = Math.Min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); // What about in the other direction? (speedup) best_loc = text.LastIndexOf(pattern, Math.Min(loc + pattern.Length, text.Length), StringComparison.Ordinal); if (best_loc != -1) { score_threshold = Math.Min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); } } // Initialise the bit arrays. int matchmask = 1 = start; j--) { int charMatch; if (text.Length loc) { // When passing loc, don't exceed our current distance from loc. start = Math.Max(1, 2 * loc - best_loc); } else { // Already passed loc, downhill from here on in. break; } } } } if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { // No hope for a (better) match at greater error levels. break; } last_rd = rd; } return best_loc; } /** * Compute and return the score for a match with e errors and x location. * @param e Number of errors in match. * @param x Location of match. * @param loc Expected location of match. * @param pattern Pattern being sought. * @return Overall score for match (0.0 = good, 1.0 = bad). */ private double match_bitapScore(int e, int x, int loc, string pattern) { float accuracy = (float)e / pattern.Length; int proximity = Math.Abs(loc - x); if (Match_Distance == 0) { // Dodge divide by zero error. return proximity == 0 ? accuracy : 1.0; } return accuracy + (proximity / (float) Match_Distance); } /** * Initialise the alphabet for the Bitap algorithm. * @param pattern The text to encode. * @return Hash of character locations. */ protected Dictionary match_alphabet(string pattern) { Dictionary s = new Dictionary(); char[] char_pattern = pattern.ToCharArray(); foreach (char c in char_pattern) { if (!s.ContainsKey(c)) { s.Add(c, 0); } } int i = 0; foreach (char c in char_pattern) { int value = s[c] | (1 patch_make(string text1, string text2) { // Check for null inputs not needed since null can't be passed in C#. // No diffs provided, comAdde our own. List diffs = diff_main(text1, text2, true); if (diffs.Count > 2) { diff_cleanupSemantic(diffs); diff_cleanupEfficiency(diffs); } return patch_make(text1, diffs); } /** * Compute a list of patches to turn text1 into text2. * text1 will be derived from the provided diffs. * @param diffs Array of Diff objects for text1 to text2. * @return List of Patch objects. */ public List

patch_make(List diffs) { // Check for null inputs not needed since null can't be passed in C#. // No origin string provided, comAdde our own. string text1 = diff_text1(diffs); return patch_make(text1, diffs); } /** * Compute a list of patches to turn text1 into text2. * text2 is ignored, diffs are the delta between text1 and text2. * @param text1 Old text * @param text2 Ignored. * @param diffs Array of Diff objects for text1 to text2. * @return List of Patch objects. * @deprecated Prefer patch_make(string text1, List diffs). */ public List
patch_make(string text1, string text2, List diffs) { return patch_make(text1, diffs); } /** * Compute a list of patches to turn text1 into text2. * text2 is not provided, diffs are the delta between text1 and text2. * @param text1 Old text. * @param diffs Array of Diff objects for text1 to text2. * @return List of Patch objects. */ public List patch_make(string text1, List diffs) { // Check for null inputs not needed since null can't be passed in C#. List patches = new List(); if (diffs.Count == 0) { return patches; // Get rid of the null case. } Patch patch = new Patch(); int char_count1 = 0; // Number of characters into the text1 string. int char_count2 = 0; // Number of characters into the text2 string. // Start with text1 (prepatch_text) and apply the diffs until we arrive at // text2 (postpatch_text). We recreate the patches one by one to determine // context info. string prepatch_text = text1; string postpatch_text = text1; foreach (Diff aDiff in diffs) { if (patch.diffs.Count == 0 && aDiff.operation != Operation.EQUAL) { // A new patch starts here. patch.start1 = char_count1; patch.start2 = char_count2; } switch (aDiff.operation) { case Operation.INSERT: patch.diffs.Add(aDiff); patch.length2 += aDiff.text.Length; postpatch_text = postpatch_text.Insert(char_count2, aDiff.text); break; case Operation.DELETE: patch.length1 += aDiff.text.Length; patch.diffs.Add(aDiff); postpatch_text = postpatch_text.Remove(char_count2, aDiff.text.Length); break; case Operation.EQUAL: if (aDiff.text.Length = 2 * Patch_Margin) { // Time for a new patch. if (patch.diffs.Count != 0) { patch_addContext(patch, prepatch_text); patches.Add(patch); patch = new Patch(); // Unlike Unidiff, our patch lists have a rolling context. // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff // Update prepatch text & pos to reflect the application of the // just completed patch. prepatch_text = postpatch_text; char_count1 = char_count2; } } break; } // Update the current character count. if (aDiff.operation != Operation.INSERT) { char_count1 += aDiff.text.Length; } if (aDiff.operation != Operation.DELETE) { char_count2 += aDiff.text.Length; } } // Pick up the leftover patch if not empty. if (patch.diffs.Count != 0) { patch_addContext(patch, prepatch_text); patches.Add(patch); } return patches; } /** * Given an array of patches, return another array that is identical. * @param patches Array of Patch objects. * @return Array of Patch objects. */ public List patch_deepCopy(List patches) { List patchesCopy = new List(); foreach (Patch aPatch in patches) { Patch patchCopy = new Patch(); foreach (Diff aDiff in aPatch.diffs) { Diff diffCopy = new Diff(aDiff.operation, aDiff.text); patchCopy.diffs.Add(diffCopy); } patchCopy.start1 = aPatch.start1; patchCopy.start2 = aPatch.start2; patchCopy.length1 = aPatch.length1; patchCopy.length2 = aPatch.length2; patchesCopy.Add(patchCopy); } return patchesCopy; } /** * Merge a set of patches onto the text. Return a patched text, as well * as an array of true/false values indicating which patches were applied. * @param patches Array of Patch objects * @param text Old text. * @return Two element Object array, containing the new text and an array of * bool values. */ public Object[] patch_apply(List patches, string text) { if (patches.Count == 0) { return new Object[] { text, new bool[0] }; } // Deep copy the patches so that no changes are made to originals. patches = patch_deepCopy(patches); string nullPadding = this.patch_addPadding(patches); text = nullPadding + text + nullPadding; patch_splitMax(patches); int x = 0; // delta keeps track of the offset between the expected and actual // location of the previous patch. If there are patches expected at // positions 10 and 20, but the first patch was found at 12, delta is 2 // and the second patch has an effective expected position of 22. int delta = 0; bool[] results = new bool[patches.Count]; foreach (Patch aPatch in patches) { int expected_loc = aPatch.start2 + delta; string text1 = diff_text1(aPatch.diffs); int start_loc; int end_loc = -1; if (text1.Length > this.Match_MaxBits) { // patch_splitMax will only provide an oversized pattern // in the case of a monster delete. start_loc = match_main(text, text1.Substring(0, this.Match_MaxBits), expected_loc); if (start_loc != -1) { end_loc = match_main(text, text1.Substring(text1.Length - this.Match_MaxBits), expected_loc + text1.Length - this.Match_MaxBits); if (end_loc == -1 || start_loc >= end_loc) { // Can't find valid trailing context. Drop this patch. start_loc = -1; } } } else { start_loc = this.match_main(text, text1, expected_loc); } if (start_loc == -1) { // No match found. :( results[x] = false; // Subtract the delta for this failed patch from subsequent patches. delta -= aPatch.length2 - aPatch.length1; } else { // Found a match. :) results[x] = true; delta = start_loc - expected_loc; string text2; if (end_loc == -1) { text2 = text.JavaSubstring(start_loc, Math.Min(start_loc + text1.Length, text.Length)); } else { text2 = text.JavaSubstring(start_loc, Math.Min(end_loc + this.Match_MaxBits, text.Length)); } if (text1 == text2) { // Perfect match, just shove the Replacement text in. text = text.Substring(0, start_loc) + diff_text2(aPatch.diffs) + text.Substring(start_loc + text1.Length); } else { // Imperfect match. Run a diff to get a framework of equivalent // indices. List diffs = diff_main(text1, text2, false); if (text1.Length > this.Match_MaxBits && this.diff_levenshtein(diffs) / (float) text1.Length > this.Patch_DeleteThreshold) { // The end points match, but the content is unacceptably bad. results[x] = false; } else { diff_cleanupSemanticLossless(diffs); int index1 = 0; foreach (Diff aDiff in aPatch.diffs) { if (aDiff.operation != Operation.EQUAL) { int index2 = diff_xIndex(diffs, index1); if (aDiff.operation == Operation.INSERT) { // Insertion text = text.Insert(start_loc + index2, aDiff.text); } else if (aDiff.operation == Operation.DELETE) { // Deletion text = text.Remove(start_loc + index2, diff_xIndex(diffs, index1 + aDiff.text.Length) - index2); } } if (aDiff.operation != Operation.DELETE) { index1 += aDiff.text.Length; } } } } } x++; } // Strip the padding off. text = text.Substring(nullPadding.Length, text.Length - 2 * nullPadding.Length); return new Object[] { text, results }; } /** * Add some padding on text start and end so that edges can match something. * Intended to be called only from within patch_apply. * @param patches Array of Patch objects. * @return The padding string added to each side. */ public string patch_addPadding(List patches) { short paddingLength = this.Patch_Margin; string nullPadding = string.Empty; for (short x = 1; x diffs = patch.diffs; if (diffs.Count == 0 || diffs.First().operation != Operation.EQUAL) { // Add nullPadding equality. diffs.Insert(0, new Diff(Operation.EQUAL, nullPadding)); patch.start1 -= paddingLength; // Should be 0. patch.start2 -= paddingLength; // Should be 0. patch.length1 += paddingLength; patch.length2 += paddingLength; } else if (paddingLength > diffs.First().text.Length) { // Grow first equality. Diff firstDiff = diffs.First(); int extraLength = paddingLength - firstDiff.text.Length; firstDiff.text = nullPadding.Substring(firstDiff.text.Length) + firstDiff.text; patch.start1 -= extraLength; patch.start2 -= extraLength; patch.length1 += extraLength; patch.length2 += extraLength; } // Add some padding on end of last diff. patch = patches.Last(); diffs = patch.diffs; if (diffs.Count == 0 || diffs.Last().operation != Operation.EQUAL) { // Add nullPadding equality. diffs.Add(new Diff(Operation.EQUAL, nullPadding)); patch.length1 += paddingLength; patch.length2 += paddingLength; } else if (paddingLength > diffs.Last().text.Length) { // Grow last equality. Diff lastDiff = diffs.Last(); int extraLength = paddingLength - lastDiff.text.Length; lastDiff.text += nullPadding.Substring(0, extraLength); patch.length1 += extraLength; patch.length2 += extraLength; } return nullPadding; } /** * Look through the patches and break up any which are longer than the * maximum limit of the match algorithm. * Intended to be called only from within patch_apply. * @param patches List of Patch objects. */ public void patch_splitMax(List patches) { short patch_size = this.Match_MaxBits; for (int x = 0; x 2 * patch_size) { // This is a large deletion. Let it pass in one chunk. patch.length1 += diff_text.Length; start1 += diff_text.Length; empty = false; patch.diffs.Add(new Diff(diff_type, diff_text)); bigpatch.diffs.RemoveAt(0); } else { // Deletion or equality. Only take as much as we can stomach. diff_text = diff_text.Substring(0, Math.Min(diff_text.Length, patch_size - patch.length1 - Patch_Margin)); patch.length1 += diff_text.Length; start1 += diff_text.Length; if (diff_type == Operation.EQUAL) { patch.length2 += diff_text.Length; start2 += diff_text.Length; } else { empty = false; } patch.diffs.Add(new Diff(diff_type, diff_text)); if (diff_text == bigpatch.diffs[0].text) { bigpatch.diffs.RemoveAt(0); } else { bigpatch.diffs[0].text = bigpatch.diffs[0].text.Substring(diff_text.Length); } } } // Compute the head context for the next patch. precontext = this.diff_text2(patch.diffs); precontext = precontext.Substring(Math.Max(0, precontext.Length - this.Patch_Margin)); string postcontext = null; // Append the end context for this patch. if (diff_text1(bigpatch.diffs).Length > Patch_Margin) { postcontext = diff_text1(bigpatch.diffs) .Substring(0, Patch_Margin); } else { postcontext = diff_text1(bigpatch.diffs); } if (postcontext.Length != 0) { patch.length1 += postcontext.Length; patch.length2 += postcontext.Length; if (patch.diffs.Count != 0 && patch.diffs[patch.diffs.Count - 1].operation == Operation.EQUAL) { patch.diffs[patch.diffs.Count - 1].text += postcontext; } else { patch.diffs.Add(new Diff(Operation.EQUAL, postcontext)); } } if (!empty) { patches.Splice(++x, 0, patch); } } } } /** * Take a list of patches and return a textual representation. * @param patches List of Patch objects. * @return Text representation of patches. */ public string patch_toText(List patches) { StringBuilder text = new StringBuilder(); foreach (Patch aPatch in patches) { text.Append(aPatch); } return text.ToString(); } /** * Parse a textual representation of patches and return a List of Patch * objects. * @param textline Text representation of patches. * @return List of Patch objects. * @throws ArgumentException If invalid input. */ public List patch_fromText(string textline) { List patches = new List(); if (textline.Length == 0) { return patches; } string[] text = textline.Split('\n'); int textPointer = 0; Patch patch; Regex patchHeader = new Regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); Match m; char sign; string line; while (textPointer "?", "%24" -> "$", etc. * * @param str The string to escape. * @return The escaped string. */ public static string unescapeForEncodeUriCompatability(string str) { return str.Replace("%21", "!").Replace("%7e", "~") .Replace("%27", "'").Replace("%28", "(").Replace("%29", ")") .Replace("%3b", ";").Replace("%2f", "/").Replace("%3f", "?") .Replace("%3a", ":").Replace("%40", "@").Replace("%26", "&") .Replace("%3d", "=").Replace("%2b", "+").Replace("%24", "$") .Replace("%2c", ",").Replace("%23", "#"); } } }