module HTMLDiff Match = Struct.new(:start_in_old, :start_in_new, :size) class Match def end_in_old self.start_in_old + self.size end def end_in_new self.start_in_new + self.size end end Operation = Struct.new(:action, :start_in_old, :end_in_old, :start_in_new, :end_in_new) class DiffBuilder def initialize(old_version, new_version) @old_version, @new_version = old_version, new_version split_inputs_to_words index_new_words end def split_inputs_to_words @old_words = explode(@old_version) @new_words = explode(@new_version) end def index_new_words @word_indices = Hash.new { |h, word| h[word] = [] } @new_words.each_with_index { |word, i| @word_indices[word] << i } end def operations position_in_old = position_in_new = 0 operations = [] matches = matching_blocks # an empty match at the end forces the loop below to handle the unmatched tails # I'm sure it can be done more gracefully, but not at 23:52 matches << Match.new(@old_words.length, @new_words.length, 0) matches.each_with_index do |match, i| match_starts_at_current_position_in_old = (position_in_old == match.start_in_old) match_starts_at_current_position_in_new = (position_in_new == match.start_in_new) action_upto_match_positions = case [match_starts_at_current_position_in_old, match_starts_at_current_position_in_new] when [false, false] :replace when [true, false] :insert when [false, true] :delete else # this happens if the first few words are same in both versions :none end if action_upto_match_positions != :none operation_upto_match_positions = Operation.new(action_upto_match_positions, position_in_old, match.start_in_old, position_in_new, match.start_in_new) operations << operation_upto_match_positions end if match.size != 0 match_operation = Operation.new(:equal, match.start_in_old, match.end_in_old, match.start_in_new, match.end_in_new) operations << match_operation end position_in_old = match.end_in_old position_in_new = match.end_in_new end operations end def matching_blocks matching_blocks = [] recursively_find_matching_blocks(0, @old_words.size, 0, @new_words.size, matching_blocks) matching_blocks end def recursively_find_matching_blocks(start_in_old, end_in_old, start_in_new, end_in_new, matching_blocks) match = find_match(start_in_old, end_in_old, start_in_new, end_in_new) if match if start_in_old < match.start_in_old and start_in_new < match.start_in_new recursively_find_matching_blocks( start_in_old, match.start_in_old, start_in_new, match.start_in_new, matching_blocks) end matching_blocks << match if match.end_in_old < end_in_old and match.end_in_new < end_in_new recursively_find_matching_blocks( match.end_in_old, end_in_old, match.end_in_new, end_in_new, matching_blocks) end end end def find_match(start_in_old, end_in_old, start_in_new, end_in_new) best_match_in_old = start_in_old best_match_in_new = start_in_new best_match_size = 0 match_length_at = Hash.new { |h, index| h[index] = 0 } start_in_old.upto(end_in_old - 1) do |index_in_old| new_match_length_at = Hash.new { |h, index| h[index] = 0 } @word_indices[@old_words[index_in_old]].each do |index_in_new| next if index_in_new < start_in_new break if index_in_new >= end_in_new new_match_length = match_length_at[index_in_new - 1] + 1 new_match_length_at[index_in_new] = new_match_length if new_match_length > best_match_size best_match_in_old = index_in_old - new_match_length + 1 best_match_in_new = index_in_new - new_match_length + 1 best_match_size = new_match_length end end match_length_at = new_match_length_at end # best_match_in_old, best_match_in_new, best_match_size = add_matching_words_left( # best_match_in_old, best_match_in_new, best_match_size, start_in_old, start_in_new) # best_match_in_old, best_match_in_new, match_size = add_matching_words_right( # best_match_in_old, best_match_in_new, best_match_size, end_in_old, end_in_new) return (best_match_size != 0 ? Match.new(best_match_in_old, best_match_in_new, best_match_size) : nil) end def add_matching_words_left(match_in_old, match_in_new, match_size, start_in_old, start_in_new) while match_in_old > start_in_old and match_in_new > start_in_new and @old_words[match_in_old - 1] == @new_words[match_in_new - 1] match_in_old -= 1 match_in_new -= 1 match_size += 1 end [match_in_old, match_in_new, match_size] end def add_matching_words_right(match_in_old, match_in_new, match_size, end_in_old, end_in_new) while match_in_old + match_size < end_in_old and match_in_new + match_size < end_in_new and @old_words[match_in_old + match_size] == @new_words[match_in_new + match_size] match_size += 1 end [match_in_old, match_in_new, match_size] end def explode(sequence) sequence.is_a?(String) ? sequence.split(//) : sequence end end # of class Diff Builder end