module Diff::Display # Processes the diff and generates a Data object which contains the # resulting data structure. # # The +run+ class method is fed a diff and returns a Data object. It will # accept as its argument a String, an Array or a File object (or anything # that responds to #each): # # Diff::Display::Unified::Generator.run(diff) # class Unified::Generator # Extracts the line number info for a given diff section LINE_NUM_RE = /^@@ [+-]([0-9]+)(?:,([0-9]+))? [+-]([0-9]+)(?:,([0-9]+))? @@/ LINE_TYPES = {'+' => :add, '-' => :rem, ' ' => :unmod, '\\' => :nonewline} # Runs the generator on a diff and returns a Data object def self.run(udiff) raise ArgumentError, "Object must be enumerable" unless udiff.respond_to?(:each_line) generator = new udiff.each_line do |line| begin generator.process(line.chomp) rescue ArgumentError => e e.message =~ /^invalid byte sequence/ ? next : raise(e) end end generator.finish generator.data end def initialize @buffer = [] @line_type = nil @prev_line_type = nil @offset = [0, 0] @data = Data.new self end # Finishes up with the generation and returns the Data object (could # probably use a better name...maybe just #data?) def data @data end # This method is called once the generator is done with the unified # diff. It is a finalizer of sorts. By the time it is called all data # has been collected and processed. def finish # certain things could be set now that processing is done #identify_block end def process(line) if is_header_line?(line) push Block.header current_block << Line.header(line) return end if line =~ LINE_NUM_RE push Block.header current_block << Line.header(line) add_separator unless @offset[0].zero? @line_type = nil @offset = Array.new(2) { $3.to_i - 1 } return end @line_type, line = LINE_TYPES[car(line)], cdr(line) if @line_type == :add && @prev_line_type == :rem @offset[0] -= 1 @buffer.push current_block.pop @buffer.push line process_block(:mod, false) return end if LINE_TYPES.values.include?(@line_type) @buffer.push(line.to_s) process_block(@line_type, true) end end protected def is_header_line?(line) return true if ['+++ ', '--- '].include?(line[0,4]) return true if line =~ /^(new|delete) file mode [0-9]+$/ return true if line =~ /^diff \-\-git/ return true if line =~ /^index \w+\.\.\w+( [0-9]+)?$/i false end def process_block(diff_line_type, isnew = false) @data.pop unless isnew push Block.send(diff_line_type) current_line = @buffer.pop return unless current_line # \\ No newline at end of file if diff_line_type == :nonewline current_block << Line.nonewline('\\ No newline at end of file') return end if isnew process_line(current_line, diff_line_type) else process_lines_with_differences(@buffer.shift, current_line) raise "buffer exceeded #{@buffer.inspect}" unless @buffer.empty? end end def process_line(line, type, inline = false) case type when :add @offset[1] += 1 current_block << Line.send(type, line, @offset[1], inline, @offset.dup) when :rem @offset[0] += 1 current_block << Line.send(type, line, @offset[0], inline, @offset.dup) # when :rmod # @offset[0] += 1 # @offset[1] += 1 # TODO: is that really correct? # current_block << Line.send(@prev_line_type, line, @offset[0]) when :unmod @offset[0] += 1 @offset[1] += 1 current_block << Line.send(type, line, *@offset) end @prev_line_type = type end # TODO Needs a better name...it does process a line (two in fact) but # its primary function is to add a Rem and an Add pair which # potentially have inline changes def process_lines_with_differences(oldline, newline) start, ending = get_change_extent(oldline, newline) if start.zero? && ending.zero? process_line(oldline, :rem, false) # - process_line(newline, :add, false) # + else # - line = inline_diff(oldline, start, ending) process_line(line, :rem, true) # + line = inline_diff(newline, start, ending) process_line(line, :add, true) end end # Inserts string formating characters around the section of a string # that differs internally from another line so that the Line class # can insert the desired formating def inline_diff(line, start, ending) if start != 0 || ending != 0 last = ending + line.length str = line[0...start] + '\0' + line[start...last] + '\1' + line[last...line.length] end str || line end def add_separator push SepBlock.new current_block << SepLine.new end def car(line) line[0,1] end def cdr(line) line[1..-1] end # Returns the current Block object def current_block @data.last end # Adds a Line object onto the current Block object def push(line) @data.push line end # Determines the extent of differences between two string. Returns # an array containing the offset at which changes start, and then # negative offset at which the chnages end. If the two strings have # neither a common prefix nor a common suffic, [0, 0] is returned. def get_change_extent(str1, str2) start = 0 limit = [str1.size, str2.size].sort.first while start < limit and str1[start, 1] == str2[start, 1] start += 1 end ending = -1 limit -= start while -ending <= limit and str1[ending, 1] == str2[ending, 1] ending -= 1 end return [start, ending + 1] end end end