| # mdiff.py - diff and patch routines for mercurial |
| # |
| # Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
| # |
| # This software may be used and distributed according to the terms of the |
| # GNU General Public License version 2 or any later version. |
| |
| from i18n import _ |
| import bdiff, mpatch, util |
| import re, struct, base85, zlib |
| |
| def splitnewlines(text): |
| '''like str.splitlines, but only split on newlines.''' |
| lines = [l + '\n' for l in text.split('\n')] |
| if lines: |
| if lines[-1] == '\n': |
| lines.pop() |
| else: |
| lines[-1] = lines[-1][:-1] |
| return lines |
| |
| class diffopts(object): |
| '''context is the number of context lines |
| text treats all files as text |
| showfunc enables diff -p output |
| git enables the git extended patch format |
| nodates removes dates from diff headers |
| ignorews ignores all whitespace changes in the diff |
| ignorewsamount ignores changes in the amount of whitespace |
| ignoreblanklines ignores changes whose lines are all blank |
| upgrade generates git diffs to avoid data loss |
| ''' |
| |
| defaults = { |
| 'context': 3, |
| 'text': False, |
| 'showfunc': False, |
| 'git': False, |
| 'nodates': False, |
| 'ignorews': False, |
| 'ignorewsamount': False, |
| 'ignoreblanklines': False, |
| 'upgrade': False, |
| } |
| |
| __slots__ = defaults.keys() |
| |
| def __init__(self, **opts): |
| for k in self.__slots__: |
| v = opts.get(k) |
| if v is None: |
| v = self.defaults[k] |
| setattr(self, k, v) |
| |
| try: |
| self.context = int(self.context) |
| except ValueError: |
| raise util.Abort(_('diff context lines count must be ' |
| 'an integer, not %r') % self.context) |
| |
| def copy(self, **kwargs): |
| opts = dict((k, getattr(self, k)) for k in self.defaults) |
| opts.update(kwargs) |
| return diffopts(**opts) |
| |
| defaultopts = diffopts() |
| |
| def wsclean(opts, text, blank=True): |
| if opts.ignorews: |
| text = bdiff.fixws(text, 1) |
| elif opts.ignorewsamount: |
| text = bdiff.fixws(text, 0) |
| if blank and opts.ignoreblanklines: |
| text = re.sub('\n+', '\n', text).strip('\n') |
| return text |
| |
| def splitblock(base1, lines1, base2, lines2, opts): |
| # The input lines matches except for interwoven blank lines. We |
| # transform it into a sequence of matching blocks and blank blocks. |
| lines1 = [(wsclean(opts, l) and 1 or 0) for l in lines1] |
| lines2 = [(wsclean(opts, l) and 1 or 0) for l in lines2] |
| s1, e1 = 0, len(lines1) |
| s2, e2 = 0, len(lines2) |
| while s1 < e1 or s2 < e2: |
| i1, i2, btype = s1, s2, '=' |
| if (i1 >= e1 or lines1[i1] == 0 |
| or i2 >= e2 or lines2[i2] == 0): |
| # Consume the block of blank lines |
| btype = '~' |
| while i1 < e1 and lines1[i1] == 0: |
| i1 += 1 |
| while i2 < e2 and lines2[i2] == 0: |
| i2 += 1 |
| else: |
| # Consume the matching lines |
| while i1 < e1 and lines1[i1] == 1 and lines2[i2] == 1: |
| i1 += 1 |
| i2 += 1 |
| yield [base1 + s1, base1 + i1, base2 + s2, base2 + i2], btype |
| s1 = i1 |
| s2 = i2 |
| |
| def allblocks(text1, text2, opts=None, lines1=None, lines2=None, refine=False): |
| """Return (block, type) tuples, where block is an mdiff.blocks |
| line entry. type is '=' for blocks matching exactly one another |
| (bdiff blocks), '!' for non-matching blocks and '~' for blocks |
| matching only after having filtered blank lines. If refine is True, |
| then '~' blocks are refined and are only made of blank lines. |
| line1 and line2 are text1 and text2 split with splitnewlines() if |
| they are already available. |
| """ |
| if opts is None: |
| opts = defaultopts |
| if opts.ignorews or opts.ignorewsamount: |
| text1 = wsclean(opts, text1, False) |
| text2 = wsclean(opts, text2, False) |
| diff = bdiff.blocks(text1, text2) |
| for i, s1 in enumerate(diff): |
| # The first match is special. |
| # we've either found a match starting at line 0 or a match later |
| # in the file. If it starts later, old and new below will both be |
| # empty and we'll continue to the next match. |
| if i > 0: |
| s = diff[i - 1] |
| else: |
| s = [0, 0, 0, 0] |
| s = [s[1], s1[0], s[3], s1[2]] |
| |
| # bdiff sometimes gives huge matches past eof, this check eats them, |
| # and deals with the special first match case described above |
| if s[0] != s[1] or s[2] != s[3]: |
| type = '!' |
| if opts.ignoreblanklines: |
| if lines1 is None: |
| lines1 = splitnewlines(text1) |
| if lines2 is None: |
| lines2 = splitnewlines(text2) |
| old = wsclean(opts, "".join(lines1[s[0]:s[1]])) |
| new = wsclean(opts, "".join(lines2[s[2]:s[3]])) |
| if old == new: |
| type = '~' |
| yield s, type |
| yield s1, '=' |
| |
| def unidiff(a, ad, b, bd, fn1, fn2, opts=defaultopts): |
| def datetag(date, fn=None): |
| if not opts.git and not opts.nodates: |
| return '\t%s\n' % date |
| if fn and ' ' in fn: |
| return '\t\n' |
| return '\n' |
| |
| if not a and not b: |
| return "" |
| epoch = util.datestr((0, 0)) |
| |
| fn1 = util.pconvert(fn1) |
| fn2 = util.pconvert(fn2) |
| |
| if not opts.text and (util.binary(a) or util.binary(b)): |
| if a and b and len(a) == len(b) and a == b: |
| return "" |
| l = ['Binary file %s has changed\n' % fn1] |
| elif not a: |
| b = splitnewlines(b) |
| if a is None: |
| l1 = '--- /dev/null%s' % datetag(epoch) |
| else: |
| l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1)) |
| l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2)) |
| l3 = "@@ -0,0 +1,%d @@\n" % len(b) |
| l = [l1, l2, l3] + ["+" + e for e in b] |
| elif not b: |
| a = splitnewlines(a) |
| l1 = "--- %s%s" % ("a/" + fn1, datetag(ad, fn1)) |
| if b is None: |
| l2 = '+++ /dev/null%s' % datetag(epoch) |
| else: |
| l2 = "+++ %s%s" % ("b/" + fn2, datetag(bd, fn2)) |
| l3 = "@@ -1,%d +0,0 @@\n" % len(a) |
| l = [l1, l2, l3] + ["-" + e for e in a] |
| else: |
| al = splitnewlines(a) |
| bl = splitnewlines(b) |
| l = list(_unidiff(a, b, al, bl, opts=opts)) |
| if not l: |
| return "" |
| |
| l.insert(0, "--- a/%s%s" % (fn1, datetag(ad, fn1))) |
| l.insert(1, "+++ b/%s%s" % (fn2, datetag(bd, fn2))) |
| |
| for ln in xrange(len(l)): |
| if l[ln][-1] != '\n': |
| l[ln] += "\n\ No newline at end of file\n" |
| |
| return "".join(l) |
| |
| # creates a headerless unified diff |
| # t1 and t2 are the text to be diffed |
| # l1 and l2 are the text broken up into lines |
| def _unidiff(t1, t2, l1, l2, opts=defaultopts): |
| def contextend(l, len): |
| ret = l + opts.context |
| if ret > len: |
| ret = len |
| return ret |
| |
| def contextstart(l): |
| ret = l - opts.context |
| if ret < 0: |
| return 0 |
| return ret |
| |
| lastfunc = [0, ''] |
| def yieldhunk(hunk): |
| (astart, a2, bstart, b2, delta) = hunk |
| aend = contextend(a2, len(l1)) |
| alen = aend - astart |
| blen = b2 - bstart + aend - a2 |
| |
| func = "" |
| if opts.showfunc: |
| lastpos, func = lastfunc |
| # walk backwards from the start of the context up to the start of |
| # the previous hunk context until we find a line starting with an |
| # alphanumeric char. |
| for i in xrange(astart - 1, lastpos - 1, -1): |
| if l1[i][0].isalnum(): |
| func = ' ' + l1[i].rstrip()[:40] |
| lastfunc[1] = func |
| break |
| # by recording this hunk's starting point as the next place to |
| # start looking for function lines, we avoid reading any line in |
| # the file more than once. |
| lastfunc[0] = astart |
| |
| # zero-length hunk ranges report their start line as one less |
| if alen: |
| astart += 1 |
| if blen: |
| bstart += 1 |
| |
| yield "@@ -%d,%d +%d,%d @@%s\n" % (astart, alen, |
| bstart, blen, func) |
| for x in delta: |
| yield x |
| for x in xrange(a2, aend): |
| yield ' ' + l1[x] |
| |
| # bdiff.blocks gives us the matching sequences in the files. The loop |
| # below finds the spaces between those matching sequences and translates |
| # them into diff output. |
| # |
| hunk = None |
| ignoredlines = 0 |
| for s, stype in allblocks(t1, t2, opts, l1, l2): |
| a1, a2, b1, b2 = s |
| if stype != '!': |
| if stype == '~': |
| # The diff context lines are based on t1 content. When |
| # blank lines are ignored, the new lines offsets must |
| # be adjusted as if equivalent blocks ('~') had the |
| # same sizes on both sides. |
| ignoredlines += (b2 - b1) - (a2 - a1) |
| continue |
| delta = [] |
| old = l1[a1:a2] |
| new = l2[b1:b2] |
| |
| b1 -= ignoredlines |
| b2 -= ignoredlines |
| astart = contextstart(a1) |
| bstart = contextstart(b1) |
| prev = None |
| if hunk: |
| # join with the previous hunk if it falls inside the context |
| if astart < hunk[1] + opts.context + 1: |
| prev = hunk |
| astart = hunk[1] |
| bstart = hunk[3] |
| else: |
| for x in yieldhunk(hunk): |
| yield x |
| if prev: |
| # we've joined the previous hunk, record the new ending points. |
| hunk[1] = a2 |
| hunk[3] = b2 |
| delta = hunk[4] |
| else: |
| # create a new hunk |
| hunk = [astart, a2, bstart, b2, delta] |
| |
| delta[len(delta):] = [' ' + x for x in l1[astart:a1]] |
| delta[len(delta):] = ['-' + x for x in old] |
| delta[len(delta):] = ['+' + x for x in new] |
| |
| if hunk: |
| for x in yieldhunk(hunk): |
| yield x |
| |
| def b85diff(to, tn): |
| '''print base85-encoded binary diff''' |
| def fmtline(line): |
| l = len(line) |
| if l <= 26: |
| l = chr(ord('A') + l - 1) |
| else: |
| l = chr(l - 26 + ord('a') - 1) |
| return '%c%s\n' % (l, base85.b85encode(line, True)) |
| |
| def chunk(text, csize=52): |
| l = len(text) |
| i = 0 |
| while i < l: |
| yield text[i:i + csize] |
| i += csize |
| |
| if to is None: |
| to = '' |
| if tn is None: |
| tn = '' |
| |
| if to == tn: |
| return '' |
| |
| # TODO: deltas |
| ret = [] |
| ret.append('GIT binary patch\n') |
| ret.append('literal %s\n' % len(tn)) |
| for l in chunk(zlib.compress(tn)): |
| ret.append(fmtline(l)) |
| ret.append('\n') |
| |
| return ''.join(ret) |
| |
| def patchtext(bin): |
| pos = 0 |
| t = [] |
| while pos < len(bin): |
| p1, p2, l = struct.unpack(">lll", bin[pos:pos + 12]) |
| pos += 12 |
| t.append(bin[pos:pos + l]) |
| pos += l |
| return "".join(t) |
| |
| def patch(a, bin): |
| if len(a) == 0: |
| # skip over trivial delta header |
| return util.buffer(bin, 12) |
| return mpatch.patches(a, [bin]) |
| |
| # similar to difflib.SequenceMatcher.get_matching_blocks |
| def get_matching_blocks(a, b): |
| return [(d[0], d[2], d[1] - d[0]) for d in bdiff.blocks(a, b)] |
| |
| def trivialdiffheader(length): |
| return struct.pack(">lll", 0, 0, length) |
| |
| patches = mpatch.patches |
| patchedsize = mpatch.patchedsize |
| textdiff = bdiff.bdiff |