| # bdiff.py - Python implementation of bdiff.c |
| # |
| # Copyright 2009 Matt Mackall <mpm@selenic.com> and others |
| # |
| # This software may be used and distributed according to the terms of the |
| # GNU General Public License version 2 or any later version. |
| |
| import struct, difflib, re |
| |
def splitnewlines(text):
    '''Split text into lines on newline characters only.

    Unlike str.splitlines, no other line-boundary character acts as a
    separator. Every returned line keeps its terminating newline; a final
    segment that is not newline-terminated is returned as-is, and empty
    text produces an empty list.
    '''
    pieces = text.split('\n')
    # The last piece is whatever follows the final newline (possibly
    # nothing); every piece before it was terminated by a newline.
    tail = pieces.pop()
    result = [piece + '\n' for piece in pieces]
    if tail:
        result.append(tail)
    return result
| |
| def _normalizeblocks(a, b, blocks): |
| prev = None |
| r = [] |
| for curr in blocks: |
| if prev is None: |
| prev = curr |
| continue |
| shift = 0 |
| |
| a1, b1, l1 = prev |
| a1end = a1 + l1 |
| b1end = b1 + l1 |
| |
| a2, b2, l2 = curr |
| a2end = a2 + l2 |
| b2end = b2 + l2 |
| if a1end == a2: |
| while (a1end + shift < a2end and |
| a[a1end + shift] == b[b1end + shift]): |
| shift += 1 |
| elif b1end == b2: |
| while (b1end + shift < b2end and |
| a[a1end + shift] == b[b1end + shift]): |
| shift += 1 |
| r.append((a1, b1, l1 + shift)) |
| prev = a2 + shift, b2 + shift, l2 - shift |
| r.append(prev) |
| return r |
| |
def bdiff(a, b):
    '''Return a binary delta describing how to turn a into b.

    Both inputs are converted with str() and compared line by line. The
    result is a concatenation of fragments, each made of a header of
    three big-endian 32-bit signed integers (start offset in a, end
    offset in a, length of the replacement data) followed by the
    replacement data taken from b. When there is nothing to emit the
    result is an empty string.
    '''
    # NOTE(review): appending text to the struct.pack() result presumably
    # relies on Python 2 str semantics -- confirm before running elsewhere.
    alines = str(a).splitlines(True)
    blines = str(b).splitlines(True)

    if not alines:
        # Nothing to match against: the whole of b is a single insertion.
        added = "".join(blines)
        if not added:
            return added
        return struct.pack(">lll", 0, 0, len(added)) + added

    # offsets[i] is the position in a at which line i starts;
    # offsets[len(alines)] is the total length.
    offsets = [0]
    for line in alines:
        offsets.append(offsets[-1] + len(line))

    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())

    chunks = []
    aend = 0  # first line of a after the previous match
    bend = 0  # first line of b after the previous match
    for astart, bstart, length in matches:
        inserted = "".join(blines[bend:bstart])
        # Emit a fragment only if lines of a were skipped or b adds data.
        if astart > aend or inserted:
            header = struct.pack(">lll", offsets[aend], offsets[astart],
                                 len(inserted))
            chunks.append(header + inserted)
        aend = astart + length
        bend = bstart + length

    return "".join(chunks)
| |
def blocks(a, b):
    '''Return the matching line ranges shared by a and b.

    Both texts are split with splitnewlines, matched with
    difflib.SequenceMatcher and passed through _normalizeblocks. Each
    entry of the returned list is a tuple (astart, aend, bstart, bend)
    of line indexes, where the end indexes are exclusive.
    '''
    alines = splitnewlines(a)
    blines = splitnewlines(b)
    matcher = difflib.SequenceMatcher(None, alines, blines)
    matches = _normalizeblocks(alines, blines, matcher.get_matching_blocks())

    ranges = []
    for astart, bstart, length in matches:
        ranges.append((astart, astart + length, bstart, bstart + length))
    return ranges
| |
def fixws(text, allws):
    '''Normalize runs of spaces, tabs and carriage returns in text.

    When allws is true every such run is removed entirely. Otherwise each
    run is collapsed to a single space, and a space that ends up directly
    before a newline is dropped.
    '''
    if allws:
        return re.sub('[ \t\r]+', '', text)

    collapsed = re.sub('[ \t\r]+', ' ', text)
    return collapsed.replace(' \n', '\n')