plugins/hg4idea/testData/bin/mercurial/bdiff.c - platform/tools/idea - Git at Google

 /*
  bdiff.c - efficient binary diff extension for Mercurial

  Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>

  This software may be used and distributed according to the terms of
  the GNU General Public License, incorporated herein by reference.

  Based roughly on Python difflib
 */

 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
 #include <stdlib.h>
 #include <string.h>
 #include <limits.h>

 #include "util.h"

 /* One input line as seen by the diff machinery. */
 struct line {
 	int hash;       /* hash of the line's bytes, filled in by splitlines() */
 	int n;          /* next candidate line index in b; INT_MAX means none */
 	int e;          /* equivalence class: hash table slot picked by equatelines() */
 	Py_ssize_t len; /* length in bytes, counting the trailing '\n' if any */
 	const char *l;  /* first byte of the line inside the caller's buffer */
 };

 /*
  * Small (position, length) pair. equatelines() uses it as a hash table
  * slot (head line index, member count) and longest_match() uses it to
  * remember, per line of b, the last matching line of a and the length
  * of the run ending there.
  */
 struct pos {
 	int pos;
 	int len;
 };

 struct hunk;

 /*
  * One matching block: lines a[a1..a2) equal lines b[b1..b2).
  * Hunks are chained into a singly linked list through next.
  */
 struct hunk {
 	int a1;
 	int a2;
 	int b1;
 	int b2;
 	struct hunk *next;
 };

 /*
  * Split the buffer a[0..len) into lines and hash each one.
  *
  * A line ends at every '\n' (which stays part of the line) or at the last
  * byte of the buffer when that byte is not a newline. On success *lr
  * points to a malloc()ed array with one entry per line followed by a
  * zero-length sentinel whose l member points at a + len. The caller owns
  * the array and releases it with free(). Entries point into a, so a must
  * stay alive as long as they are used.
  *
  * Returns the number of lines, sentinel excluded. On failure (out of
  * memory, or more lines than an int can index) returns -1 and leaves
  * *lr set to NULL.
  */
 static int splitlines(const char *a, Py_ssize_t len, struct line **lr)
 {
 	const char * const end = a + len;
 	const char *p, *b = a;
 	struct line *l;
 	size_t count = 1; /* extra line for sentinel */
 	unsigned hash = 0;

 	*lr = NULL;

 	/*
 	 * count the lines; "p + 1 == end" recognizes the final byte
 	 * without ever forming a + len - 1, which would point before the
 	 * buffer when len is 0
 	 */
 	for (p = a; p < end; p++)
 		if (*p == '\n' || p + 1 == end)
 			count++;

 	/* line indexes are ints in the rest of this file */
 	if (count - 1 > (size_t)INT_MAX ||
 	    count > (size_t)-1 / sizeof(struct line))
 		return -1;

 	*lr = l = (struct line *)malloc(sizeof(struct line) * count);
 	if (!l)
 		return -1;

 	/* build the line array and calculate hashes */
 	for (p = a; p < end; p++) {
 		/* Leonid Yuriev's hash */
 		hash = (hash * 1664525) + (unsigned char)*p + 1013904223;

 		if (*p == '\n' || p + 1 == end) {
 			l->hash = hash;
 			hash = 0;
 			l->len = p - b + 1;
 			l->l = b;
 			l->n = INT_MAX;
 			l++;
 			b = p + 1;
 		}
 	}

 	/* set up a sentinel */
 	l->hash = 0;
 	l->len = 0;
 	l->l = end;
 	return (int)(count - 1);
 }

 /*
  * Compare two lines. Returns 0 when they have the same hash, the same
  * length and the same bytes, and 1 otherwise.
  */
 static inline int cmp(struct line *a, struct line *b)
 {
 	/* cheap checks first; the byte comparison only runs when both agree */
 	if (a->hash != b->hash)
 		return 1;
 	if (a->len != b->len)
 		return 1;
 	return memcmp(a->l, b->l, a->len) != 0;
 }

 /*
  * Group identical lines of b into equivalence classes and link every
  * line of a to its class.
  *
  * After a successful call:
  *   - b[i].e and a[i].e hold the hash table slot of the line's class, so
  *     two lines are equal exactly when their e members are equal;
  *   - b[i].n is the index of the next line of b in the same class
  *     (INT_MAX ends the chain);
  *   - a[i].n is the first line of b in the class, or INT_MAX when the
  *     class is empty or has more members than the popularity threshold.
  *
  * Returns 1 on success and 0 when no hash table could be allocated,
  * including the case where its size does not fit in an int.
  */
 static int equatelines(struct line *a, int an, struct line *b, int bn)
 {
 	int i, j, buckets = 1, t, scale;
 	struct pos *h = NULL;

 	/*
 	 * build a hash table of the next highest power of 2 (strictly
 	 * greater than bn); stop before the doubling would overflow
 	 */
 	while (buckets <= bn) {
 		if (buckets > INT_MAX / 2)
 			return 0;
 		buckets *= 2;
 	}

 	/* try to allocate a large hash table to avoid collisions */
 	for (scale = 4; scale; scale /= 2) {
 		size_t slots;

 		/* the slot count must fit an int, the byte count a size_t */
 		if (buckets > INT_MAX / scale)
 			continue;
 		slots = (size_t)buckets * scale;
 		if (slots > (size_t)-1 / sizeof(struct pos))
 			continue;

 		h = (struct pos *)malloc(slots * sizeof(struct pos));
 		if (h)
 			break;
 	}

 	if (!h)
 		return 0;

 	/* from here on buckets is the index mask (slot count - 1) */
 	buckets = buckets * scale - 1;

 	/* clear the hash table */
 	for (i = 0; i <= buckets; i++) {
 		h[i].pos = INT_MAX;
 		h[i].len = 0;
 	}

 	/* add lines to the hash table chains */
 	for (i = bn - 1; i >= 0; i--) {
 		/* find the equivalence class */
 		for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
 		     j = (j + 1) & buckets)
 			if (!cmp(b + i, b + h[j].pos))
 				break;

 		/* add to the head of the equivalence class */
 		b[i].n = h[j].pos;
 		b[i].e = j;
 		h[j].pos = i;
 		h[j].len++; /* keep track of popularity */
 	}

 	/* compute popularity threshold */
 	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

 	/* match items in a to their equivalence class in b */
 	for (i = 0; i < an; i++) {
 		/* find the equivalence class */
 		for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
 		     j = (j + 1) & buckets)
 			if (!cmp(a + i, b + h[j].pos))
 				break;

 		a[i].e = j; /* use equivalence class for quick compare */
 		if (h[j].len <= t)
 			a[i].n = h[j].pos; /* point to head of match list */
 		else
 			a[i].n = INT_MAX; /* too popular */
 	}

 	/* discard hash tables */
 	free(h);
 	return 1;
 }

 /*
  * Find the longest run of matching lines between a[a1..a2) and b[b1..b2).
  *
  * a and b must already have their n and e members filled in (see
  * equatelines()). pos is scratch space indexed by line number of b: for
  * each j visited, pos[j] records the line i of a that last matched b[j]
  * and the length of the run of matches ending at (i, j).
  *
  * On return *omi and *omj hold the start of the match in a and in b, and
  * the return value is its length in lines (0 when nothing matches).
  */
 static int longest_match(struct line *a, struct line *b, struct pos *pos,
 			 int a1, int a2, int b1, int b2, int *omi, int *omj)
 {
 	/* mi/mj: end (later start) of the best run, mk: its length,
 	   mb: number of lines added in front by the expansion below */
 	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

 	for (i = a1; i < a2; i++) {
 		/* skip things before the current block */
 		for (j = a[i].n; j < b1; j = b[j].n)
 			;

 		/* loop through all lines match a[i] in b */
 		for (; j < b2; j = b[j].n) {
 			/* does this extend an earlier match? */
 			if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
 				k = pos[j - 1].len + 1;
 			else
 				k = 1;
 			pos[j].pos = i;
 			pos[j].len = k;

 			/* best match so far? (strict '>' keeps the first
 			   run found among runs of equal length) */
 			if (k > mk) {
 				mi = i;
 				mj = j;
 				mk = k;
 			}
 		}
 	}

 	/* mi/mj pointed at the last line of the run; move to its first */
 	if (mk) {
 		mi = mi - mk + 1;
 		mj = mj - mk + 1;
 	}

 	/* expand match to include neighboring popular lines: these compare
 	   equal by equivalence class (e) even when their n chain was cut */
 	while (mi - mb > a1 && mj - mb > b1 &&
 	       a[mi - mb - 1].e == b[mj - mb - 1].e)
 		mb++;
 	while (mi + mk < a2 && mj + mk < b2 &&
 	       a[mi + mk].e == b[mj + mk].e)
 		mk++;

 	*omi = mi - mb;
 	*omj = mj - mb;

 	return mk + mb;
 }

 /*
  * Append the matching blocks of a[a1..a2) versus b[b1..b2) to the hunk
  * list whose current tail is l.
  *
  * The longest match is located, the region in front of it is handled by
  * a recursive call, the match itself is appended, and the region behind
  * it is handled by looping. Returns the new tail of the list, or NULL
  * when a hunk could not be allocated.
  */
 static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
 			    int a1, int a2, int b1, int b2, struct hunk *l)
 {
 	for (;;) {
 		int mi, mj;
 		struct hunk *node;
 		const int run = longest_match(a, b, pos, a1, a2, b1, b2,
 					      &mi, &mj);

 		/* nothing in common any more: the list is complete */
 		if (run == 0)
 			return l;

 		/* everything in front of the match goes first */
 		l = recurse(a, b, pos, a1, mi, b1, mj, l);
 		if (l == NULL)
 			return NULL;

 		node = (struct hunk *)malloc(sizeof(struct hunk));
 		l->next = node;
 		if (node == NULL)
 			return NULL;

 		node->a1 = mi;
 		node->a2 = mi + run;
 		node->b1 = mj;
 		node->b2 = mj + run;
 		node->next = NULL;
 		l = node;

 		/* continue with what lies behind the match */
 		a1 = mi + run;
 		b1 = mj + run;
 	}
 }

 /*
  * Compute the matching blocks between a[0..an) and b[0..bn).
  *
  * The hunks are appended to the list headed by base (base is a dummy;
  * the first real hunk is base->next) and the list ends with a
  * zero-length hunk at (an, bn). Adjacent hunks are then normalized by
  * sliding each one towards the end as far as the lines allow.
  *
  * Returns the number of hunks in the list, or -1 when memory runs out.
  * After -1 the caller still owns whatever is linked behind base and has
  * to free it.
  */
 static int diff(struct line *a, int an, struct line *b, int bn,
 		 struct hunk *base)
 {
 	struct hunk *curr;
 	struct pos *pos;
 	int t, count = 0;

 	/* allocate and fill arrays */
 	t = equatelines(a, an, b, bn);
 	pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

 	/*
 	 * Without both tables no matching can be done; report that as an
 	 * error rather than as an empty hunk list, which callers would
 	 * take for a valid result.
 	 */
 	if (!pos || !t) {
 		free(pos);
 		return -1;
 	}

 	/* generate the matching block list */
 	curr = recurse(a, b, pos, 0, an, 0, bn, base);

 	/* pos is only needed while matching; release it on every path */
 	free(pos);
 	if (!curr)
 		return -1;

 	/* sentinel end hunk */
 	curr->next = (struct hunk *)malloc(sizeof(struct hunk));
 	if (!curr->next)
 		return -1;
 	curr = curr->next;
 	curr->a1 = curr->a2 = an;
 	curr->b1 = curr->b2 = bn;
 	curr->next = NULL;

 	/* normalize the hunk list, try to push each hunk towards the end */
 	for (curr = base->next; curr; curr = curr->next) {
 		struct hunk *next = curr->next;
 		int shift = 0;

 		if (!next)
 			break;

 		if (curr->a2 == next->a1)
 			while (curr->a2 + shift < an && curr->b2 + shift < bn
 			       && !cmp(a + curr->a2 + shift,
 				       b + curr->b2 + shift))
 				shift++;
 		else if (curr->b2 == next->b1)
 			while (curr->b2 + shift < bn && curr->a2 + shift < an
 			       && !cmp(b + curr->b2 + shift,
 				       a + curr->a2 + shift))
 				shift++;
 		if (!shift)
 			continue;
 		curr->b2 += shift;
 		next->b1 += shift;
 		curr->a2 += shift;
 		next->a1 += shift;
 	}

 	for (curr = base->next; curr; curr = curr->next)
 		count++;
 	return count;
 }

 /* Release every hunk of the list starting at l; l may be NULL. */
 static void freehunks(struct hunk *l)
 {
 	while (l != NULL) {
 		/* grab the successor before the node goes away */
 		struct hunk *following = l->next;

 		free(l);
 		l = following;
 	}
 }

 /*
  * blocks(a, b) -> list of (a1, a2, b1, b2)
  *
  * Python entry point: split the two bytes objects into lines, diff them
  * and return one 4-tuple per hunk produced by diff(), in list order.
  * Returns NULL with MemoryError set when an allocation fails.
  *
  * NOTE(review): the format string names the function "bdiff", so
  * argument errors are reported under that name; left unchanged here.
  */
 static PyObject *blocks(PyObject *self, PyObject *args)
 {
 	PyObject *sa, *sb, *rl = NULL, *m;
 	struct line *a = NULL, *b = NULL;
 	struct hunk l, *h;
 	int an, bn, count, pos = 0;

 	/* the cleanup code walks l.next, so it must be valid on every path */
 	l.next = NULL;

 	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
 		return NULL;

 	an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
 	bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

 	if (!a || !b)
 		goto nomem;

 	count = diff(a, an, b, bn, &l);
 	if (count < 0)
 		goto nomem;

 	rl = PyList_New(count);
 	if (!rl)
 		goto nomem;

 	for (h = l.next; h; h = h->next) {
 		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
 		if (!m) {
 			/* never hand out a list with empty slots */
 			Py_DECREF(rl);
 			rl = NULL;
 			goto nomem;
 		}
 		PyList_SetItem(rl, pos, m); /* takes over the reference to m */
 		pos++;
 	}

 nomem:
 	free(a);
 	free(b);
 	freehunks(l.next);
 	return rl ? rl : PyErr_NoMemory();
 }

 /*
  * bdiff(a, b) -> bytes
  *
  * Python entry point: build a binary patch that describes how to turn
  * buffer a into buffer b. For every gap between two consecutive matching
  * hunks the patch holds three 32-bit fields written with putbe32() --
  * start offset in a, end offset in a, length of the replacement --
  * followed by the replacement bytes copied from b.
  *
  * The line splitting and matching run between PyEval_SaveThread() and
  * PyEval_RestoreThread(). Raises ValueError when either input is longer
  * than UINT_MAX bytes and MemoryError when an allocation fails.
  */
 static PyObject *bdiff(PyObject *self, PyObject *args)
 {
 	char *sa, *sb, *rb;
 	PyObject *result = NULL;
 	struct line *al = NULL, *bl = NULL;
 	struct hunk l, *h;
 	int an, bn, count;
 	Py_ssize_t len = 0, la, lb;
 	PyThreadState *_save;

 	/* the cleanup code walks l.next, so it must be valid on every path */
 	l.next = NULL;

 	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
 		return NULL;

 	if (la > UINT_MAX || lb > UINT_MAX) {
 		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
 		return NULL;
 	}

 	_save = PyEval_SaveThread();
 	an = splitlines(sa, la, &al);
 	bn = splitlines(sb, lb, &bl);
 	if (!al || !bl)
 		goto nomem;

 	count = diff(al, an, bl, bn, &l);
 	if (count < 0)
 		goto nomem;

 	/* calculate length of output; la/lb now count lines, not bytes */
 	la = lb = 0;
 	for (h = l.next; h; h = h->next) {
 		if (h->a1 != la || h->b1 != lb)
 			len += 12 + bl[h->b1].l - bl[lb].l;
 		la = h->a2;
 		lb = h->b2;
 	}
 	PyEval_RestoreThread(_save);
 	_save = NULL;

 	result = PyBytes_FromStringAndSize(NULL, len);

 	if (!result)
 		goto nomem;

 	/* build binary patch */
 	rb = PyBytes_AsString(result);
 	la = lb = 0;

 	for (h = l.next; h; h = h->next) {
 		if (h->a1 != la || h->b1 != lb) {
 			len = bl[h->b1].l - bl[lb].l;
 			putbe32((uint32_t)(al[la].l - al->l), rb);
 			putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4);
 			putbe32((uint32_t)len, rb + 8);
 			memcpy(rb + 12, bl[lb].l, len);
 			rb += 12 + len;
 		}
 		la = h->a2;
 		lb = h->b2;
 	}

 nomem:
 	/* _save is still set when we jumped here before restoring the thread */
 	if (_save)
 		PyEval_RestoreThread(_save);
 	free(al);
 	free(bl);
 	freehunks(l.next);
 	return result ? result : PyErr_NoMemory();
 }

 /*
  * fixws(s, allws) -> bytes
  *
  * Normalize the whitespace of the bytes object s; ' ', '\t' and '\r'
  * count as whitespace. With allws != 0 every whitespace character is
  * removed. Otherwise each run of whitespace shrinks to a single ' ', and
  * a ' ' left directly in front of a '\n' is replaced by that '\n', which
  * trims whitespace from the end of lines.
  */
 static PyObject *fixws(PyObject *self, PyObject *args)
 {
 	PyObject *s, *result;
 	char allws, c;
 	const char *src;
 	char *out;
 	Py_ssize_t i, srclen, outlen = 0;

 	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
 		return NULL;
 	src = PyBytes_AsString(s);
 	srclen = PyBytes_Size(s);

 	/* the output never grows beyond the input */
 	out = (char *)malloc(srclen ? srclen : 1);
 	if (out == NULL)
 		return PyErr_NoMemory();

 	for (i = 0; i != srclen; i++) {
 		c = src[i];
 		switch (c) {
 		case ' ':
 		case '\t':
 		case '\r':
 			/* emit at most one ' ' per run, none with allws */
 			if (!allws && (outlen == 0 || out[outlen - 1] != ' '))
 				out[outlen++] = ' ';
 			break;
 		case '\n':
 			if (!allws && outlen > 0 && out[outlen - 1] == ' ') {
 				/* overwrite the trailing ' ' */
 				out[outlen - 1] = '\n';
 				break;
 			}
 			/* fall through */
 		default:
 			out[outlen++] = c;
 			break;
 		}
 	}

 	result = PyBytes_FromStringAndSize(out, outlen);
 	free(out);
 	if (result == NULL)
 		return PyErr_NoMemory();
 	return result;
 }


 /* Docstring of the extension module. */
 static char mdiff_doc[] = "Efficient binary diff.";

 /* Functions exported to Python; the all-zero entry ends the table. */
 static PyMethodDef methods[] = {
 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
 	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
 	{NULL, NULL, 0, NULL}
 };

 /*
  * Module initialization. Two entry points are provided and IS_PY3K
  * selects between them (presumably defined by util.h for Python 3
  * builds -- confirm there).
  */
 #ifdef IS_PY3K
 /* Module description: name, docstring, no per-module state (-1), methods. */
 static struct PyModuleDef bdiff_module = {
 	PyModuleDef_HEAD_INIT,
 	"bdiff",
 	mdiff_doc,
 	-1,
 	methods
 };

 /* Python 3 entry point: create and return the "bdiff" module. */
 PyMODINIT_FUNC PyInit_bdiff(void)
 {
 	return PyModule_Create(&bdiff_module);
 }
 #else
 /* Python 2 entry point: register the "bdiff" module and its methods. */
 PyMODINIT_FUNC initbdiff(void)
 {
 	Py_InitModule3("bdiff", methods, mdiff_doc);
 }
 #endif
	/*
	bdiff.c - efficient binary diff extension for Mercurial

	Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>

	This software may be used and distributed according to the terms of
	the GNU General Public License, incorporated herein by reference.

	Based roughly on Python difflib
	*/

	#define PY_SSIZE_T_CLEAN
	#include <Python.h>
	#include <stdlib.h>
	#include <string.h>
	#include <limits.h>

	#include "util.h"

	/* One input line as seen by the diff machinery. */
	struct line {
		int hash;       /* hash of the line's bytes, filled in by splitlines() */
		int n;          /* next candidate line index in b; INT_MAX means none */
		int e;          /* equivalence class: hash table slot picked by equatelines() */
		Py_ssize_t len; /* length in bytes, counting the trailing '\n' if any */
		const char *l;  /* first byte of the line inside the caller's buffer */
	};

	/*
	 * Small (position, length) pair. equatelines() uses it as a hash table
	 * slot (head line index, member count) and longest_match() uses it to
	 * remember, per line of b, the last matching line of a and the length
	 * of the run ending there.
	 */
	struct pos {
		int pos;
		int len;
	};

	struct hunk;

	/*
	 * One matching block: lines a[a1..a2) equal lines b[b1..b2).
	 * Hunks are chained into a singly linked list through next.
	 */
	struct hunk {
		int a1;
		int a2;
		int b1;
		int b2;
		struct hunk *next;
	};

	/*
	 * Split the buffer a[0..len) into lines and hash each one.
	 *
	 * A line ends at every '\n' (which stays part of the line) or at the last
	 * byte of the buffer when that byte is not a newline. On success *lr
	 * points to a malloc()ed array with one entry per line followed by a
	 * zero-length sentinel whose l member points at a + len. The caller owns
	 * the array and releases it with free(). Entries point into a, so a must
	 * stay alive as long as they are used.
	 *
	 * Returns the number of lines, sentinel excluded. On failure (out of
	 * memory, or more lines than an int can index) returns -1 and leaves
	 * *lr set to NULL.
	 */
	static int splitlines(const char *a, Py_ssize_t len, struct line **lr)
	{
		const char * const end = a + len;
		const char *p, *b = a;
		struct line *l;
		size_t count = 1; /* extra line for sentinel */
		unsigned hash = 0;

		*lr = NULL;

		/*
		 * count the lines; "p + 1 == end" recognizes the final byte
		 * without ever forming a + len - 1, which would point before the
		 * buffer when len is 0
		 */
		for (p = a; p < end; p++)
			if (*p == '\n' || p + 1 == end)
				count++;

		/* line indexes are ints in the rest of this file */
		if (count - 1 > (size_t)INT_MAX ||
		    count > (size_t)-1 / sizeof(struct line))
			return -1;

		*lr = l = (struct line *)malloc(sizeof(struct line) * count);
		if (!l)
			return -1;

		/* build the line array and calculate hashes */
		for (p = a; p < end; p++) {
			/* Leonid Yuriev's hash */
			hash = (hash * 1664525) + (unsigned char)*p + 1013904223;

			if (*p == '\n' || p + 1 == end) {
				l->hash = hash;
				hash = 0;
				l->len = p - b + 1;
				l->l = b;
				l->n = INT_MAX;
				l++;
				b = p + 1;
			}
		}

		/* set up a sentinel */
		l->hash = 0;
		l->len = 0;
		l->l = end;
		return (int)(count - 1);
	}

	static inline int cmp(struct line a, struct line b)
	{
	return a->hash != b->hash \|\| a->len != b->len \|\| memcmp(a->l, b->l, a->len);
	}

	static int equatelines(struct line a, int an, struct line b, int bn)
	{
	int i, j, buckets = 1, t, scale;
	struct pos *h = NULL;

	/* build a hash table of the next highest power of 2 */
	while (buckets < bn + 1)
	buckets *= 2;

	/* try to allocate a large hash table to avoid collisions */
	for (scale = 4; scale; scale /= 2) {
	h = (struct pos )malloc(scale buckets * sizeof(struct pos));
	if (h)
	break;
	}

	if (!h)
	return 0;

	buckets = buckets * scale - 1;

	/* clear the hash table */
	for (i = 0; i <= buckets; i++) {
	h[i].pos = INT_MAX;
	h[i].len = 0;
	}

	/* add lines to the hash table chains */
	for (i = bn - 1; i >= 0; i--) {
	/* find the equivalence class */
	for (j = b[i].hash & buckets; h[j].pos != INT_MAX;
	j = (j + 1) & buckets)
	if (!cmp(b + i, b + h[j].pos))
	break;

	/* add to the head of the equivalence class */
	b[i].n = h[j].pos;
	b[i].e = j;
	h[j].pos = i;
	h[j].len++; /* keep track of popularity */
	}

	/* compute popularity threshold */
	t = (bn >= 31000) ? bn / 1000 : 1000000 / (bn + 1);

	/* match items in a to their equivalence class in b */
	for (i = 0; i < an; i++) {
	/* find the equivalence class */
	for (j = a[i].hash & buckets; h[j].pos != INT_MAX;
	j = (j + 1) & buckets)
	if (!cmp(a + i, b + h[j].pos))
	break;

	a[i].e = j; /* use equivalence class for quick compare */
	if (h[j].len <= t)
	a[i].n = h[j].pos; /* point to head of match list */
	else
	a[i].n = INT_MAX; /* too popular */
	}

	/* discard hash tables */
	free(h);
	return 1;
	}

	static int longest_match(struct line a, struct line b, struct pos *pos,
	int a1, int a2, int b1, int b2, int omi, int omj)
	{
	int mi = a1, mj = b1, mk = 0, mb = 0, i, j, k;

	for (i = a1; i < a2; i++) {
	/* skip things before the current block */
	for (j = a[i].n; j < b1; j = b[j].n)
	;

	/* loop through all lines match a[i] in b */
	for (; j < b2; j = b[j].n) {
	/* does this extend an earlier match? */
	if (i > a1 && j > b1 && pos[j - 1].pos == i - 1)
	k = pos[j - 1].len + 1;
	else
	k = 1;
	pos[j].pos = i;
	pos[j].len = k;

	/* best match so far? */
	if (k > mk) {
	mi = i;
	mj = j;
	mk = k;
	}
	}
	}

	if (mk) {
	mi = mi - mk + 1;
	mj = mj - mk + 1;
	}

	/* expand match to include neighboring popular lines */
	while (mi - mb > a1 && mj - mb > b1 &&
	a[mi - mb - 1].e == b[mj - mb - 1].e)
	mb++;
	while (mi + mk < a2 && mj + mk < b2 &&
	a[mi + mk].e == b[mj + mk].e)
	mk++;

	*omi = mi - mb;
	*omj = mj - mb;

	return mk + mb;
	}

	/*
	 * Append the matching blocks of a[a1..a2) versus b[b1..b2) to the hunk
	 * list whose current tail is l.
	 *
	 * The longest match is located, the region in front of it is handled by
	 * a recursive call, the match itself is appended, and the region behind
	 * it is handled by looping. Returns the new tail of the list, or NULL
	 * when a hunk could not be allocated.
	 *
	 * The return type and the a, b and pos parameters are pointers; their
	 * '*' declarators had been lost from this copy of the function.
	 */
	static struct hunk *recurse(struct line *a, struct line *b, struct pos *pos,
				    int a1, int a2, int b1, int b2, struct hunk *l)
	{
		int i, j, k;

		while (1) {
			/* find the longest match in this chunk */
			k = longest_match(a, b, pos, a1, a2, b1, b2, &i, &j);
			if (!k)
				return l;

			/* and recurse on the remaining chunks on either side */
			l = recurse(a, b, pos, a1, i, b1, j, l);
			if (!l)
				return NULL;

			l->next = (struct hunk *)malloc(sizeof(struct hunk));
			if (!l->next)
				return NULL;

			l = l->next;
			l->a1 = i;
			l->a2 = i + k;
			l->b1 = j;
			l->b2 = j + k;
			l->next = NULL;

			/* tail-recursion didn't happen, so do equivalent iteration */
			a1 = i + k;
			b1 = j + k;
		}
	}

	/*
	 * Compute the matching blocks between a[0..an) and b[0..bn).
	 *
	 * The hunks are appended to the list headed by base (base is a dummy;
	 * the first real hunk is base->next) and the list ends with a
	 * zero-length hunk at (an, bn). Adjacent hunks are then normalized by
	 * sliding each one towards the end as far as the lines allow.
	 *
	 * Returns the number of hunks in the list, or -1 when memory runs out.
	 * After -1 the caller still owns whatever is linked behind base and has
	 * to free it.
	 */
	static int diff(struct line *a, int an, struct line *b, int bn,
			struct hunk *base)
	{
		struct hunk *curr;
		struct pos *pos;
		int t, count = 0;

		/* allocate and fill arrays */
		t = equatelines(a, an, b, bn);
		pos = (struct pos *)calloc(bn ? bn : 1, sizeof(struct pos));

		/*
		 * Without both tables no matching can be done; report that as an
		 * error rather than as an empty hunk list, which callers would
		 * take for a valid result.
		 */
		if (!pos || !t) {
			free(pos);
			return -1;
		}

		/* generate the matching block list */
		curr = recurse(a, b, pos, 0, an, 0, bn, base);

		/* pos is only needed while matching; release it on every path */
		free(pos);
		if (!curr)
			return -1;

		/* sentinel end hunk */
		curr->next = (struct hunk *)malloc(sizeof(struct hunk));
		if (!curr->next)
			return -1;
		curr = curr->next;
		curr->a1 = curr->a2 = an;
		curr->b1 = curr->b2 = bn;
		curr->next = NULL;

		/* normalize the hunk list, try to push each hunk towards the end */
		for (curr = base->next; curr; curr = curr->next) {
			struct hunk *next = curr->next;
			int shift = 0;

			if (!next)
				break;

			if (curr->a2 == next->a1)
				while (curr->a2 + shift < an && curr->b2 + shift < bn
				       && !cmp(a + curr->a2 + shift,
					       b + curr->b2 + shift))
					shift++;
			else if (curr->b2 == next->b1)
				while (curr->b2 + shift < bn && curr->a2 + shift < an
				       && !cmp(b + curr->b2 + shift,
					       a + curr->a2 + shift))
					shift++;
			if (!shift)
				continue;
			curr->b2 += shift;
			next->b1 += shift;
			curr->a2 += shift;
			next->a1 += shift;
		}

		for (curr = base->next; curr; curr = curr->next)
			count++;
		return count;
	}

	/* Release every hunk of the list starting at l; l may be NULL. */
	static void freehunks(struct hunk *l)
	{
		while (l != NULL) {
			/* grab the successor before the node goes away */
			struct hunk *following = l->next;

			free(l);
			l = following;
		}
	}

	/*
	 * blocks(a, b) -> list of (a1, a2, b1, b2)
	 *
	 * Python entry point: split the two bytes objects into lines, diff them
	 * and return one 4-tuple per hunk produced by diff(), in list order.
	 * Returns NULL with MemoryError set when an allocation fails.
	 *
	 * The '*' declarators and '||' operators that had been lost from this
	 * copy are restored.
	 *
	 * NOTE(review): the format string names the function "bdiff", so
	 * argument errors are reported under that name; left unchanged here.
	 */
	static PyObject *blocks(PyObject *self, PyObject *args)
	{
		PyObject *sa, *sb, *rl = NULL, *m;
		struct line *a = NULL, *b = NULL;
		struct hunk l, *h;
		int an, bn, count, pos = 0;

		/* the cleanup code walks l.next, so it must be valid on every path */
		l.next = NULL;

		if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
			return NULL;

		an = splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
		bn = splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

		if (!a || !b)
			goto nomem;

		count = diff(a, an, b, bn, &l);
		if (count < 0)
			goto nomem;

		rl = PyList_New(count);
		if (!rl)
			goto nomem;

		for (h = l.next; h; h = h->next) {
			m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
			if (!m) {
				/* never hand out a list with empty slots */
				Py_DECREF(rl);
				rl = NULL;
				goto nomem;
			}
			PyList_SetItem(rl, pos, m); /* takes over the reference to m */
			pos++;
		}

	nomem:
		free(a);
		free(b);
		freehunks(l.next);
		return rl ? rl : PyErr_NoMemory();
	}

	/*
	 * bdiff(a, b) -> bytes
	 *
	 * Python entry point: build a binary patch that describes how to turn
	 * buffer a into buffer b. For every gap between two consecutive matching
	 * hunks the patch holds three 32-bit fields written with putbe32() --
	 * start offset in a, end offset in a, length of the replacement --
	 * followed by the replacement bytes copied from b.
	 *
	 * The line splitting and matching run between PyEval_SaveThread() and
	 * PyEval_RestoreThread(). Raises ValueError when either input is longer
	 * than UINT_MAX bytes and MemoryError when an allocation fails.
	 *
	 * The '*' declarators and '||' operators that had been lost from this
	 * copy are restored.
	 */
	static PyObject *bdiff(PyObject *self, PyObject *args)
	{
		char *sa, *sb, *rb;
		PyObject *result = NULL;
		struct line *al = NULL, *bl = NULL;
		struct hunk l, *h;
		int an, bn, count;
		Py_ssize_t len = 0, la, lb;
		PyThreadState *_save;

		/* the cleanup code walks l.next, so it must be valid on every path */
		l.next = NULL;

		if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
			return NULL;

		if (la > UINT_MAX || lb > UINT_MAX) {
			PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
			return NULL;
		}

		_save = PyEval_SaveThread();
		an = splitlines(sa, la, &al);
		bn = splitlines(sb, lb, &bl);
		if (!al || !bl)
			goto nomem;

		count = diff(al, an, bl, bn, &l);
		if (count < 0)
			goto nomem;

		/* calculate length of output; la/lb now count lines, not bytes */
		la = lb = 0;
		for (h = l.next; h; h = h->next) {
			if (h->a1 != la || h->b1 != lb)
				len += 12 + bl[h->b1].l - bl[lb].l;
			la = h->a2;
			lb = h->b2;
		}
		PyEval_RestoreThread(_save);
		_save = NULL;

		result = PyBytes_FromStringAndSize(NULL, len);

		if (!result)
			goto nomem;

		/* build binary patch */
		rb = PyBytes_AsString(result);
		la = lb = 0;

		for (h = l.next; h; h = h->next) {
			if (h->a1 != la || h->b1 != lb) {
				len = bl[h->b1].l - bl[lb].l;
				putbe32((uint32_t)(al[la].l - al->l), rb);
				putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4);
				putbe32((uint32_t)len, rb + 8);
				memcpy(rb + 12, bl[lb].l, len);
				rb += 12 + len;
			}
			la = h->a2;
			lb = h->b2;
		}

	nomem:
		/* _save is still set when we jumped here before restoring the thread */
		if (_save)
			PyEval_RestoreThread(_save);
		free(al);
		free(bl);
		freehunks(l.next);
		return result ? result : PyErr_NoMemory();
	}

	/*
	 * fixws(s, allws) -> bytes
	 *
	 * Normalize the whitespace of the bytes object s; ' ', '\t' and '\r'
	 * count as whitespace. With allws != 0 every whitespace character is
	 * removed. Otherwise each run of whitespace shrinks to a single ' ', and
	 * a ' ' left directly in front of a '\n' is replaced by that '\n', which
	 * trims whitespace from the end of lines.
	 *
	 * The '*' declarators and '||' operators that had been lost from this
	 * copy are restored.
	 */
	static PyObject *fixws(PyObject *self, PyObject *args)
	{
		PyObject *s, *result = NULL;
		char allws, c;
		const char *r;
		Py_ssize_t i, rlen, wlen = 0;
		char *w;

		if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
			return NULL;
		r = PyBytes_AsString(s);
		rlen = PyBytes_Size(s);

		/* the output never grows beyond the input */
		w = (char *)malloc(rlen ? rlen : 1);
		if (!w)
			goto nomem;

		for (i = 0; i != rlen; i++) {
			c = r[i];
			if (c == ' ' || c == '\t' || c == '\r') {
				/* emit at most one ' ' per run, none with allws */
				if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
					w[wlen++] = ' ';
			} else if (c == '\n' && !allws
				   && wlen > 0 && w[wlen - 1] == ' ') {
				/* overwrite the trailing ' ' */
				w[wlen - 1] = '\n';
			} else {
				w[wlen++] = c;
			}
		}

		result = PyBytes_FromStringAndSize(w, wlen);

	nomem:
		free(w);
		return result ? result : PyErr_NoMemory();
	}


	/* Docstring of the extension module. */
	static char mdiff_doc[] = "Efficient binary diff.";

	/* Functions exported to Python; the all-zero entry ends the table. */
	static PyMethodDef methods[] = {
		{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
		{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
		{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
		{NULL, NULL, 0, NULL}
	};

	/*
	 * Module initialization. Two entry points are provided and IS_PY3K
	 * selects between them (presumably defined by util.h for Python 3
	 * builds -- confirm there).
	 */
	#ifdef IS_PY3K
	/* Module description: name, docstring, no per-module state (-1), methods. */
	static struct PyModuleDef bdiff_module = {
	PyModuleDef_HEAD_INIT,
	"bdiff",
	mdiff_doc,
	-1,
	methods
	};

	/* Python 3 entry point: create and return the "bdiff" module. */
	PyMODINIT_FUNC PyInit_bdiff(void)
	{
	return PyModule_Create(&bdiff_module);
	}
	#else
	/* Python 2 entry point: register the "bdiff" module and its methods. */
	PyMODINIT_FUNC initbdiff(void)
	{
	Py_InitModule3("bdiff", methods, mdiff_doc);
	}
	#endif