plugins/hg4idea/testData/bin/mercurial/pvec.py - platform/tools/idea - Git at Google

 # pvec.py - probabilistic vector clocks for Mercurial
 #
 # Copyright 2012 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 '''
 A "pvec" is a changeset property based on the theory of vector clocks
 that can be compared to discover relatedness without consulting a
 graph. This can be useful for tasks like determining how a
 disconnected patch relates to a repository.

 Currently a pvec consist of 448 bits, of which 24 are 'depth' and the
 remainder are a bit vector. It is represented as a 70-character base85
 string.

 Construction:

 - a root changeset has a depth of 0 and a bit vector based on its hash
 - a normal commit has a changeset where depth is increased by one and
   one bit vector bit is flipped based on its hash
 - a merge changeset pvec is constructed by copying changes from one pvec into
   the other to balance its depth

 Properties:

 - for linear changes, difference in depth is always <= hamming distance
 - otherwise, changes are probably divergent
 - when hamming distance is < 200, we can reliably detect when pvecs are near

 Issues:

 - hamming distance ceases to work over distances of ~ 200
 - detecting divergence is less accurate when the common ancestor is very close
   to either revision or total distance is high
 - this could probably be improved by modeling the relation between
   delta and hdist

 Uses:

 - a patch pvec can be used to locate the nearest available common ancestor for
   resolving conflicts
 - ordering of patches can be established without a DAG
 - two head pvecs can be compared to determine whether push/pull/merge is needed
   and approximately how many changesets are involved
 - can be used to find a heuristic divergence measure between changesets on
   different branches
 '''

 import base85, util
 from node import nullrev

 _size = 448 # 70 chars b85-encoded
 _bytes = _size / 8
 _depthbits = 24
 _depthbytes = _depthbits / 8
 _vecbytes = _bytes - _depthbytes
 _vecbits = _vecbytes * 8
 _radius = (_vecbits - 30) / 2 # high probability vectors are related

 def _bin(bs):
     '''convert a bytestring to a long'''
     v = 0
     for b in bs:
         v = v * 256 + ord(b)
     return v

 def _str(v, l):
     bs = ""
     for p in xrange(l):
         bs = chr(v & 255) + bs
         v >>= 8
     return bs

 def _split(b):
     '''depth and bitvec'''
     return _bin(b[:_depthbytes]), _bin(b[_depthbytes:])

 def _join(depth, bitvec):
     return _str(depth, _depthbytes) + _str(bitvec, _vecbytes)

 def _hweight(x):
     c = 0
     while x:
         if x & 1:
             c += 1
         x >>= 1
     return c
 _htab = [_hweight(x) for x in xrange(256)]

 def _hamming(a, b):
     '''find the hamming distance between two longs'''
     d = a ^ b
     c = 0
     while d:
         c += _htab[d & 0xff]
         d >>= 8
     return c

 def _mergevec(x, y, c):
     # Ideally, this function would be x ^ y ^ ancestor, but finding
     # ancestors is a nuisance. So instead we find the minimal number
     # of changes to balance the depth and hamming distance

     d1, v1 = x
     d2, v2 = y
     if d1 < d2:
         d1, d2, v1, v2 = d2, d1, v2, v1

     hdist = _hamming(v1, v2)
     ddist = d1 - d2
     v = v1
     m = v1 ^ v2 # mask of different bits
     i = 1

     if hdist > ddist:
         # if delta = 10 and hdist = 100, then we need to go up 55 steps
         # to the ancestor and down 45
         changes = (hdist - ddist + 1) / 2
     else:
         # must make at least one change
         changes = 1
     depth = d1 + changes

     # copy changes from v2
     if m:
         while changes:
             if m & i:
                 v ^= i
                 changes -= 1
             i <<= 1
     else:
         v = _flipbit(v, c)

     return depth, v

 def _flipbit(v, node):
     # converting bit strings to longs is slow
     bit = (hash(node) & 0xffffffff) % _vecbits
     return v ^ (1<<bit)

 def ctxpvec(ctx):
     '''construct a pvec for ctx while filling in the cache'''
     r = ctx._repo
     if not util.safehasattr(r, "_pveccache"):
         r._pveccache = {}
     pvc = r._pveccache
     if ctx.rev() not in pvc:
         cl = r.changelog
         for n in xrange(ctx.rev() + 1):
             if n not in pvc:
                 node = cl.node(n)
                 p1, p2 = cl.parentrevs(n)
                 if p1 == nullrev:
                     # start with a 'random' vector at root
                     pvc[n] = (0, _bin((node * 3)[:_vecbytes]))
                 elif p2 == nullrev:
                     d, v = pvc[p1]
                     pvc[n] = (d + 1, _flipbit(v, node))
                 else:
                     pvc[n] = _mergevec(pvc[p1], pvc[p2], node)
     bs = _join(*pvc[ctx.rev()])
     return pvec(base85.b85encode(bs))

 class pvec(object):
     def __init__(self, hashorctx):
         if isinstance(hashorctx, str):
             self._bs = hashorctx
             self._depth, self._vec = _split(base85.b85decode(hashorctx))
         else:
             self._vec = ctxpvec(hashorctx)

     def __str__(self):
         return self._bs

     def __eq__(self, b):
         return self._vec == b._vec and self._depth == b._depth

     def __lt__(self, b):
         delta = b._depth - self._depth
         if delta < 0:
             return False # always correct
         if _hamming(self._vec, b._vec) > delta:
             return False
         return True

     def __gt__(self, b):
         return b < self

     def __or__(self, b):
         delta = abs(b._depth - self._depth)
         if _hamming(self._vec, b._vec) <= delta:
             return False
         return True

     def __sub__(self, b):
         if self | b:
             raise ValueError("concurrent pvecs")
         return self._depth - b._depth

     def distance(self, b):
         d = abs(b._depth - self._depth)
         h = _hamming(self._vec, b._vec)
         return max(d, h)

     def near(self, b):
         dist = abs(b.depth - self._depth)
         if dist > _radius or _hamming(self._vec, b._vec) > _radius:
             return False
	# pvec.py - probabilistic vector clocks for Mercurial
	#
	# Copyright 2012 Matt Mackall <mpm@selenic.com>
	#
	# This software may be used and distributed according to the terms of the
	# GNU General Public License version 2 or any later version.

	'''
	A "pvec" is a changeset property based on the theory of vector clocks
	that can be compared to discover relatedness without consulting a
	graph. This can be useful for tasks like determining how a
	disconnected patch relates to a repository.

	Currently a pvec consist of 448 bits, of which 24 are 'depth' and the
	remainder are a bit vector. It is represented as a 70-character base85
	string.

	Construction:

	- a root changeset has a depth of 0 and a bit vector based on its hash
	- a normal commit has a changeset where depth is increased by one and
	one bit vector bit is flipped based on its hash
	- a merge changeset pvec is constructed by copying changes from one pvec into
	the other to balance its depth

	Properties:

	- for linear changes, difference in depth is always <= hamming distance
	- otherwise, changes are probably divergent
	- when hamming distance is < 200, we can reliably detect when pvecs are near

	Issues:

	- hamming distance ceases to work over distances of ~ 200
	- detecting divergence is less accurate when the common ancestor is very close
	to either revision or total distance is high
	- this could probably be improved by modeling the relation between
	delta and hdist

	Uses:

	- a patch pvec can be used to locate the nearest available common ancestor for
	resolving conflicts
	- ordering of patches can be established without a DAG
	- two head pvecs can be compared to determine whether push/pull/merge is needed
	and approximately how many changesets are involved
	- can be used to find a heuristic divergence measure between changesets on
	different branches
	'''

	import base85, util
	from node import nullrev

	_size = 448 # 70 chars b85-encoded
	_bytes = _size / 8
	_depthbits = 24
	_depthbytes = _depthbits / 8
	_vecbytes = _bytes - _depthbytes
	_vecbits = _vecbytes * 8
	_radius = (_vecbits - 30) / 2 # high probability vectors are related

	def _bin(bs):
	'''convert a bytestring to a long'''
	v = 0
	for b in bs:
	v = v * 256 + ord(b)
	return v

	def _str(v, l):
	bs = ""
	for p in xrange(l):
	bs = chr(v & 255) + bs
	v >>= 8
	return bs

	def _split(b):
	'''depth and bitvec'''
	return _bin(b[:_depthbytes]), _bin(b[_depthbytes:])

	def _join(depth, bitvec):
	return _str(depth, _depthbytes) + _str(bitvec, _vecbytes)

	def _hweight(x):
	c = 0
	while x:
	if x & 1:
	c += 1
	x >>= 1
	return c
	_htab = [_hweight(x) for x in xrange(256)]

	def _hamming(a, b):
	'''find the hamming distance between two longs'''
	d = a ^ b
	c = 0
	while d:
	c += _htab[d & 0xff]
	d >>= 8
	return c

	def _mergevec(x, y, c):
	# Ideally, this function would be x ^ y ^ ancestor, but finding
	# ancestors is a nuisance. So instead we find the minimal number
	# of changes to balance the depth and hamming distance

	d1, v1 = x
	d2, v2 = y
	if d1 < d2:
	d1, d2, v1, v2 = d2, d1, v2, v1

	hdist = _hamming(v1, v2)
	ddist = d1 - d2
	v = v1
	m = v1 ^ v2 # mask of different bits
	i = 1

	if hdist > ddist:
	# if delta = 10 and hdist = 100, then we need to go up 55 steps
	# to the ancestor and down 45
	changes = (hdist - ddist + 1) / 2
	else:
	# must make at least one change
	changes = 1
	depth = d1 + changes

	# copy changes from v2
	if m:
	while changes:
	if m & i:
	v ^= i
	changes -= 1
	i <<= 1
	else:
	v = _flipbit(v, c)

	return depth, v

	def _flipbit(v, node):
	# converting bit strings to longs is slow
	bit = (hash(node) & 0xffffffff) % _vecbits
	return v ^ (1<<bit)

	def ctxpvec(ctx):
	'''construct a pvec for ctx while filling in the cache'''
	r = ctx._repo
	if not util.safehasattr(r, "_pveccache"):
	r._pveccache = {}
	pvc = r._pveccache
	if ctx.rev() not in pvc:
	cl = r.changelog
	for n in xrange(ctx.rev() + 1):
	if n not in pvc:
	node = cl.node(n)
	p1, p2 = cl.parentrevs(n)
	if p1 == nullrev:
	# start with a 'random' vector at root
	pvc[n] = (0, _bin((node * 3)[:_vecbytes]))
	elif p2 == nullrev:
	d, v = pvc[p1]
	pvc[n] = (d + 1, _flipbit(v, node))
	else:
	pvc[n] = _mergevec(pvc[p1], pvc[p2], node)
	bs = _join(*pvc[ctx.rev()])
	return pvec(base85.b85encode(bs))

	class pvec(object):
	def __init__(self, hashorctx):
	if isinstance(hashorctx, str):
	self._bs = hashorctx
	self._depth, self._vec = _split(base85.b85decode(hashorctx))
	else:
	self._vec = ctxpvec(hashorctx)

	def __str__(self):
	return self._bs

	def __eq__(self, b):
	return self._vec == b._vec and self._depth == b._depth

	def __lt__(self, b):
	delta = b._depth - self._depth
	if delta < 0:
	return False # always correct
	if _hamming(self._vec, b._vec) > delta:
	return False
	return True

	def __gt__(self, b):
	return b < self

	def __or__(self, b):
	delta = abs(b._depth - self._depth)
	if _hamming(self._vec, b._vec) <= delta:
	return False
	return True

	def __sub__(self, b):
	if self \| b:
	raise ValueError("concurrent pvecs")
	return self._depth - b._depth

	def distance(self, b):
	d = abs(b._depth - self._depth)
	h = _hamming(self._vec, b._vec)
	return max(d, h)

	def near(self, b):
	dist = abs(b.depth - self._depth)
	if dist > _radius or _hamming(self._vec, b._vec) > _radius:
	return False