blob: 491e5c22872c129a28b3f89baac0fe1d7d677fb0 [file] [log] [blame]
"""
altgraph.Graph - Base Graph class
=================================
..
#--Version 2.1
#--Bob Ippolito October, 2004
#--Version 2.0
#--Istvan Albert June, 2004
#--Version 1.0
#--Nathan Denny, May 27, 1999
"""
from altgraph import GraphError
from collections import deque
class Graph(object):
"""
The Graph class represents a directed graph with *N* nodes and *E* edges.
Naming conventions:
- the prefixes such as *out*, *inc* and *all* will refer to methods
that operate on the outgoing, incoming or all edges of that node.
For example: :py:meth:`inc_degree` will refer to the degree of the node
computed over the incoming edges (the number of neighbours linking to
the node).
- the prefixes such as *forw* and *back* will refer to the
orientation of the edges used in the method with respect to the node.
For example: :py:meth:`forw_bfs` will start at the node then use the outgoing
edges to traverse the graph (goes forward).
"""
def __init__(self, edges=None):
"""
Initialization
"""
self.next_edge = 0
self.nodes, self.edges = {}, {}
self.hidden_edges, self.hidden_nodes = {}, {}
if edges is not None:
for item in edges:
if len(item) == 2:
head, tail = item
self.add_edge(head, tail)
elif len(item) == 3:
head, tail, data = item
self.add_edge(head, tail, data)
else:
raise GraphError("Cannot create edge from %s"%(item,))
def __repr__(self):
return '<Graph: %d nodes, %d edges>' % (
self.number_of_nodes(), self.number_of_edges())
def add_node(self, node, node_data=None):
"""
Adds a new node to the graph. Arbitrary data can be attached to the
node via the node_data parameter. Adding the same node twice will be
silently ignored.
The node must be a hashable value.
"""
#
# the nodes will contain tuples that will store incoming edges,
# outgoing edges and data
#
# index 0 -> incoming edges
# index 1 -> outgoing edges
if node in self.hidden_nodes:
# Node is present, but hidden
return
if node not in self.nodes:
self.nodes[node] = ([], [], node_data)
def add_edge(self, head_id, tail_id, edge_data=1, create_nodes=True):
"""
Adds a directed edge going from head_id to tail_id.
Arbitrary data can be attached to the edge via edge_data.
It may create the nodes if adding edges between nonexisting ones.
:param head_id: head node
:param tail_id: tail node
:param edge_data: (optional) data attached to the edge
:param create_nodes: (optional) creates the head_id or tail_id node in case they did not exist
"""
# shorcut
edge = self.next_edge
# add nodes if on automatic node creation
if create_nodes:
self.add_node(head_id)
self.add_node(tail_id)
# update the corresponding incoming and outgoing lists in the nodes
# index 0 -> incoming edges
# index 1 -> outgoing edges
try:
self.nodes[tail_id][0].append(edge)
self.nodes[head_id][1].append(edge)
except KeyError:
raise GraphError('Invalid nodes %s -> %s' % (head_id, tail_id))
# store edge information
self.edges[edge] = (head_id, tail_id, edge_data)
self.next_edge += 1
def hide_edge(self, edge):
"""
Hides an edge from the graph. The edge may be unhidden at some later
time.
"""
try:
head_id, tail_id, edge_data = self.hidden_edges[edge] = self.edges[edge]
self.nodes[tail_id][0].remove(edge)
self.nodes[head_id][1].remove(edge)
del self.edges[edge]
except KeyError:
raise GraphError('Invalid edge %s' % edge)
def hide_node(self, node):
"""
Hides a node from the graph. The incoming and outgoing edges of the
node will also be hidden. The node may be unhidden at some later time.
"""
try:
all_edges = self.all_edges(node)
self.hidden_nodes[node] = (self.nodes[node], all_edges)
for edge in all_edges:
self.hide_edge(edge)
del self.nodes[node]
except KeyError:
raise GraphError('Invalid node %s' % node)
def restore_node(self, node):
"""
Restores a previously hidden node back into the graph and restores
all of its incoming and outgoing edges.
"""
try:
self.nodes[node], all_edges = self.hidden_nodes[node]
for edge in all_edges:
self.restore_edge(edge)
del self.hidden_nodes[node]
except KeyError:
raise GraphError('Invalid node %s' % node)
def restore_edge(self, edge):
"""
Restores a previously hidden edge back into the graph.
"""
try:
head_id, tail_id, data = self.hidden_edges[edge]
self.nodes[tail_id][0].append(edge)
self.nodes[head_id][1].append(edge)
self.edges[edge] = head_id, tail_id, data
del self.hidden_edges[edge]
except KeyError:
raise GraphError('Invalid edge %s' % edge)
def restore_all_edges(self):
"""
Restores all hidden edges.
"""
for edge in list(self.hidden_edges.keys()):
try:
self.restore_edge(edge)
except GraphError:
pass
def restore_all_nodes(self):
"""
Restores all hidden nodes.
"""
for node in list(self.hidden_nodes.keys()):
self.restore_node(node)
def __contains__(self, node):
"""
Test whether a node is in the graph
"""
return node in self.nodes
def edge_by_id(self, edge):
"""
Returns the edge that connects the head_id and tail_id nodes
"""
try:
head, tail, data = self.edges[edge]
except KeyError:
head, tail = None, None
raise GraphError('Invalid edge %s' % edge)
return (head, tail)
def edge_by_node(self, head, tail):
"""
Returns the edge that connects the head_id and tail_id nodes
"""
for edge in self.out_edges(head):
if self.tail(edge) == tail:
return edge
return None
def number_of_nodes(self):
"""
Returns the number of nodes
"""
return len(self.nodes)
def number_of_edges(self):
"""
Returns the number of edges
"""
return len(self.edges)
def __iter__(self):
"""
Iterates over all nodes in the graph
"""
return iter(self.nodes)
def node_list(self):
"""
Return a list of the node ids for all visible nodes in the graph.
"""
return list(self.nodes.keys())
def edge_list(self):
"""
Returns an iterator for all visible nodes in the graph.
"""
return list(self.edges.keys())
def number_of_hidden_edges(self):
"""
Returns the number of hidden edges
"""
return len(self.hidden_edges)
def number_of_hidden_nodes(self):
"""
Returns the number of hidden nodes
"""
return len(self.hidden_nodes)
def hidden_node_list(self):
"""
Returns the list with the hidden nodes
"""
return list(self.hidden_nodes.keys())
def hidden_edge_list(self):
"""
Returns a list with the hidden edges
"""
return list(self.hidden_edges.keys())
def describe_node(self, node):
"""
return node, node data, outgoing edges, incoming edges for node
"""
incoming, outgoing, data = self.nodes[node]
return node, data, outgoing, incoming
def describe_edge(self, edge):
"""
return edge, edge data, head, tail for edge
"""
head, tail, data = self.edges[edge]
return edge, data, head, tail
def node_data(self, node):
"""
Returns the data associated with a node
"""
return self.nodes[node][2]
def edge_data(self, edge):
"""
Returns the data associated with an edge
"""
return self.edges[edge][2]
def update_edge_data(self, edge, edge_data):
"""
Replace the edge data for a specific edge
"""
self.edges[edge] = self.edges[edge][0:2] + (edge_data,)
def head(self, edge):
"""
Returns the node of the head of the edge.
"""
return self.edges[edge][0]
def tail(self, edge):
"""
Returns node of the tail of the edge.
"""
return self.edges[edge][1]
def out_nbrs(self, node):
"""
List of nodes connected by outgoing edges
"""
l = [self.tail(n) for n in self.out_edges(node)]
return l
def inc_nbrs(self, node):
"""
List of nodes connected by incoming edges
"""
l = [self.head(n) for n in self.inc_edges(node)]
return l
def all_nbrs(self, node):
"""
List of nodes connected by incoming and outgoing edges
"""
l = dict.fromkeys( self.inc_nbrs(node) + self.out_nbrs(node) )
return list(l)
def out_edges(self, node):
"""
Returns a list of the outgoing edges
"""
try:
return list(self.nodes[node][1])
except KeyError:
raise GraphError('Invalid node %s' % node)
return None
def inc_edges(self, node):
"""
Returns a list of the incoming edges
"""
try:
return list(self.nodes[node][0])
except KeyError:
raise GraphError('Invalid node %s' % node)
return None
def all_edges(self, node):
"""
Returns a list of incoming and outging edges.
"""
return set(self.inc_edges(node) + self.out_edges(node))
def out_degree(self, node):
"""
Returns the number of outgoing edges
"""
return len(self.out_edges(node))
def inc_degree(self, node):
"""
Returns the number of incoming edges
"""
return len(self.inc_edges(node))
def all_degree(self, node):
"""
The total degree of a node
"""
return self.inc_degree(node) + self.out_degree(node)
def _topo_sort(self, forward=True):
"""
Topological sort.
Returns a list of nodes where the successors (based on outgoing and
incoming edges selected by the forward parameter) of any given node
appear in the sequence after that node.
"""
topo_list = []
queue = deque()
indeg = {}
# select the operation that will be performed
if forward:
get_edges = self.out_edges
get_degree = self.inc_degree
get_next = self.tail
else:
get_edges = self.inc_edges
get_degree = self.out_degree
get_next = self.head
for node in self.node_list():
degree = get_degree(node)
if degree:
indeg[node] = degree
else:
queue.append(node)
while queue:
curr_node = queue.popleft()
topo_list.append(curr_node)
for edge in get_edges(curr_node):
tail_id = get_next(edge)
if tail_id in indeg:
indeg[tail_id] -= 1
if indeg[tail_id] == 0:
queue.append(tail_id)
if len(topo_list) == len(self.node_list()):
valid = True
else:
# the graph has cycles, invalid topological sort
valid = False
return (valid, topo_list)
def forw_topo_sort(self):
"""
Topological sort.
Returns a list of nodes where the successors (based on outgoing edges)
of any given node appear in the sequence after that node.
"""
return self._topo_sort(forward=True)
def back_topo_sort(self):
"""
Reverse topological sort.
Returns a list of nodes where the successors (based on incoming edges)
of any given node appear in the sequence after that node.
"""
return self._topo_sort(forward=False)
def _bfs_subgraph(self, start_id, forward=True):
"""
Private method creates a subgraph in a bfs order.
The forward parameter specifies whether it is a forward or backward
traversal.
"""
if forward:
get_bfs = self.forw_bfs
get_nbrs = self.out_nbrs
else:
get_bfs = self.back_bfs
get_nbrs = self.inc_nbrs
g = Graph()
bfs_list = get_bfs(start_id)
for node in bfs_list:
g.add_node(node)
for node in bfs_list:
for nbr_id in get_nbrs(node):
g.add_edge(node, nbr_id)
return g
def forw_bfs_subgraph(self, start_id):
"""
Creates and returns a subgraph consisting of the breadth first
reachable nodes based on their outgoing edges.
"""
return self._bfs_subgraph(start_id, forward=True)
def back_bfs_subgraph(self, start_id):
"""
Creates and returns a subgraph consisting of the breadth first
reachable nodes based on the incoming edges.
"""
return self._bfs_subgraph(start_id, forward=False)
def iterdfs(self, start, end=None, forward=True):
"""
Collecting nodes in some depth first traversal.
The forward parameter specifies whether it is a forward or backward
traversal.
"""
visited, stack = set([start]), deque([start])
if forward:
get_edges = self.out_edges
get_next = self.tail
else:
get_edges = self.inc_edges
get_next = self.head
while stack:
curr_node = stack.pop()
yield curr_node
if curr_node == end:
break
for edge in sorted(get_edges(curr_node)):
tail = get_next(edge)
if tail not in visited:
visited.add(tail)
stack.append(tail)
def iterdata(self, start, end=None, forward=True, condition=None):
"""
Perform a depth-first walk of the graph (as ``iterdfs``)
and yield the item data of every node where condition matches. The
condition callback is only called when node_data is not None.
"""
visited, stack = set([start]), deque([start])
if forward:
get_edges = self.out_edges
get_next = self.tail
else:
get_edges = self.inc_edges
get_next = self.head
get_data = self.node_data
while stack:
curr_node = stack.pop()
curr_data = get_data(curr_node)
if curr_data is not None:
if condition is not None and not condition(curr_data):
continue
yield curr_data
if curr_node == end:
break
for edge in get_edges(curr_node):
tail = get_next(edge)
if tail not in visited:
visited.add(tail)
stack.append(tail)
def _iterbfs(self, start, end=None, forward=True):
"""
The forward parameter specifies whether it is a forward or backward
traversal. Returns a list of tuples where the first value is the hop
value the second value is the node id.
"""
queue, visited = deque([(start, 0)]), set([start])
# the direction of the bfs depends on the edges that are sampled
if forward:
get_edges = self.out_edges
get_next = self.tail
else:
get_edges = self.inc_edges
get_next = self.head
while queue:
curr_node, curr_step = queue.popleft()
yield (curr_node, curr_step)
if curr_node == end:
break
for edge in get_edges(curr_node):
tail = get_next(edge)
if tail not in visited:
visited.add(tail)
queue.append((tail, curr_step + 1))
def forw_bfs(self, start, end=None):
"""
Returns a list of nodes in some forward BFS order.
Starting from the start node the breadth first search proceeds along
outgoing edges.
"""
return [node for node, step in self._iterbfs(start, end, forward=True)]
def back_bfs(self, start, end=None):
"""
Returns a list of nodes in some backward BFS order.
Starting from the start node the breadth first search proceeds along
incoming edges.
"""
return [node for node, step in self._iterbfs(start, end, forward=False)]
def forw_dfs(self, start, end=None):
"""
Returns a list of nodes in some forward DFS order.
Starting with the start node the depth first search proceeds along
outgoing edges.
"""
return list(self.iterdfs(start, end, forward=True))
def back_dfs(self, start, end=None):
"""
Returns a list of nodes in some backward DFS order.
Starting from the start node the depth first search proceeds along
incoming edges.
"""
return list(self.iterdfs(start, end, forward=False))
def connected(self):
"""
Returns :py:data:`True` if the graph's every node can be reached from every
other node.
"""
node_list = self.node_list()
for node in node_list:
bfs_list = self.forw_bfs(node)
if len(bfs_list) != len(node_list):
return False
return True
def clust_coef(self, node):
"""
Computes and returns the local clustering coefficient of node. The
local cluster coefficient is proportion of the actual number of edges between
neighbours of node and the maximum number of edges between those neighbours.
See <http://en.wikipedia.org/wiki/Clustering_coefficient#Local_clustering_coefficient>
for a formal definition.
"""
num = 0
nbr_set = set(self.out_nbrs(node))
if node in nbr_set:
nbr_set.remove(node) # loop defense
for nbr in nbr_set:
sec_set = set(self.out_nbrs(nbr))
if nbr in sec_set:
sec_set.remove(nbr) # loop defense
num += len(nbr_set & sec_set)
nbr_num = len(nbr_set)
if nbr_num:
clust_coef = float(num) / (nbr_num * (nbr_num - 1))
else:
clust_coef = 0.0
return clust_coef
def get_hops(self, start, end=None, forward=True):
"""
Computes the hop distance to all nodes centered around a specified node.
First order neighbours are at hop 1, their neigbours are at hop 2 etc.
Uses :py:meth:`forw_bfs` or :py:meth:`back_bfs` depending on the value of the forward
parameter. If the distance between all neighbouring nodes is 1 the hop
number corresponds to the shortest distance between the nodes.
:param start: the starting node
:param end: ending node (optional). When not specified will search the whole graph.
:param forward: directionality parameter (optional). If C{True} (default) it uses L{forw_bfs} otherwise L{back_bfs}.
:return: returns a list of tuples where each tuple contains the node and the hop.
Typical usage::
>>> print (graph.get_hops(1, 8))
>>> [(1, 0), (2, 1), (3, 1), (4, 2), (5, 3), (7, 4), (8, 5)]
# node 1 is at 0 hops
# node 2 is at 1 hop
# ...
# node 8 is at 5 hops
"""
if forward:
return list(self._iterbfs(start=start, end=end, forward=True))
else:
return list(self._iterbfs(start=start, end=end, forward=False))