Python connected components
I like this algorithm:
def connected_components(neighbors):
    """Yield the connected components of an undirected graph.

    Args:
        neighbors: Mapping from each node to an iterable of its adjacent
            nodes. Every node reached during traversal is looked up as a
            key of this mapping.

    Yields:
        One list of nodes per connected component. The order of nodes
        inside a component is unspecified.
    """
    seen = set()

    def component(start):
        # Collect the component eagerly so that ``seen`` is complete before
        # the outer loop tests the next node, even when the caller stores
        # the components (e.g. ``list(connected_components(g))``) instead of
        # consuming each one immediately.
        members = []
        frontier = {start}
        while frontier:
            node = frontier.pop()
            seen.add(node)
            members.append(node)
            # Accept any iterable of neighbors, not only sets.
            frontier.update(n for n in neighbors[node] if n not in seen)
        return members

    for node in neighbors:
        if node not in seen:
            yield component(node)
Not only is it short and elegant, it is also fast. Use it like so (Python 2.7):
from collections import defaultdict

# Input graph: each node maps to the list of (source, target) edges leaving it.
old_graph = {
    0: [(0, 1), (0, 2), (0, 3)],
    1: [],
    2: [(2, 1)],
    3: [(3, 4), (3, 5)],
    4: [(4, 3), (4, 5)],
    5: [(5, 3), (5, 4), (5, 7)],
    6: [(6, 8)],
    7: [],
    8: [(8, 9)],
    9: [],
}

# Every distinct edge in the graph.
edges = {edge for node_edges in old_graph.values() for edge in node_edges}

# Symmetric neighbor sets, the form connected_components() traverses.
graph = defaultdict(set)
for v1, v2 in edges:
    graph[v1].add(v2)
    graph[v2].add(v1)

# For each component of nodes, gather the original edges touching it.
components = []
for component in connected_components(graph):
    c = set(component)
    # The loop variable is deliberately not called ``edges``: under
    # Python 2.7 list-comprehension variables leak and would rebind the
    # module-level ``edges`` set.
    components.append([edge for node_edges in old_graph.values()
                       for edge in node_edges
                       if c.intersection(edge)])
print(components)
The result is:
[[(0, 1), (0, 2), (0, 3), (2, 1), (3, 4), (3, 5), (4, 3), (4, 5), (5, 3), (5, 4), (5, 7)],
[(6, 8), (8, 9)]]
Thanks, aparpara, for spotting the bug.
Let's simplify the graph representation:
# Graph as adjacency lists: node -> list of neighboring nodes.
myGraph = {
    0: [1, 2, 3],
    1: [],
    2: [1],
    3: [4, 5],
    4: [3, 5],
    5: [3, 4, 7],
    6: [8],
    7: [],
    8: [9],
    9: [],
}
Here we have the function returning a dictionary whose keys are the roots and whose values are the connected components:
def getRoots(aNeigh):
    """Group the nodes of a graph into connected components.

    Uses a disjoint-set forest with union by rank.

    Args:
        aNeigh: Mapping from each node to an iterable of neighboring nodes.
            Every neighbor must itself be a key of the mapping.

    Returns:
        A dict mapping the representative (root) node of each component to
        the list of nodes in that component, in the iteration order of
        ``aNeigh``.
    """
    # forest[node] == (parent, rank); a root is its own parent.
    forest = {node: (node, 0) for node in aNeigh}

    def _find(node):
        # Follow parent links up to the root; return (root, rank of root).
        parent = forest[node][0]
        while parent != node:
            node = parent
            parent = forest[node][0]
        return node, forest[node][1]

    for source, targets in aNeigh.items():
        for target in targets:
            source_root, source_rank = _find(source)
            target_root, target_rank = _find(target)
            if source_root == target_root:
                continue
            # Attach the shallower tree under the deeper one; ties attach the
            # source's tree under the target's.
            if source_rank > target_rank:
                child, keeper = target_root, source_root
            else:
                child, keeper = source_root, target_root
            merged_rank = max(forest[child][1] + 1, forest[keeper][1])
            forest[keeper] = (keeper, merged_rank)
            forest[child] = (keeper, -1)

    # One bucket per root, then drop every node into its root's bucket.
    groups = {node: [] for node in aNeigh if forest[node][0] == node}
    for node in aNeigh:
        groups[_find(node)[0]].append(node)
    return groups
Let's try it:
# The call form of print works under both Python 2 and Python 3.
print(getRoots(myGraph))
{8: [6, 8, 9], 1: [0, 1, 2, 3, 4, 5, 7]}
The previous answer is great. Still, it took me a while to understand what was going on, so I refactored the code in a way that is easier for me to read. I leave the code here in case someone else finds it easier too (it runs in Python 3.6).
def get_all_connected_groups(graph):
    """Return the connected components of *graph*.

    Args:
        graph: Mapping from each node to an iterable of its neighbors.
            Every node reached during traversal is looked up as a key.

    Returns:
        A list with one list of nodes per connected component. The order
        of nodes inside a component is unspecified.
    """
    already_seen = set()
    result = []
    for node in graph:
        if node not in already_seen:
            # Pass the graph explicitly so the helper does not depend on a
            # module-level variable that happens to share the name.
            connected_group, already_seen = get_connected_group(
                node, already_seen, graph)
            result.append(connected_group)
    return result


def get_connected_group(node, already_seen, neighbors=None):
    """Collect every node reachable from *node*.

    Args:
        node: Node at which the traversal starts.
        already_seen: Set of nodes visited so far; it is updated in place.
        neighbors: Mapping from node to an iterable of adjacent nodes. When
            omitted, the module-level ``graph`` is used so that existing
            two-argument callers keep working.

    Returns:
        A tuple ``(group, already_seen)`` where ``group`` is the list of
        nodes visited by this call.
    """
    if neighbors is None:
        neighbors = graph  # legacy fallback: module-level graph
    result = []
    nodes = {node}
    while nodes:
        node = nodes.pop()
        already_seen.add(node)
        # Always merge this node's unseen neighbors into the frontier.
        # Replacing the frontier only when it is empty (``nodes or ...``)
        # drops the neighbors of every node popped while other nodes are
        # still pending, which can split one component into several.
        nodes.update(n for n in neighbors[node] if n not in already_seen)
        result.append(node)
    return result, already_seen
# Example input: each node maps to the set of nodes it is linked to
# (some entries list the node itself as well).
graph = {
    0: {0, 1, 2, 3},
    1: set(),
    2: {1, 2},
    3: {3, 4, 5},
    4: {3, 4, 5},
    5: {3, 4, 5, 7},
    6: {6, 8},
    7: set(),
    8: {8, 9},
    9: set(),
}

# Compute the groups of connected nodes and show them.
components = get_all_connected_groups(graph)
print(components)
Result:
Out[0]: [[0, 1, 2, 3, 4, 5, 7], [6, 8, 9]]
Also, I simplified the input and output. I think it is clearer to print all the nodes that are in a group.