# Revision history [back]

### Boost implementation of Dijkstra's algorithm

I have benchmarked Dijkstra's algorithm using the Boost Graph library interface provided by SageMath against a basic and academic one but written in pure Python. And the later performs always better. This is very surprising since Boost Graph library is an templated C++ library. Perphaps I'using it in the wrong way. Here is the code :

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
import networkx as nx
G=nx.erdos_renyi_graph(n, p)

G=nx.relabel_nodes(G, lambda k:'v'+str(k))
edges=list(G.edges())
wedges=[]
for u, v in edges:
w=float(randrange(1, 10))
G[u][v]["weight"]=w
wedges.append((u, v, w))

return G, wedges

"From weighted wedges to weighted adjacency list"
n=len(nodes)
node2int={node:i for (i,node) in enumerate(nodes)}
for a, b,w in wedges:
i=node2int[a]
j=node2int[b]

"""
vertices indexed fropm 0 to n-1
Returns shortest path list from src to all vertices"""

processed=[0]*n
processed[src]=1
distances=[(0,src)]
INF=float("inf")
dist=[INF]*n
dist[src]=0
while distances:
d, u=heappop(distances)
if processed[u]==1:
processed[u]=2

dd=d+w
if processed[v]==0 or dd<dist[v]:
heappush(distances, (dd,v))
dist[v]=dd
if processed[v]==0:
processed[v]=1
return dist

def dijkstra(nodes, wedges, n, src):
int2node={i:node for (node,i) in node2int.items()}
src_int=node2int[src]
return {int2node[i]:d for (i, d) in enumerate(dijkstra_int(adj, src_int)) if d!=float("inf")}

print("------------  Generating Graph --------------------")
debut=clock()

n=3000
p=0.3

Gnx, wedges=make_graph(n, p)
src=sample(Gnx.nodes(),1)[0]
nodes=Gnx.nodes()
gtime=(clock()-debut)
print "Number of nodes:", len(Gnx)
print "Number of edges:", len(Gnx.edges())
print("Graph generation: %.2fs" %gtime)

print("------------  Boost --------------------")

G = Graph()
G.weighted(True)

debut=clock()
dist_boost = G.shortest_path_lengths(src,by_weight = True, algorithm='Dijkstra_Boost')

boost=(clock()-debut)
print("Boost: %.2fs" %boost)

print("------------  Pure Python --------------------")
debut=clock()
dist_pp=dijkstra(nodes, wedges, n, src)
py_time=(clock()-debut)
print("Pure Python: %.2fs" %py_time)

print"----------"
print "Checking:", dist_boost==dist_pp


Output :

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1350005
Graph generation: 11.76s
------------  Boost --------------------
Boost: 1.61s
------------  Pure Python --------------------
Pure Python: 1.48s
----------
Checking: True


### Boost implementation of Dijkstra's algorithm

I have benchmarked Dijkstra's algorithm using the Boost Graph library interface provided by SageMath against a basic and academic one but written in pure Python. And the later performs always better. This is very surprising since Boost Graph library is an a templated C++ library. Perphaps I'using it in the wrong way. Here is the code :

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
import networkx as nx
G=nx.erdos_renyi_graph(n, p)

G=nx.relabel_nodes(G, lambda k:'v'+str(k))
edges=list(G.edges())
wedges=[]
for u, v in edges:
w=float(randrange(1, 10))
G[u][v]["weight"]=w
wedges.append((u, v, w))

return G, wedges

"From weighted wedges to weighted adjacency list"
n=len(nodes)
node2int={node:i for (i,node) in enumerate(nodes)}
for a, b,w in wedges:
i=node2int[a]
j=node2int[b]

"""
vertices indexed fropm 0 to n-1
Returns shortest path list from src to all vertices"""

processed=[0]*n
processed[src]=1
distances=[(0,src)]
INF=float("inf")
dist=[INF]*n
dist[src]=0
while distances:
d, u=heappop(distances)
if processed[u]==1:
processed[u]=2

dd=d+w
if processed[v]==0 or dd<dist[v]:
heappush(distances, (dd,v))
dist[v]=dd
if processed[v]==0:
processed[v]=1
return dist

def dijkstra(nodes, wedges, n, src):
int2node={i:node for (node,i) in node2int.items()}
src_int=node2int[src]
return {int2node[i]:d for (i, d) in enumerate(dijkstra_int(adj, src_int)) if d!=float("inf")}

print("------------  Generating Graph --------------------")
debut=clock()

n=3000
p=0.3

Gnx, wedges=make_graph(n, p)
src=sample(Gnx.nodes(),1)[0]
nodes=Gnx.nodes()
gtime=(clock()-debut)
print "Number of nodes:", len(Gnx)
print "Number of edges:", len(Gnx.edges())
print("Graph generation: %.2fs" %gtime)

print("------------  Boost --------------------")

G = Graph()
G.weighted(True)

debut=clock()
dist_boost = G.shortest_path_lengths(src,by_weight = True, algorithm='Dijkstra_Boost')

boost=(clock()-debut)
print("Boost: %.2fs" %boost)

print("------------  Pure Python --------------------")
debut=clock()
dist_pp=dijkstra(nodes, wedges, n, src)
py_time=(clock()-debut)
print("Pure Python: %.2fs" %py_time)

print"----------"
print "Checking:", dist_boost==dist_pp


Output :

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1350005
Graph generation: 11.76s
------------  Boost --------------------
Boost: 1.61s
------------  Pure Python --------------------
Pure Python: 1.48s
----------
Checking: True


### Boost implementation of Dijkstra's algorithm

I have benchmarked Dijkstra's algorithm using the Boost Graph library interface provided by SageMath against a basic and academic one but written in pure Python. And the later performs always better. This is very surprising since Boost Graph library is a templated C++ library. Perphaps I'using it in the wrong way. Here is the code :

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
import networkx as nx
G=nx.erdos_renyi_graph(n, p)

G=nx.relabel_nodes(G, lambda k:'v'+str(k))
edges=list(G.edges())
wedges=[]
for u, v in edges:
w=float(randrange(1, 10))
G[u][v]["weight"]=w
wedges.append((u, v, w))

return G, wedges

"From weighted wedges to weighted adjacency list"
n=len(nodes)
node2int={node:i for (i,node) in enumerate(nodes)}
for a, b,w in wedges:
i=node2int[a]
j=node2int[b]

"""
vertices indexed fropm 0 to n-1
Returns shortest path list from src to all vertices"""

processed=[0]*n
processed[src]=1
distances=[(0,src)]
INF=float("inf")
dist=[INF]*n
dist[src]=0
while distances:
d, u=heappop(distances)
if processed[u]==1:
processed[u]=2

dd=d+w
if processed[v]==0 or dd<dist[v]:
heappush(distances, (dd,v))
dist[v]=dd
if processed[v]==0:
processed[v]=1
return dist

def dijkstra(nodes, wedges, n, src):
int2node={i:node for (node,i) in node2int.items()}
src_int=node2int[src]
return {int2node[i]:d for (i, d) in enumerate(dijkstra_int(adj, src_int)) if d!=float("inf")}

print("------------  Generating Graph --------------------")
debut=clock()

n=3000
p=0.3

Gnx, wedges=make_graph(n, p)
src=sample(Gnx.nodes(),1)[0]
nodes=Gnx.nodes()
gtime=(clock()-debut)
print "Number of nodes:", len(Gnx)
print "Number of edges:", len(Gnx.edges())
print("Graph generation: %.2fs" %gtime)

print("------------  Boost --------------------")

G = Graph()
G.weighted(True)

debut=clock()
dist_boost = G.shortest_path_lengths(src,by_weight = True, algorithm='Dijkstra_Boost')

boost=(clock()-debut)
print("Boost: %.2fs" %boost)

print("------------  Pure Python --------------------")
debut=clock()
dist_pp=dijkstra(nodes, wedges, n, src)
py_time=(clock()-debut)
print("Pure Python: %.2fs" %py_time)

print"----------"
print "Checking:", dist_boost==dist_pp


Output :

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1350005
Graph generation: 11.76s
------------  Boost --------------------
Boost: 1.61s
------------  Pure Python --------------------
Pure Python: 1.48s
----------
Checking: True


EDIT

Now, the tested functions have the same interface:

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
G = Graph()
for (u, v, _) in graphs.RandomGNP(n, p).edges()])
G.weighted(True)

return G

def dijkstra(G, src):
adj = [[] for _ in range(len(G))]

for i, j, w in G.edges():

processed = [0] * n
processed[src] = 1
distances = [(0, src)]
INF = float("inf")
dist = [INF] * n
dist[src] = 0
while distances:
d, u = heappop(distances)
if processed[u] == 1:
processed[u] = 2

dd = d + w
if processed[v] == 0 or dd < dist[v]:
heappush(distances, (dd, v))
dist[v] = dd
if processed[v] == 0:
processed[v] = 1
return {i: dist[i] for i in G if dist[i] != INF}

print("------------  Generating Graph --------------------")
debut = clock()

n = 3000
p = 0.3

G = make_graph(n, p)
src = randrange(n)
nodes = G.vertices()
gtime = (clock() - debut)
print "Number of nodes:", len(G)
print "Number of edges:", len(G.edges())
print("Graph generation: %.2fs" % gtime)

print("------------  Sage --------------------")

debut = clock()
dist_sage = G.shortest_path_lengths(
src, by_weight=True)
default_time = (clock() - debut)
print("Default: %.2fs" % default_time)

print("------------  Boost --------------------")

debut = clock()
dist_boost = G.shortest_path_lengths(
src, by_weight=True, algorithm='Dijkstra_Boost')
boost_time = (clock() - debut)
print("Boost: %.2fs" % boost_time)

print("------------  Pure Python --------------------")
debut = clock()
dist_pp = dijkstra(G, src)
py_time = (clock() - debut)
print("Pure Python: %.2fs" % py_time)

print "----------"
print "Checking:", dist_sage == dist_boost == dist_pp

print "Time ratio: %.2f" %(py_time/boost_time)


output:

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1349390
Graph generation: 7.74s
------------  Sage --------------------
Default: 2.23s
------------  Boost --------------------
Boost: 1.53s
------------  Pure Python --------------------
Pure Python: 2.54s
----------
Checking: True
Time ratio: 1.66


By the way, the docs explain about the default implementation:

(default): Sage chooses the best algorithm: 'BFS' if by_weight is False, 'Dijkstra_Boost' if all weights are positive, 'Bellman-Ford_Boost' otherwise.

### Boost implementation of Dijkstra's algorithm

I have benchmarked Dijkstra's algorithm using the Boost Graph library interface provided by SageMath against a basic and academic one but written in pure Python. And the later performs always better. This is very surprising since Boost Graph library is a templated C++ library. Perphaps I'using it in the wrong way. Here is the code :

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
import networkx as nx
G=nx.erdos_renyi_graph(n, p)

G=nx.relabel_nodes(G, lambda k:'v'+str(k))
edges=list(G.edges())
wedges=[]
for u, v in edges:
w=float(randrange(1, 10))
G[u][v]["weight"]=w
wedges.append((u, v, w))

return G, wedges

"From weighted wedges to weighted adjacency list"
n=len(nodes)
node2int={node:i for (i,node) in enumerate(nodes)}
for a, b,w in wedges:
i=node2int[a]
j=node2int[b]

"""
vertices indexed fropm 0 to n-1
Returns shortest path list from src to all vertices"""

processed=[0]*n
processed[src]=1
distances=[(0,src)]
INF=float("inf")
dist=[INF]*n
dist[src]=0
while distances:
d, u=heappop(distances)
if processed[u]==1:
processed[u]=2

dd=d+w
if processed[v]==0 or dd<dist[v]:
heappush(distances, (dd,v))
dist[v]=dd
if processed[v]==0:
processed[v]=1
return dist

def dijkstra(nodes, wedges, n, src):
int2node={i:node for (node,i) in node2int.items()}
src_int=node2int[src]
return {int2node[i]:d for (i, d) in enumerate(dijkstra_int(adj, src_int)) if d!=float("inf")}

print("------------  Generating Graph --------------------")
debut=clock()

n=3000
p=0.3

Gnx, wedges=make_graph(n, p)
src=sample(Gnx.nodes(),1)[0]
nodes=Gnx.nodes()
gtime=(clock()-debut)
print "Number of nodes:", len(Gnx)
print "Number of edges:", len(Gnx.edges())
print("Graph generation: %.2fs" %gtime)

print("------------  Boost --------------------")

G = Graph()
G.weighted(True)

debut=clock()
dist_boost = G.shortest_path_lengths(src,by_weight = True, algorithm='Dijkstra_Boost')

boost=(clock()-debut)
print("Boost: %.2fs" %boost)

print("------------  Pure Python --------------------")
debut=clock()
dist_pp=dijkstra(nodes, wedges, n, src)
py_time=(clock()-debut)
print("Pure Python: %.2fs" %py_time)

print"----------"
print "Checking:", dist_boost==dist_pp


Output :

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1350005
Graph generation: 11.76s
------------  Boost --------------------
Boost: 1.61s
------------  Pure Python --------------------
Pure Python: 1.48s
----------
Checking: True


EDIT

Now, the tested functions have the same interface:

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
G = Graph()
for (u, v, _) in graphs.RandomGNP(n, p).edges()])
G.weighted(True)

return G

def dijkstra(G, src):
adj = [[] for _ in range(len(G))]

for i, j, w in G.edges():

processed = [0] * n
processed[src] = 1
distances = [(0, src)]
INF = float("inf")
dist = [INF] * n
dist[src] = 0
while distances:
d, u = heappop(distances)
if processed[u] == 1:
processed[u] = 2

dd = d + w
if processed[v] == 0 or dd < dist[v]:
heappush(distances, (dd, v))
dist[v] = dd
if processed[v] == 0:
processed[v] = 1
return {i: dist[i] for i in G if dist[i] != INF}

print("------------  Generating Graph --------------------")
debut = clock()

n = 3000
p = 0.3

G = make_graph(n, p)
src = randrange(n)
nodes = G.vertices()
gtime = (clock() - debut)
print "Number of nodes:", len(G)
print "Number of edges:", len(G.edges())
print("Graph generation: %.2fs" % gtime)

print("------------  Sage --------------------")

debut = clock()
dist_sage = G.shortest_path_lengths(
src, by_weight=True)
default_time = (clock() - debut)
print("Default: %.2fs" % default_time)

print("------------  Boost --------------------")

debut = clock()
dist_boost = G.shortest_path_lengths(
src, by_weight=True, algorithm='Dijkstra_Boost')
boost_time = (clock() - debut)
print("Boost: %.2fs" % boost_time)

print("------------  Pure Python --------------------")
debut = clock()
dist_pp = dijkstra(G, src)
py_time = (clock() - debut)
print("Pure Python: %.2fs" % py_time)

print "----------"
print "Checking:", dist_sage == dist_boost == dist_pp

print "Time ratio: %.2f" %(py_time/boost_time)


output:

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1349390
Graph generation: 7.74s
------------  Sage --------------------
Default: 2.23s
------------  Boost --------------------
Boost: 1.53s
------------  Pure Python --------------------
Pure Python: 2.54s
----------
Checking: True
Time ratio: 1.66


By the way, the docs explain about the default implementation:

(default): Sage chooses the best algorithm: 'BFS' if by_weight is False, 'Dijkstra_Boost' if all weights are positive, 'Bellman-Ford_Boost' otherwise.

This is not consistent with the timing above.

### Boost implementation of Dijkstra's algorithm

I have benchmarked Dijkstra's algorithm using the Boost Graph library interface provided by SageMath against a basic and academic one but written in pure Python. And the later performs always better. This is very surprising since Boost Graph library is a templated C++ library. Perphaps I'using it in the wrong way. Here is the code :

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
import networkx as nx
G=nx.erdos_renyi_graph(n, p)

G=nx.relabel_nodes(G, lambda k:'v'+str(k))
edges=list(G.edges())
wedges=[]
for u, v in edges:
w=float(randrange(1, 10))
G[u][v]["weight"]=w
wedges.append((u, v, w))

return G, wedges

"From weighted wedges to weighted adjacency list"
n=len(nodes)
node2int={node:i for (i,node) in enumerate(nodes)}
for a, b,w in wedges:
i=node2int[a]
j=node2int[b]

"""
vertices indexed fropm 0 to n-1
Returns shortest path list from src to all vertices"""

processed=[0]*n
processed[src]=1
distances=[(0,src)]
INF=float("inf")
dist=[INF]*n
dist[src]=0
while distances:
d, u=heappop(distances)
if processed[u]==1:
processed[u]=2

dd=d+w
if processed[v]==0 or dd<dist[v]:
heappush(distances, (dd,v))
dist[v]=dd
if processed[v]==0:
processed[v]=1
return dist

def dijkstra(nodes, wedges, n, src):
int2node={i:node for (node,i) in node2int.items()}
src_int=node2int[src]
return {int2node[i]:d for (i, d) in enumerate(dijkstra_int(adj, src_int)) if d!=float("inf")}

print("------------  Generating Graph --------------------")
debut=clock()

n=3000
p=0.3

Gnx, wedges=make_graph(n, p)
src=sample(Gnx.nodes(),1)[0]
nodes=Gnx.nodes()
gtime=(clock()-debut)
print "Number of nodes:", len(Gnx)
print "Number of edges:", len(Gnx.edges())
print("Graph generation: %.2fs" %gtime)

print("------------  Boost --------------------")

G = Graph()
G.weighted(True)

debut=clock()
dist_boost = G.shortest_path_lengths(src,by_weight = True, algorithm='Dijkstra_Boost')

boost=(clock()-debut)
print("Boost: %.2fs" %boost)

print("------------  Pure Python --------------------")
debut=clock()
dist_pp=dijkstra(nodes, wedges, n, src)
py_time=(clock()-debut)
print("Pure Python: %.2fs" %py_time)

print"----------"
print "Checking:", dist_boost==dist_pp


Output :

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1350005
Graph generation: 11.76s
------------  Boost --------------------
Boost: 1.61s
------------  Pure Python --------------------
Pure Python: 1.48s
----------
Checking: True


EDIT

Now, the tested functions have the same interface:

from time import clock
from heapq import heappush, heappop

def make_graph(n, p):
G = Graph()
for (u, v, _) in graphs.RandomGNP(n, p).edges()])
G.weighted(True)

return G

def dijkstra(G, src):
adj = [[] for _ in range(len(G))]

for i, j, w in G.edges():

processed = [0] * n
processed[src] = 1
distances = [(0, src)]
INF = float("inf")
dist = [INF] * n
dist[src] = 0
while distances:
d, u = heappop(distances)
if processed[u] == 1:
processed[u] = 2

dd = d + w
if processed[v] == 0 or dd < dist[v]:
heappush(distances, (dd, v))
dist[v] = dd
if processed[v] == 0:
processed[v] = 1
return {i: dist[i] for i in G if dist[i] != INF}

print("------------  Generating Graph --------------------")
debut = clock()

n = 3000
p = 0.3

G = make_graph(n, p)
src = randrange(n)
nodes = G.vertices()
gtime = (clock() - debut)
print "Number of nodes:", len(G)
print "Number of edges:", len(G.edges())
print("Graph generation: %.2fs" % gtime)

print("------------  Sage --------------------")

debut = clock()
dist_sage = G.shortest_path_lengths(
src, by_weight=True)
default_time = (clock() - debut)
print("Default: %.2fs" % default_time)

print("------------  Boost --------------------")

debut = clock()
dist_boost = G.shortest_path_lengths(
src, by_weight=True, algorithm='Dijkstra_Boost')
boost_time = (clock() - debut)
print("Boost: %.2fs" % boost_time)

print("------------  Pure Python --------------------")
debut = clock()
dist_pp = dijkstra(G, src)
py_time = (clock() - debut)
print("Pure Python: %.2fs" % py_time)

print "----------"
print "Checking:", dist_sage == dist_boost == dist_pp

print "Time ratio: %.2f" %(py_time/boost_time)


output:

------------  Generating Graph --------------------
Number of nodes: 3000
Number of edges: 1349390
Graph generation: 7.74s
------------  Sage --------------------
Default: 2.23s
------------  Boost --------------------
Boost: 1.53s
------------  Pure Python --------------------
Pure Python: 2.54s
----------
Checking: True
Time ratio: 1.66


By the way, the docs explain about the default implementation:

(default): Sage chooses the best algorithm: 'BFS' if by_weight is False, 'Dijkstra_Boost' if all weights are positive, 'Bellman-Ford_Boost' otherwise.

This is not consistent with the timing above.above. [As explained by David Coudert in the comments, the difference is due to sign checking]