How to parallelize a function?
I asked a question about finding all non-crossing tuples which have the same content as a given tableau. The following codes in one answer works well:
from numpy import array
from collections import Counter
import itertools
import multiprocessing
from sage.parallel.multiprocessing_sage import Pool
def NonCrossingTupleWithSameContentAsT(T):
"""
Iterator over the non crossing tuples with same content as T.
"""
if not T or not T[0]:
return []
k = len(T[0])
m = len(T) # number of columns of T
content = sorted(flatten(T))
# Unfortunately, we need to record yielded solutions to avoid duplicates
#seen = set()
r=[]
for c in candidates(Counter(content), k, m):
if IsNonCrossingList(c):
r.append(tuple(sorted(c)))
# if t not in seen:
# yield t
# seen.add(t)
r=removeDuplicatesListOfLists(r)
return r
def candidates(content, k, m):
"""
Iterator over the m-tuples of k elements from content.
With this implementation, elements are not repeated in a same tuple.
INPUT:
- ``content`` -- a ``Counter`` associated to each possible element it's
multiplicity
- ``k`` -- number of elements per tuple
- ``m`` -- requested number of tuples
"""
if len(content) < k:
return
for I in itertools.combinations(content, k):
# We ensure that elements inside a k-tuple are sorted
T = (tuple(sorted(I)), )
if m == 1:
yield T
else:
for L in candidates(content - Counter(I), k, m - 1):
yield T + L
def IsNonCrossing(I,J): # I,J have the same size
k = len(I)
for a in range(k - 1):
for b in range(a + 1, k):
if (I[a+1:b] == J[a+1:b] and
not IsWeaklySeparated(I[a:b+1], J[a:b+1])):
return False
return True
def IsNonCrossingList(L):
return all(IsNonCrossing(I, J) for I, J in itertools.combinations(L, 2))
def IsWeaklySeparated(I,J): # I,J have the same size
r1 = sorted(set(I).difference(J))
r2 = sorted(set(J).difference(I))
if not r1 or not r2:
return True
if min(r1) >= min(r2):
r1, r2 = r2, r1
# Search for an index i such that list r1[:i+1] + r2 + r1[i+1:] is sorted
for i in range(len(r1) - 1):
if r2[0] < r1[i+1]:
# We check if list r1[:i+1] + r2 + r1[i+1:] is ordered
return r2[-1] <= r1[i+1]
# It remains to check if list r1 + r2 is ordered
return r1[-1] <= r2[0]
def removeDuplicatesListOfLists(l): # very fast
l.sort()
r=list(l for l,_ in itertools.groupby(l))
return r
I think that the functions candidates and NonCrossingTupleWithSameContentAsT takes most of the time of the program. So I would like to parallelize them. I tried to parallelize NonCrossingTupleWithSameContentAsT as follows:
def NonCrossingTupleWithSameContentAsTParallelMiddleStep(c):
r=[]
if IsNonCrossingList(c):
r.append(tuple(sorted(c)))
return r
def NonCrossingTupleWithSameContentAsTParallel(T):
"""
Iterator over the non crossing tuples with same content as T.
"""
if not T or not T[0]:
return []
k = len(T[0])
m = len(T) # number of columns of T
content = sorted(flatten(T))
# Unfortunately, we need to record yielded solutions to avoid duplicates
#seen = set()
print(content)
N_CPU = multiprocessing.cpu_count()
with Pool(processes=N_CPU) as p: #...map below action to as many cores as available
bb = p.starmap(NonCrossingTupleWithSameContentAsTParallelMiddleStep,[(c) for c in candidates(Counter(content), k, m)])
r=[]
for i in bb:
r=r+i
r=removeDuplicatesListOfLists(r)
return r
But it has some error when I run the following
r1=[[1,2,4],[3,5,6]]
r2=NonCrossingTupleWithSameContentAsTParallel(r1)
r2
TypeError: NonCrossingTupleWithSameContentAsTParallelMiddleStep() takes 1 positional argument but 2 were given
Do you know how to solve this problem? Thank you very much!
The following example takes about 1 hour.
r1=[[1, 3, 6], [1, 3, 7], [2, 4, 7], [2, 4, 9], [5, 8, 9], [5, 8, 9]]
r2=NonCrossingTupleWithSameContentAsT(r1)
r2
Is there some way to parallelize it such that it takes a few minutes or less on an HPC? Thank you very much!