# How to parallelize a function?

I asked a question about finding all non-crossing tuples which have the same content as a given tableau. The following codes in one answer works well:

from numpy import array
from collections import Counter
import itertools
import multiprocessing
from sage.parallel.multiprocessing_sage import Pool

def NonCrossingTupleWithSameContentAsT(T):
"""
Iterator over the non crossing tuples with same content as T.
"""
if not T or not T[0]:
return []

k = len(T[0])
m = len(T)  # number of columns of T
content = sorted(flatten(T))

# Unfortunately, we need to record yielded solutions to avoid duplicates
#seen = set()
r=[]
for c in candidates(Counter(content), k, m):
if IsNonCrossingList(c):
r.append(tuple(sorted(c)))
#             if t not in seen:
#                 yield t
r=removeDuplicatesListOfLists(r)
return r

def candidates(content, k, m):
"""
Iterator over the m-tuples of k elements from content.

With this implementation, elements are not repeated in a same tuple.

INPUT:

- content -- a Counter associated to each possible element it's
multiplicity

- k -- number of elements per tuple

- m -- requested number of tuples
"""
if len(content) < k:
return
for I in itertools.combinations(content, k):
# We ensure that elements inside a k-tuple are sorted
T = (tuple(sorted(I)), )
if m == 1:
yield T
else:
for L in candidates(content - Counter(I), k, m - 1):
yield T + L

def IsNonCrossing(I,J): # I,J have the same size
k = len(I)

for a in range(k - 1):
for b in range(a + 1, k):
if (I[a+1:b] == J[a+1:b] and
not IsWeaklySeparated(I[a:b+1], J[a:b+1])):
return False

return True

def IsNonCrossingList(L):
return all(IsNonCrossing(I, J) for I, J in itertools.combinations(L, 2))

def IsWeaklySeparated(I,J): # I,J have the same size
r1 = sorted(set(I).difference(J))
r2 = sorted(set(J).difference(I))

if not r1 or not r2:
return True

if min(r1) >= min(r2):
r1, r2 = r2, r1

# Search for an index i such that list r1[:i+1] + r2 + r1[i+1:] is sorted
for i in range(len(r1) - 1):
if r2[0] < r1[i+1]:
# We check if list r1[:i+1] + r2 + r1[i+1:] is ordered
return r2[-1] <= r1[i+1]

# It remains to check if list r1 + r2 is ordered
return r1[-1] <= r2[0]

def removeDuplicatesListOfLists(l): # very fast
l.sort()
r=list(l for l,_ in itertools.groupby(l))

return r


I think that the functions candidates and NonCrossingTupleWithSameContentAsT takes most of the time of the program. So I would like to parallelize them. I tried to parallelize NonCrossingTupleWithSameContentAsT as follows:

def NonCrossingTupleWithSameContentAsTParallelMiddleStep(c):
r=[]
if IsNonCrossingList(c):
r.append(tuple(sorted(c)))
return r

def NonCrossingTupleWithSameContentAsTParallel(T):
"""
Iterator over the non crossing tuples with same content as T.
"""
if not T or not T[0]:
return []

k = len(T[0])
m = len(T)  # number of columns of T
content = sorted(flatten(T))

# Unfortunately, we need to record yielded solutions to avoid duplicates
#seen = set()

print(content)

N_CPU = multiprocessing.cpu_count()
with Pool(processes=N_CPU) as p: #...map below action to as many cores as available
bb = p.starmap(NonCrossingTupleWithSameContentAsTParallelMiddleStep,[(c) for c in candidates(Counter(content), k, m)])

r=[]
for i in bb:
r=r+i

r=removeDuplicatesListOfLists(r)
return r


But it has some error when I run the following

r1=[[1,2,4],[3,5,6]]
r2=NonCrossingTupleWithSameContentAsTParallel(r1)
r2


TypeError: NonCrossingTupleWithSameContentAsTParallelMiddleStep() takes 1 positional argument but 2 were given

Do you know how to solve this problem? Thank you very much!

The following example takes about 1 hour.

r1=[[1, 3, 6], [1, 3, 7], [2, 4, 7], [2, 4, 9], [5, 8, 9], [5, 8, 9]]
r2=NonCrossingTupleWithSameContentAsT(r1)
r2


Is there some way to parallelize it such that it takes a few minutes or less on an HPC? Thank you very much!

edit retag close merge delete

Sort by ยป oldest newest most voted

A tuple composed of 1 element should be (c,) not (c).

more