Python Pool with worker Processes

Since Python 3.3 you can use `Pool.starmap`, which lets you pass multiple arguments to the worker function AND get the results back, all with a very simple syntax:

import multiprocessing

# CPU count as reported by multiprocessing; used below as the pool's process count.
nb_cores = multiprocessing.cpu_count()

def caps(nb, letter):
    """Print the task number *nb* and return *letter* converted to upper case."""
    print('Exec nb:', nb)
    uppercased = letter.upper()
    return uppercased

if __name__ == '__main__':

    multiprocessing.freeze_support() # for Windows, also requires to be in the statement: if __name__ == '__main__'

    input_data = ['a','b','c','d','e','f','g','h']

    # The with-block terminates the pool's worker processes on exit; starmap
    # blocks until every result is available, so nothing is lost by that.
    with multiprocessing.Pool(processes=nb_cores) as pool:
        # Number the items with enumerate (starting at 1) instead of keeping a
        # separate hand-written list of indices: the two lists could drift out
        # of sync, and zip() would then silently drop the unmatched entries.
        results = pool.starmap(caps, enumerate(input_data, start=1))

    print(results)

I would suggest that you use a Queue for this.

from multiprocessing import Process


class Worker(Process):
    """Process that consumes work items from a queue until it receives ``None``.

    Each item fetched from the queue is passed to :meth:`process`. A ``None``
    item acts as the shutdown sentinel: it ends the loop in :meth:`run` and is
    not passed to :meth:`process`.
    """

    def __init__(self, queue):
        """Store *queue*, the object whose ``get()`` supplies the work items."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Print the start-up messages, then handle items until the sentinel."""
        print('Worker started')
        # do some initialization here

        print('Computing things!')
        # iter(callable, sentinel) keeps calling queue.get() and stops as soon
        # as it returns None, without yielding that None.
        for data in iter(self.queue.get, None):
            self.process(data)

    def process(self, data):
        """Handle a single work item.

        Does nothing by default; override it in a subclass to use *data*.
        """

Now you can start a pile of these, all getting work from a single queue:

from multiprocessing import Queue

# Number of worker processes; also the number of shutdown sentinels to send.
NUM_WORKERS = 4

# Processes must be started from under the __main__ guard: with the "spawn"
# start method the child re-imports this module, and unguarded start-up code
# would run again in every child.
if __name__ == '__main__':
    request_queue = Queue()
    workers = [Worker(request_queue) for _ in range(NUM_WORKERS)]
    for worker in workers:
        worker.start()
    for data in the_real_source:
        request_queue.put(data)
    # Sentinel objects to allow clean shutdown: 1 per worker.
    for _ in workers:
        request_queue.put(None)
    # Wait for every worker to drain its share of the queue and exit.
    for worker in workers:
        worker.join()

That kind of thing should allow you to amortize the expensive startup cost across multiple workers.


`initializer` expects an arbitrary callable that performs initialization (e.g., it can set some globals), not a `Process` subclass; `map` accepts an arbitrary iterable:

#!/usr/bin/env python
import multiprocessing as mp

def init(val):
    """Print a placeholder initialization message; *val* is accepted but unused."""
    message = 'do some initialization here'
    print(message)

def compute(data):
    """Print a progress message and return *data* multiplied by itself."""
    print('Computing things!')
    squared = data * data
    return squared

def produce_data():
    """Yield -100, then the integers 0 through 9, then 100."""
    sequence = (-100, *range(10), 100)
    yield from sequence

if __name__ == "__main__":
    # Use the pool as a context manager so its worker processes are always
    # shut down, even if map() raises. map() blocks until all results are
    # ready, so the pool can safely be terminated when the block exits.
    with mp.Pool(initializer=init, initargs=('arg',)) as p:
        print(p.map(compute, produce_data()))