下面是一个使用 Pool 的函数。只需传入 text 和 keyword_list 即可使用。你也可以改用 Pool.starmap 传递 (text, keyword) 元组,但那样就需要处理一个包含 1 万个 text 引用的可迭代对象。
from functools import partial
from multiprocessing import Pool
def search_worker(text, keyword):
    """Return 1 if ``keyword`` is found in ``text``, otherwise 0.

    The membership test uses the ``in`` operator, so for strings this is a
    substring check. The result is an ``int`` rather than a ``bool`` so that
    callers can add the per-keyword results together to get a match count.

    Args:
        text: The object to search in.
        keyword: The value to look for.

    Returns:
        ``1`` when ``keyword in text`` is true, ``0`` otherwise.
    """
    return int(keyword in text)
def parallel_search_text(text, keyword_list, *, processes=4, chunk_size=10):
    """Count how many keywords from ``keyword_list`` occur in ``text``.

    Each keyword is checked by ``search_worker`` in a worker process of a
    ``multiprocessing.Pool`` that is created for this call and shut down
    before returning.

    Args:
        text: The text to search; it is bound once as the first argument of
            ``search_worker`` via ``functools.partial``.
        keyword_list: The keywords to look for.
        processes: Number of worker processes in the pool.
        chunk_size: Number of keywords handed to a worker per task.

    Returns:
        The sum of the per-keyword results (1 per keyword found, 0 otherwise).
    """
    # Starting worker processes is costly; with no keywords the count is 0
    # and no pool is needed.
    if not keyword_list:
        return 0

    func = partial(search_worker, text)
    with Pool(processes=processes) as pool:
        # Result order does not matter for a sum, so the unordered variant
        # is used. All results are consumed before the pool is torn down.
        matches = pool.imap_unordered(func, keyword_list, chunksize=chunk_size)
        return sum(matches)
# The guard keeps this driver from running when the module is imported;
# the multiprocessing docs require it wherever worker processes re-import
# the main module.
if __name__ == '__main__':
    texts = []  # a list of texts
    keywords = []  # a list of keywords
    # Print the number of matching keywords for each text in turn.
    for text in texts:
        print(parallel_search_text(text, keywords))
创建进程池会产生开销,因此值得与简单的单进程文本搜索函数做一下性能对比。如果需要重复调用,可以只创建一个 Pool 实例并将其传入函数,以加快速度。
def parallel_search_text2(text, keyword_list, pool, *, chunk_size=10):
    """Count how many keywords from ``keyword_list`` occur in ``text``.

    The caller supplies the pool, so it can be reused across calls. This
    function neither creates nor closes it.

    Args:
        text: The text to search; it is bound once as the first argument of
            ``search_worker`` via ``functools.partial``.
        keyword_list: The keywords to look for.
        pool: An object providing ``imap_unordered(func, iterable,
            chunksize=...)``, such as a ``multiprocessing.Pool``.
        chunk_size: Number of keywords handed to a worker per task.

    Returns:
        The sum of the per-keyword results (1 per keyword found, 0 otherwise).
    """
    # With no keywords the count is 0; skip dispatching to the pool.
    if not keyword_list:
        return 0

    func = partial(search_worker, text)
    # Result order does not matter for a sum, so the unordered variant is used.
    return sum(pool.imap_unordered(func, keyword_list, chunksize=chunk_size))
# The guard keeps this driver from running when the module is imported;
# the multiprocessing docs require it wherever worker processes re-import
# the main module.
if __name__ == '__main__':
    texts = []  # a list of texts
    keywords = []  # a list of keywords
    # One pool is shared by every search. The with-block shuts the workers
    # down on exit instead of leaving them running.
    with Pool(processes=4) as pool:
        for text in texts:
            print(parallel_search_text2(text, keywords, pool))