如何创建单个数据库连接并让每个进程与其通信,以最大限度地减少每次迭代生成新连接的开销?
这是一些示例代码来说明我想要做什么:
import multiprocessing
import os.path
import hashlib
import sys
VALID_EXTENSIONS = ('.JPG', '.GIF', '.JPEG')  # accepted extensions (compared upper-cased)
MAX_FILE_SZ = 1000000  # files larger than this many bytes are skipped
#Declare a global mysql connection
# NOTE(review): neither MySQLdb nor config is imported in this snippet, so
# this line raises NameError as written -- add `import MySQLdb` / `import config`.
# NOTE(review): a connection opened here, before multiprocessing.Pool forks,
# is inherited by every worker; MySQL connections are not fork-safe, so each
# worker should open its own connection (e.g. inside init()).
db = MySQLdb.connect(host="localhost",
                     user=config.mysql_user,
                     passwd=config.mysql_pass,
                     db=config.mysql_db)
def md5_file(fname):
    """MD5-hash *fname* and put (fname, hexdigest) on the shared queue.

    On IOError, (fname, None) is put instead.  The queue is attached to this
    function by init() when each worker process starts.
    """
    try:
        # 'rb' is required: md5 operates on bytes, and text mode would
        # translate newlines on some platforms (and fail on Python 3).
        with open(fname, 'rb') as fo:
            m = hashlib.md5()
            chunk_sz = m.block_size * 128  # read in block-size multiples
            data = fo.read(chunk_sz)
            while data:
                m.update(data)
                data = fo.read(chunk_sz)
        md5_hash = m.hexdigest()
        md5_file.queue.put((fname, md5_hash))
        #DATABASE LOGIC
        # NOTE(review): `db` is a module-level connection created in the
        # parent process; sharing it across forked workers is unsafe -- each
        # worker should hold its own connection (see init()).
        cursor = db.cursor()
        cursor.execute("""INSERT INTO ...""")
    except IOError:
        md5_file.queue.put((fname, None))
def is_valid_file(fname):
    """Return True when fname has a whitelisted extension and fits the size cap."""
    _, extension = os.path.splitext(fname)
    size = os.path.getsize(fname)
    return extension.upper() in VALID_EXTENSIONS and size <= MAX_FILE_SZ
def init(queue):
    """Pool initializer: runs once in each worker, attaching the shared
    result queue to md5_file.

    NOTE(review): this is also the natural place for each worker to open its
    own DB connection (e.g. md5_file.db = MySQLdb.connect(...)) instead of
    sharing the parent's global `db` across forks.
    """
    md5_file.queue = queue
def main():
    """Walk sys.argv[1], MD5 every valid image file in a worker pool, and
    print the (fname, md5sum) results.

    md5sum is None for files that raised IOError.  Returns 0 on success.
    """
    # Holds tuple (fname, md5sum) / md5sum will be none if an IOError occurs
    queue = multiprocessing.Queue()
    # Each worker runs init(queue) once so md5_file can reach the queue.
    pool = multiprocessing.Pool(None, init, [queue])
    for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
        # Convert filenames to full paths...
        full_path_fnames = map(lambda fn: os.path.join(dirpath, fn),
                               filenames)
        full_path_fnames = filter(is_valid_file, full_path_fnames)
        pool.map(md5_file, full_path_fnames)
    # Shut the pool down so no worker is still writing when we drain the
    # queue below (the original leaked the pool and risked draining early).
    pool.close()
    pool.join()
    # Dump the queue.  Queue.empty() is unreliable while feeders are live,
    # but with the pool joined above this drain is safe in practice.
    while not queue.empty():
        print(queue.get())
    return 0
# Script entry point: process exit status propagates main()'s return value.
if __name__ == '__main__':
    sys.exit(main())
None
本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有涉嫌抄袭侵权的内容,请联系:hwhale#tublm.com(使用前将#替换为@)