1.pycuda._driver.LogicError: cuDeviceGet failed: initialization error报错怎么办?
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "a.py", line 15, in dott
    ctx = cuda.Device(0).make_context()
pycuda._driver.LogicError: cuDeviceGet failed: initialization error
答:cuDeviceGet失败:初始化错误。显卡初始化失败。
把显卡初始化的代码写到函数内部即可。或者用这句代码试试
mp.set_start_method('spawn')
这行代码执行完,意味着原来的进程和子进程已经隔离开.
2.上面提到隔离开,那父进程和子进程怎么通信?
答:三种方法,1.消息队列。2.pipe管道。3.数据共享
# Passing data between processes with a multiprocessing Queue.
from multiprocessing import Process, Queue


def f(qq):
    """Child-process target: put one message into the shared queue *qq*."""
    qq.put([42, None, 'hello'])


if __name__ == '__main__':
    q = Queue()  # queue created in the parent process
    # Hand the parent's queue to the child so data can flow back to the parent.
    p = Process(target=f, args=(q,))
    p.start()  # launch the child process
    # The parent gets what the child put -> parent/child data transfer.
    print(q.get())  # prints "[42, None, 'hello']"
    p.join()
# NOTE: a thread-level queue (queue.Queue) cannot be handed to another process.
# Passing data between processes through a Pipe (two connected endpoints).
from multiprocessing import Process, Pipe


def f(conn):
    """Child-process target: send two messages, receive one reply, close."""
    conn.send([42, None, 'hello from child'])
    conn.send([43, None, 'hello from child2'])
    print("child recv:", conn.recv())  # blocks until the parent sends
    conn.close()


if __name__ == '__main__':
    parent_conn, child_conn = Pipe()  # create the pipe, take both ends
    p = Process(target=f, args=(child_conn,))
    p.start()
    print(parent_conn.recv())  # prints "[42, None, 'hello from child']"
    print(parent_conn.recv())  # prints "[43, None, 'hello from child2']"
    parent_conn.send("hello send by parent")
    p.join()
manager数据共享
from multiprocessing import Process, Manager
import os


def f(d, l):
    """Executed by each child: record this process's pid in the shared
    dict *d* and append it to the shared list *l*."""
    d[os.getpid()] = os.getpid()
    l.append(os.getpid())
    print(l)


if __name__ == '__main__':
    with Manager() as manager:
        d = manager.dict()  # dict shared between parent and all children
        l = manager.list(range(5))  # shared list, pre-seeded with 5 items
        p_list = []  # keep handles so every child can be joined
        for i in range(10):
            p = Process(target=f, args=(d, l))
            p.start()  # start 10 child processes
            p_list.append(p)
        for res in p_list:  # wait for each child to finish
            res.join()
        print(d)
        print(l)
代码如下
# import pycuda.autoinit
import pycuda.driver as cuda
import numpy
import multiprocessing as mp
from pycuda.compiler import SourceModule
# export PATH=/usr/local/cuda/bin:$PATH
def dott(array_a, array_b):
    """Element-wise product of two float32 arrays, computed on the GPU.

    CUDA is initialised *inside* the worker function on purpose: with the
    'spawn' start method each child process must create its own driver
    context, otherwise cuDeviceGet fails with "initialization error".

    Returns the product array (same shape/dtype as *array_a*).
    """
    cuda.init()
    ctx = cuda.Device(0).make_context()
    try:
        mod = SourceModule("""
__global__ void dot(float *dest, float *a, float *b)
{
const int i = threadIdx.x;
dest[i] = a[i] * b[i];
}
""")
        dot = mod.get_function("dot")
        dest = numpy.zeros_like(array_a)
        # One thread per element; assumes len(array_a) fits in a single block.
        dot(cuda.Out(dest), cuda.In(array_a), cuda.In(array_b),
            block=(len(array_a), 1, 1), grid=(1, 1))
        print(dest)
        print("运行成功!")
        return dest
    finally:
        # Always release the context, even if the kernel launch raises,
        # so the device is not left with a dangling context.
        ctx.pop()
if __name__ == "__main__":
a = numpy.random.normal(size=40).astype(numpy.float32)
b = numpy.random.normal(size=40).astype(numpy.float32)
mp.set_start_method('spawn')
ps = []
for i in range(2):
p = mp.Process(target=dott, args=((a,b)))
ps.append(p)
p.start()
for p in ps:
p.join()