我有以下测试模块(MyMod.jl)来在 Julia 中存储一些测试函数。一些核心函数是串行编写的。其他函数并行调用核心函数。
module MyMod
export Dummy,distribute_data,getfrom,recombine_data,regular_test,parallel_test
function Dummy(icol,model,data,A,B) # Generate data from a model
nz,nx,nh = size(model) # = size(A) = size(B)
for ih = 1:nh
for ix = 1:nx
for iz = 1:nz
data[iz,icol] += A[iz,ix,ih]*B[iz,ix,ih]*model[iz,ix,ih]
end
end
end
end
function distribute_data(X, obj_name_on_worker::Symbol, dim) # Distributes X over workers
size_per_worker = floor(Int,size(X,1) / nworkers())
StartIdx = 1
EndIdx = size_per_worker
for (idx, pid) in enumerate(workers())
if idx == nworkers()
EndIdx = size(X,1)
end
println(StartIdx:EndIdx)
if dim == 3
@spawnat(pid, eval(Main, Expr(:(=), obj_name_on_worker, X[StartIdx:EndIdx,:,:])))
elseif dim == 2
@spawnat(pid, eval(Main, Expr(:(=), obj_name_on_worker, X[StartIdx:EndIdx,:])))
end
StartIdx = EndIdx + 1
EndIdx = EndIdx + size_per_worker - 1
end
end
getfrom(p::Int, nm::Symbol; mod=Main) = fetch(@spawnat(p, getfield(mod, nm)))
function recombine_data(Data::Symbol) # gather data from workers
Results = cell(nworkers())
for (idx, pid) in enumerate(workers())
Results[idx] = getfrom(pid, Data)
end
return vcat(Results...)
end
function regular_test(model,data,A,B)
ncol=size(data,2)
map((arg)->Dummy(arg,model,data,A,B),[icol for icol = 1:ncol])
end
function parallel_test(model,data,A,B)
distribute_data(model, :model, 3)
distribute_data(A, :A, 3)
distribute_data(B, :B, 3)
distribute_data(data, :data, 2)
@everywhere ncol=size(data,2)
@everywhere begin
if myid() != 1
map((arg)->Dummy(arg,model,data,A,B),[icol for icol = 1:ncol])
end
end
P_Data = recombine_data(:data)
return P_Data
end
end
然后我打开 Julia 会话并运行:
addprocs(3)
using MyMod
nx = 250;
nz = 350;
nh = 150;
ncol = 125;
model = rand(nz,nx,nh);
data = SharedArray(Float64,nz,ncol);
A = rand(nz,nx,nh);
B = rand(nz,nx,nh);
@time P_Data = parallel_test(model,data,A,B);
@time regular_test(model,data,A,B);
P_Data == data
The regular_test
按预期运行,但是parallel_test
产生以下错误:
ERROR: On worker 2:
UndefVarError: Dummy not defined
in anonymous at /home/username/Desktop/MyMod.jl:58
in map at ./essentials.jl:153
in anonymous at /home/username/Desktop/MyMod.jl:58
in eval at ./sysimg.jl:14
in anonymous at multi.jl:1378
in anonymous at multi.jl:907
in run_work_thunk at multi.jl:645
[inlined code] from multi.jl:907
in anonymous at task.jl:63
in remotecall_fetch at multi.jl:731
in remotecall_fetch at multi.jl:734
in anonymous at multi.jl:1380
...and 2 other exceptions.
in sync_end at ./task.jl:413
[inlined code] from multi.jl:1389
in parallel_test at /home/username/Desktop/MyMod.jl:51
我需要做什么调整parallel_test
为了防止这个问题?