1、MATLAB调用CUDA的实现方式:用mex命令编译带有mexFunction入口的C/C++文件,用nvcc编译cu文件;大致流程为:MATLAB调用C文件,C文件调用cu文件。
2、C调用cu之前,先使用system调用nvcc,把cu文件编译为.o文件,供cpp文件链接使用,这样C就能调用cu了。随后MATLAB通过mex编译包含mexFunction的文件,就能生成MATLAB可调用的mexa64/32文件函数。
3、Linux下的MATLAB调用CUDA举例:
(1)matlab下nvmex函数:
function nvmex(cuFileName)
%NVMEX Compile a CUDA source file with nvcc and link it into a MEX function.
%   NVMEX(CUFILENAME) runs nvcc on CUFILENAME to produce <name>.o in the
%   current folder, then calls MEX to link that object file against the
%   CUDA runtime (cudart). The resulting MEX function is named after the
%   .cu file, so the file name must match the function name you intend to
%   call from MATLAB.
%
%   An error is raised if the platform is not recognised or if nvcc
%   returns a non-zero exit status.
%
%   If the CUDA code and the C++ MEX gateway live in separate files, pass
%   the .cpp file to mex together with the object file, e.g.
%       mex([name '.cpp'], [name '.o'], ['-L' cudaLibDir], '-lcudart')

    % ---- Things to modify: these paths must match the local installation,
    % ---- otherwise compilation or linking will fail.
    arch = computer('arch');
    if ispc
        % Windows
        hostCompilerOption = '-ccbin "C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_amd64"';
        cudaIncludeDir = fullfile(getenv('CUDA_PATH'), 'include');
        cudaSamplesIncludeDir = fullfile(getenv('NVCUDASAMPLES5_5_ROOT'), 'common', 'inc');
        picOption = '';
        switch arch
            case 'win32'
                machineOption = '--machine 32';
                cudaLibDir = fullfile(getenv('CUDA_PATH'), 'lib', 'Win32');
            case 'win64'
                machineOption = '--machine 64';
                cudaLibDir = fullfile(getenv('CUDA_PATH'), 'lib', 'x64');
            otherwise
                error('nvmex:unsupportedPlatform', ...
                      'Unsupported platform: %s', arch);
        end
    else
        % Linux (assuming gcc is on the path)
        hostCompilerOption = '';
        cudaIncludeDir = '/usr/local/cuda/include';
        cudaSamplesIncludeDir = '/usr/local/cuda/samples/common/inc';
        picOption = '--compiler-options -fPIC';  % MEX files are shared libraries
        machineOption = '';
        switch arch
            case {'glnx86', 'glnxa32'}   % 32-bit Linux
                cudaLibDir = '/usr/local/cuda/lib';
            case 'glnxa64'               % 64-bit Linux
                cudaLibDir = '/usr/local/cuda/lib64';
            otherwise
                error('nvmex:unsupportedPlatform', ...
                      'Unsupported platform: %s', arch);
        end
    end
    % ---- End of things to modify ----

    [~, baseName] = fileparts(cuFileName);
    objectFile = [baseName '.o'];

    % Every path is quoted exactly (no padding inside the quotes) so that
    % directories containing spaces survive the shell.
    nvccCommandLine = sprintf( ...
        'nvcc --compile %s -o "%s" %s %s -I"%s" -I"%s" -I"%s" "%s"', ...
        hostCompilerOption, objectFile, machineOption, picOption, ...
        fullfile(matlabroot, 'extern', 'include'), ...
        cudaIncludeDir, cudaSamplesIncludeDir, cuFileName);

    disp(nvccCommandLine);
    status = system(nvccCommandLine);
    % system returns the command's exit status: zero means success, and a
    % failed compile reports a non-zero (normally positive) value.
    if status ~= 0
        error('nvmex:nvccFailed', ...
              'Error invoking nvcc (exit status %d)', status);
    end

    % Link the object file into a MEX function. The function form of mex
    % is used so that no quoting is needed for the library directory.
    fprintf('mex %s -L%s -lcudart\n', objectFile, cudaLibDir);
    mex(objectFile, ['-L' cudaLibDir], '-lcudart');
end
(2)nvmex的调用:
clc; clear; close all
% The MEX function is named after the compiled .cu file, so the file given
% to nvmex must carry the same name as the function called below.
nvmex('AddVectors.cu');
A = [1 2 3];
B = [4 5 6];
C = AddVectors(A, B)   % no semicolon: display the result (expected [5 7 9])
(3)cu文件的内容:AddVectors.cu(生成的MEX函数名取自该文件名,因此文件名必须与MATLAB中调用的函数名AddVectors一致)
#include "AddVectors.h"
#include <stdlib.h>
#include "mex.h"
#include <cuda_runtime.h>

/*
 * Element-wise vector addition on the device:
 *     devPtrC[i] = devPtrA[i] + devPtrB[i]   for 0 <= i < size
 *
 * Launch layout: 1-D grid of 1-D blocks. The loop strides by the total
 * number of launched threads, so every element is processed for any grid
 * and block size and no thread touches memory past `size`.
 * Uses no shared memory. All three pointers must be device pointers to at
 * least `size` doubles.
 */
__global__ void addVectorsMask(double *devPtrA, double *devPtrB, double *devPtrC, int size)
{
    const size_t count  = (size > 0) ? (size_t)size : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    /* 64-bit indices: blockIdx.x * blockDim.x may not fit in an int. */
    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride) {
        devPtrC[i] = devPtrA[i] + devPtrB[i];
    }
}

/*
 * C-to-CUDA interface: computes C = A + B on the GPU.
 *
 * A, B : host arrays holding `size` doubles each (inputs)
 * C    : host array with room for `size` doubles (output)
 * size : number of elements; nothing is done when size <= 0
 *
 * The function copies A and B to the device, runs addVectorsMask, and
 * copies the result back into C. Any CUDA failure is reported to MATLAB
 * through mexErrMsgIdAndTxt after the device buffers have been released.
 */
void addVectors(double *A, double *B, double *C, int size)
{
    if (size <= 0) {
        return;
    }

    const size_t bytes = sizeof(double) * (size_t)size;
    double *devPtrA = NULL;
    double *devPtrB = NULL;
    double *devPtrC = NULL;

    /* Each step runs only if all previous steps succeeded. */
    cudaError_t err = cudaMalloc(&devPtrA, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&devPtrB, bytes);
    if (err == cudaSuccess) err = cudaMalloc(&devPtrC, bytes);
    if (err == cudaSuccess) err = cudaMemcpy(devPtrA, A, bytes, cudaMemcpyHostToDevice);
    if (err == cudaSuccess) err = cudaMemcpy(devPtrB, B, bytes, cudaMemcpyHostToDevice);

    if (err == cudaSuccess) {
        /* Ceil-divide without overflowing int (size > 0 here). */
        const int threadsPerBlock = 256;
        const int blocks = (size - 1) / threadsPerBlock + 1;

        addVectorsMask<<<blocks, threadsPerBlock>>>(devPtrA, devPtrB, devPtrC, size);
        err = cudaGetLastError();   /* launch-configuration errors */
    }

    /* The blocking copy also surfaces errors raised while the kernel ran. */
    if (err == cudaSuccess) err = cudaMemcpy(C, devPtrC, bytes, cudaMemcpyDeviceToHost);

    /* cudaFree accepts NULL, so this is safe after a partial failure. */
    cudaFree(devPtrA);
    cudaFree(devPtrB);
    cudaFree(devPtrC);

    if (err != cudaSuccess) {
        mexErrMsgIdAndTxt("AddVectors:cudaError", "CUDA error: %s", cudaGetErrorString(err));
    }
}
/*
 * C-to-MATLAB interface (MEX gateway): C = AddVectors(A, B)
 *
 * nlhs, plhs : number of requested outputs and the output array pointers
 * nrhs, prhs : number of inputs and the (read-only) input array pointers
 *
 * A and B must be real, full, double-precision vectors of identical size.
 * The result has the same dimensions as A. Invalid arguments are reported
 * to MATLAB with mexErrMsgTxt.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    /* Check the number of input and output arguments. MATLAB permits
       assigning plhs[0] even when nlhs is 0 (the result goes to `ans`). */
    if (nrhs != 2)
        mexErrMsgTxt("Invalid number of input arguments");
    if (nlhs > 1)
        mexErrMsgTxt("Invalid number of outputs");

    /* Check the input types: the data is read through mxGetPr as double,
       so anything else would be misinterpreted or read out of bounds. */
    for (int k = 0; k < 2; ++k) {
        if (!mxIsDouble(prhs[k]) || mxIsComplex(prhs[k]) || mxIsSparse(prhs[k]))
            mexErrMsgTxt("Input vectors must be real, full double arrays");
    }

    /* Input dimensions: mxGetM gives the row count, mxGetN the column count. */
    int numRowsA = (int)mxGetM(prhs[0]);   /* prhs[0] is the first argument  */
    int numColsA = (int)mxGetN(prhs[0]);
    int numRowsB = (int)mxGetM(prhs[1]);   /* prhs[1] is the second argument */
    int numColsB = (int)mxGetN(prhs[1]);

    /* Both inputs must have the same shape, and that shape must be a vector. */
    if (numRowsA != numRowsB || numColsA != numColsB)
        mexErrMsgTxt("Invalid size. The sizes of two vectors must be same");

    int minSize = (numRowsA < numColsA) ? numRowsA : numColsA;
    if (minSize != 1)
        mexErrMsgTxt("Invalid size. The vector must be one dimensional");

    /* One dimension is 1, so the product is the vector length. */
    int size = numRowsA * numColsA;

    /* Pointers to the input data. */
    double *A = mxGetPr(prhs[0]);
    double *B = mxGetPr(prhs[1]);

    /* Create the output array with the same shape as the inputs and get
       a pointer to its data. */
    plhs[0] = mxCreateDoubleMatrix(numRowsA, numColsA, mxREAL);
    double *C = mxGetPr(plhs[0]);

    addVectors(A, B, C, size);
}