CUDA 的内存分配方式比较繁杂,这里总结一下 CUDA 数组(cudaArray)的分配方法。
概述:CUDA 数组可以通过 cudaMallocArray()(一维或二维数组)和 cudaMalloc3DArray()(一维、二维或三维数组)来分配。
1、cudaMallocArray()
cudaError_t cudaMallocArray ( struct cudaArray ** array,
const struct cudaChannelFormatDesc * desc,
size_t width,
size_t height = 0,
unsigned int flags = 0
)
例:分配一个 128×128 的二维 float 数组,并把主机端数据 h_volume 拷贝到该数组中
1 cudaArray *d_volumeArray;
2 cudaChannelFormatDesc desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat);
3 cudaMallocArray(&d_volumeArray, &desc, 128, 128);
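4 cudaMemcpyToArray(d_volumeArray, 0, 0, h_volume, sizeof(float)*128*128, cudaMemcpyHostToDevice);
注:h_volume 是主机端内存,因此拷贝方向应为 cudaMemcpyHostToDevice;另外 cudaMemcpyToArray() 在较新的 CUDA 版本中已被标记为弃用,新代码建议改用 cudaMemcpy2DToArray()。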
2、cudaMalloc3DArray()
1 cudaError_t cudaMalloc3DArray (
2 struct cudaArray ** array,
3 const struct cudaChannelFormatDesc * desc,
4 struct cudaExtent extent,
5 unsigned int flags = 0
6 )
例:(假设 uchar 是 unsigned char 的类型别名,volumSize 是事先用 make_cudaExtent(宽, 高, 深) 构造好的 cudaExtent)
1 cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<uchar>();
2 cudaArray *d_volumeArray;
3 cudaMalloc3DArray(&d_volumeArray, &channelDesc, volumSize);