Thrust 在内存管理方面并没有做任何特别的事情,默认的分配器只是调用 cudaMalloc,您所看到的是驱动程序内存管理器的页面大小选择算法在起作用。这一行为没有文档记录,也没有迹象表明它在不同平台和硬件版本之间保持一致。
话虽如此,如果我把您的代码扩展成更有用的形式:
#include <iostream>
#include <vector>
#include <thrust/system_error.h>
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
/**
 * Print one row of the memory report to stdout.
 *
 * Queries the current device with cudaMemGetInfo and prints
 * "<allocd>, <total bytes>, <free bytes>". When `first` is true, a header
 * line is printed before the row.
 *
 * If the query fails, the CUDA error string is written to stderr and no row
 * is printed, so indeterminate values are never reported as measurements.
 *
 * @param allocd  Value printed in the first column as-is (the caller passes
 *                the byte size of the allocation it just made).
 * @param first   Print the column header before the row.
 */
void report_mem(size_t allocd, bool first=false)
{
    size_t freeMem = 0, totalMem = 0;
    const cudaError_t status = cudaMemGetInfo(&freeMem, &totalMem);
    if (status != cudaSuccess) {
        std::cerr << "cudaMemGetInfo failed: "
                  << cudaGetErrorString(status) << std::endl;
        return;
    }

    if (first)
        std::cout << "Allocated | Total Memory | Free Memory "<< std::endl;
    std::cout << allocd << ", " << totalMem << ", " << freeMem << std::endl;
}
/**
 * Allocate a series of thrust::device_vector<float> objects of increasing
 * size and print the device's free memory after each allocation.
 *
 * Ten vectors are created for each element count 2^14, 2^16, 2^18, 2^20 and
 * 2^22. Every vector stays alive until main returns, so each reported
 * free-memory value reflects all allocations made so far.
 *
 * @return 0 on success, 1 if device 0 cannot be selected.
 */
int main()
{
    const cudaError_t status = cudaSetDevice(0);
    if (status != cudaSuccess) {
        std::cerr << "cudaSetDevice(0) failed: "
                  << cudaGetErrorString(status) << std::endl;
        return 1;
    }
    report_mem(0, true);

    // Element counts to allocate: nallocs vectors for each power of two.
    const int nallocs = 10;
    const int shifts[] = {14, 16, 18, 20, 22};
    std::vector<size_t> asizes;
    for (int shift : shifts) {
        for (int i = 0; i < nallocs; i++)
            asizes.push_back(size_t(1) << shift);
    }

    typedef thrust::device_vector<float> dvecf_t;

    // Hold the vectors by value so they are released when `allocs` goes out
    // of scope. Reserving up front means emplace_back never reallocates, so
    // no device_vector is copied or moved while the experiment runs.
    std::vector<dvecf_t> allocs;
    allocs.reserve(asizes.size());
    for (size_t nelems : asizes) {
        allocs.emplace_back(nelems);
        report_mem(allocs.back().capacity() * sizeof(float));
    }
    return 0;
}
并在 64 位 Windows 上、计算能力为 2.1 的设备上运行它,我得到:
Allocated | Total Memory | Free Memory
0, 1073741824, 1007849472
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
65536, 1073741824, 1006800896
262144, 1073741824, 1005752320
262144, 1073741824, 1005752320
262144, 1073741824, 1005752320
262144, 1073741824, 1005752320
262144, 1073741824, 1004703744
262144, 1073741824, 1004703744
262144, 1073741824, 1004703744
262144, 1073741824, 1004703744
262144, 1073741824, 1003655168
262144, 1073741824, 1003655168
1048576, 1073741824, 1002606592
1048576, 1073741824, 1001558016
1048576, 1073741824, 1000509440
1048576, 1073741824, 999460864
1048576, 1073741824, 998412288
1048576, 1073741824, 997363712
1048576, 1073741824, 996315136
1048576, 1073741824, 995266560
1048576, 1073741824, 994217984
1048576, 1073741824, 993169408
4194304, 1073741824, 988975104
4194304, 1073741824, 984780800
4194304, 1073741824, 980586496
4194304, 1073741824, 976392192
4194304, 1073741824, 972197888
4194304, 1073741824, 968003584
4194304, 1073741824, 963809280
4194304, 1073741824, 959614976
4194304, 1073741824, 955420672
4194304, 1073741824, 951226368
16777216, 1073741824, 934449152
16777216, 1073741824, 917671936
16777216, 1073741824, 900894720
16777216, 1073741824, 884117504
16777216, 1073741824, 867340288
16777216, 1073741824, 850563072
16777216, 1073741824, 833785856
16777216, 1073741824, 817008640
16777216, 1073741824, 800231424
可以看到,空闲内存始终以 1048576 字节的整数倍减少。我据此判断,在我测试的平台上分配粒度为 1 MiB(1048576 即 2^20 字节)。您的平台上可能有所不同。