forked from jeffhammond/stencil-demo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprk_cuda.h
118 lines (104 loc) · 4.56 KB
/
prk_cuda.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#ifndef PRK_CUDA_HPP
#define PRK_CUDA_HPP
#include <algorithm>
#include <array>
#include <cstdlib>
#include <iostream>
#include <vector>
#ifndef __NVCC__
#warning Please compile CUDA code with CC=nvcc.
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <cuda_device_runtime_api.h>
#endif
namespace prk
{
namespace CUDA
{
/// Abort the program with a diagnostic if a CUDA runtime call failed.
///
/// @param rc  status code returned by a CUDA runtime API call.
///
/// Returns normally when rc is cudaSuccess. Otherwise writes
/// "PRK CUDA error: " followed by cudaGetErrorString(rc) to std::cerr
/// and calls std::abort().
///
/// Declared inline because the definition lives in a header: a non-inline
/// definition would be emitted in every translation unit that includes
/// this file and fail at link time with duplicate symbols.
inline void check(cudaError_t rc)
{
    if (rc != cudaSuccess) {
        std::cerr << "PRK CUDA error: " << cudaGetErrorString(rc) << std::endl;
        std::abort();
    }
}
class info {
private:
int nDevices;
std::vector<cudaDeviceProp> vDevices;
public:
int maxThreadsPerBlock;
std::array<unsigned,3> maxThreadsDim;
std::array<unsigned,3> maxGridSize;
info() {
prk::CUDA::check( cudaGetDeviceCount(&nDevices) );
vDevices.resize(nDevices);
for (int i=0; i<nDevices; ++i) {
cudaGetDeviceProperties(&(vDevices[i]), i);
if (i==0) {
maxThreadsPerBlock = vDevices[i].maxThreadsPerBlock;
for (int j=0; j<3; ++j) {
maxThreadsDim[j] = vDevices[i].maxThreadsDim[j];
maxGridSize[j] = vDevices[i].maxGridSize[j];
}
}
}
}
// do not use cached value as a hedge against weird stuff happening
int num_gpus() {
int g;
prk::CUDA::check( cudaGetDeviceCount(&g) );
return g;
}
int get_gpu() {
int g;
prk::CUDA::check( cudaGetDevice(&g) );
return g;
}
void set_gpu(int g) {
prk::CUDA::check( cudaSetDevice(g) );
}
void print(int n=1000) {
for (int i=0; i<std::min(n,nDevices); ++i) {
std::cout << "device name: " << vDevices[i].name << "\n";
std::cout << "total global memory: " << vDevices[i].totalGlobalMem << "\n";
std::cout << "max threads per block: " << vDevices[i].maxThreadsPerBlock << "\n";
std::cout << "max threads dim: " << vDevices[i].maxThreadsDim[0] << ","
<< vDevices[i].maxThreadsDim[1] << ","
<< vDevices[i].maxThreadsDim[2] << "\n";
std::cout << "max grid size: " << vDevices[i].maxGridSize[0] << ","
<< vDevices[i].maxGridSize[1] << ","
<< vDevices[i].maxGridSize[2] << "\n";
std::cout << "memory clock rate (KHz): " << vDevices[i].memoryClockRate << "\n";
std::cout << "memory bus width (bits): " << vDevices[i].memoryBusWidth << "\n";
}
}
bool checkDims(dim3 dimBlock, dim3 dimGrid) {
if (dimBlock.x > maxThreadsDim[0]) {
std::cout << "dimBlock.x too large" << std::endl;
return false;
}
if (dimBlock.y > maxThreadsDim[1]) {
std::cout << "dimBlock.y too large" << std::endl;
return false;
}
if (dimBlock.z > maxThreadsDim[2]) {
std::cout << "dimBlock.z too large" << std::endl;
return false;
}
if (dimGrid.x > maxGridSize[0]) {
std::cout << "dimGrid.x too large" << std::endl;
return false;
}
if (dimGrid.y > maxGridSize[1]) {
std::cout << "dimGrid.y too large" << std::endl;
return false;
}
if (dimGrid.z > maxGridSize[2]) {
std::cout << "dimGrid.z too large" << std::endl;
return false;
}
return true;
}
};
} // CUDA namespace
} // prk namespace
#endif // PRK_CUDA_HPP