|
1 | 1 | #include "functions.cuh"
|
2 | 2 | #include "hardware/devices/nvidia.h"
|
3 | 3 | #include "hardware/mem_pool.h"
|
4 |
| -#include "memory.cuh" |
| 4 | + |
| 5 | +#ifdef USE_CUDA |
| 6 | +#include "memory.hh" |
| 7 | +#include <cuda_runtime.h> |
| 8 | + |
// Evaluates the CUDA runtime call STATUS exactly once. If the result is not
// cudaSuccess, reports through RUNTIME_ERROR a message containing the
// stringified call, the text from cudaGetErrorString and the numeric code.
//
// The body is wrapped in do { ... } while (0) so that `CUDA_ASSERT(x);` is a
// single statement: a bare `if` here would capture a following `else` when the
// macro is used inside an unbraced if/else.
#define CUDA_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto status = (STATUS); status != cudaSuccess) {                             \
            RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cudaGetErrorString(status), (int) status));        \
        }                                                                                \
    } while (0)
| 14 | +#endif |
5 | 15 |
|
6 | 16 | namespace refactor::hardware {
|
7 | 17 |
|
8 | 18 | static Arc<Memory> cudaMemory(int32_t card) {
|
9 | 19 | #ifdef USE_CUDA
|
10 |
| - ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); |
11 |
| - setDevice(card); |
12 |
| - auto [free, total] = getMemInfo(); |
| 20 | + int deviceCount; |
| 21 | + CUDA_ASSERT(cudaGetDeviceCount(&deviceCount)); |
| 22 | + ASSERT(0 <= card && card < deviceCount, "Invalid card id: {}", card); |
| 23 | + CUDA_ASSERT(cudaSetDevice(card)); |
| 24 | + |
| 25 | + size_t free, total; |
| 26 | + CUDA_ASSERT(cudaMemGetInfo(&free, &total)); |
13 | 27 | auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
|
14 |
| - fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}", |
15 |
| - card, free, total, size); |
| 28 | + cudaDeviceProp prop; |
| 29 | + CUDA_ASSERT(cudaGetDeviceProperties(&prop, 0)); |
| 30 | + size_t alignment = prop.textureAlignment; |
| 31 | + fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}", |
| 32 | + card, free, total, size, alignment); |
16 | 33 | return std::make_shared<MemPool>(
|
17 | 34 | std::make_shared<NvidiaMemory>(),
|
18 | 35 | size,
|
19 |
| - 256ul); |
| 36 | + alignment); |
20 | 37 | #else
|
21 | 38 | return nullptr;
|
22 | 39 | #endif
|
23 | 40 | }
|
24 | 41 |
|
25 | 42 | Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}
|
26 | 43 |
|
27 |
| - void Nvidia::setContext() const noexcept { |
28 |
| - setDevice(_card); |
| 44 | + void Nvidia::setContext() const { |
| 45 | +#ifdef USE_CUDA |
| 46 | + CUDA_ASSERT(cudaSetDevice(_card)); |
| 47 | +#endif |
29 | 48 | }
|
30 | 49 |
|
31 | 50 | }// namespace refactor::hardware
|
0 commit comments