|
1 |
| -#include "functions.cuh" |
2 |
| -#include "hardware/devices/nvidia.h" |
| 1 | +#include "hardware/devices/nvidia.h" |
3 | 2 | #include "hardware/mem_pool.h"
|
4 |
| -#include "memory.cuh" |
| 3 | + |
| 4 | +#ifdef USE_CUDA |
| 5 | +#include "memory.hh" |
| 6 | +#include <cuda_runtime.h> |
| 7 | + |
// Evaluates the CUDA runtime call STATUS exactly once. If the result is not
// cudaSuccess, raises RUNTIME_ERROR with the stringified expression, the text
// from cudaGetErrorString and the numeric error code.
//
// The body is wrapped in do { ... } while (0) so the macro expands to a single
// statement: `if (c) CUDA_ASSERT(x); else ...` binds the `else` to the caller's
// `if`, not to the macro's internal one. The temporary has a macro-private name
// so an argument expression that mentions `status` is not shadowed.
#define CUDA_ASSERT(STATUS)                                                              \
    do {                                                                                 \
        if (auto cudaAssertStatus_ = (STATUS); cudaAssertStatus_ != cudaSuccess) {       \
            RUNTIME_ERROR(fmt::format("cuda failed on \"" #STATUS "\" with \"{}\" ({})", \
                                      cudaGetErrorString(cudaAssertStatus_),             \
                                      (int) cudaAssertStatus_));                         \
        }                                                                                \
    } while (0)
| 13 | +#endif |
5 | 14 |
|
6 | 15 | namespace refactor::hardware {
|
7 | 16 |
|
8 | 17 | static Arc<Memory> cudaMemory(int32_t card) {
|
9 | 18 | #ifdef USE_CUDA
|
10 |
| - ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card); |
11 |
| - setDevice(card); |
12 |
| - auto [free, total] = getMemInfo(); |
| 19 | + int deviceCount; |
| 20 | + CUDA_ASSERT(cudaGetDeviceCount(&deviceCount)); |
| 21 | + ASSERT(0 <= card && card < deviceCount, "Invalid card id: {}", card); |
| 22 | + CUDA_ASSERT(cudaSetDevice(card)); |
| 23 | + |
| 24 | + size_t free, total; |
| 25 | + CUDA_ASSERT(cudaMemGetInfo(&free, &total)); |
13 | 26 | auto size = std::min(free, std::max(5ul << 30, total * 4 / 5));
|
14 |
| - fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}", |
15 |
| - card, free, total, size); |
| 27 | + cudaDeviceProp prop; |
| 28 | + CUDA_ASSERT(cudaGetDeviceProperties(&prop, 0)); |
| 29 | + size_t alignment = prop.textureAlignment; |
| 30 | + fmt::println("initializing Nvidia GPU {}, memory {} / {}, alloc {}, alignment {}", |
| 31 | + card, free, total, size, alignment); |
16 | 32 | return std::make_shared<MemPool>(
|
17 | 33 | std::make_shared<NvidiaMemory>(),
|
18 | 34 | size,
|
19 |
| - 256ul); |
| 35 | + alignment); |
20 | 36 | #else
|
21 | 37 | return nullptr;
|
22 | 38 | #endif
|
23 | 39 | }
|
24 | 40 |
|
25 | 41 | Nvidia::Nvidia(int32_t card) : Device(card, cudaMemory(card)) {}
|
26 | 42 |
|
27 |
| - void Nvidia::setContext() const noexcept { |
28 |
| - setDevice(_card); |
| 43 | + void Nvidia::setContext() const { |
| 44 | +#ifdef USE_CUDA |
| 45 | + CUDA_ASSERT(cudaSetDevice(_card)); |
| 46 | +#endif |
29 | 47 | }
|
30 | 48 |
|
31 | 49 | }// namespace refactor::hardware
|
0 commit comments