From e3e670f1e63b91f2ffa7da4f691237ed45a9b079 Mon Sep 17 00:00:00 2001 From: Kevin Choi Date: Tue, 29 Apr 2025 11:45:37 -0500 Subject: [PATCH 1/4] [HIP] add test builtin-logb-scalbn --- External/HIP/CMakeLists.txt | 1 + External/HIP/builtin-logb-scalbn.hip | 58 +++++++++++++++++++ .../HIP/builtin-logb-scalbn.reference_output | 32 ++++++++++ 3 files changed, 91 insertions(+) create mode 100644 External/HIP/builtin-logb-scalbn.hip create mode 100644 External/HIP/builtin-logb-scalbn.reference_output diff --git a/External/HIP/CMakeLists.txt b/External/HIP/CMakeLists.txt index 11d92fd127..31ed02461f 100644 --- a/External/HIP/CMakeLists.txt +++ b/External/HIP/CMakeLists.txt @@ -22,6 +22,7 @@ macro(create_local_hip_tests VariantSuffix) list(APPEND HIP_LOCAL_TESTS saxpy) list(APPEND HIP_LOCAL_TESTS memmove) list(APPEND HIP_LOCAL_TESTS split-kernel-args) + list(APPEND HIP_LOCAL_TESTS builtin-logb-scalbn) # TODO: Re-enable InOneWeekend after it is fixed #list(APPEND HIP_LOCAL_TESTS InOneWeekend) diff --git a/External/HIP/builtin-logb-scalbn.hip b/External/HIP/builtin-logb-scalbn.hip new file mode 100644 index 0000000000..87b82c7852 --- /dev/null +++ b/External/HIP/builtin-logb-scalbn.hip @@ -0,0 +1,58 @@ + +#define __device__ __attribute__((device)) + +#include +#include +#include + +__global__ void my_kernel(float a) { + printf("%f\n", logbf(16.0f)); + printf("%f\n", logb(16.0)); + printf("%f\n", __builtin_logbf(16.0f)); + printf("%f\n", __builtin_logb(16.0)); + + printf("%f\n", scalbnf(16.0f, 10)); + printf("%f\n", scalbn(16.0, 10)); + printf("%f\n", __builtin_scalbnf(16.0f, 10)); + printf("%f\n", __builtin_scalbn(16.0, 10)); + + printf("%f\n", logbf(a)); + printf("%f\n", logb(a)); + printf("%f\n", __builtin_logbf(a)); + printf("%f\n", __builtin_logb(a)); + + printf("%f\n", scalbnf(a, 10)); + printf("%f\n", scalbn(a, 10)); + printf("%f\n", __builtin_scalbnf(a, 10)); + printf("%f\n", __builtin_scalbn(a, 10)); +} + +void __attribute__((noinline)) test(float a) { + printf("%f\n", logbf(a)); + printf("%f\n", logb(a)); + printf("%f\n", __builtin_logbf(a)); + printf("%f\n", __builtin_logb(a)); + + printf("%f\n", scalbnf(a, 10)); + printf("%f\n", scalbn(a, 10)); + printf("%f\n", __builtin_scalbnf(a, 10)); + printf("%f\n", __builtin_scalbn(a, 10)); +} + +int main(int argc, char **argv) { + my_kernel<<<1,1>>>(16.0f); + + printf("%f\n", logbf(16.0f)); + printf("%f\n", logb(16.0)); + printf("%f\n", __builtin_logbf(16.0f)); + printf("%f\n", __builtin_logb(16.0)); + + printf("%f\n", scalbnf(16.0f, 10)); + printf("%f\n", scalbn(16.0, 10)); + printf("%f\n", __builtin_scalbnf(16.0f, 10)); + printf("%f\n", __builtin_scalbn(16.0, 10)); + + test(16.0f); + return 0; +} + diff --git a/External/HIP/builtin-logb-scalbn.reference_output b/External/HIP/builtin-logb-scalbn.reference_output new file mode 100644 index 0000000000..af4d6fd479 --- /dev/null +++ b/External/HIP/builtin-logb-scalbn.reference_output @@ -0,0 +1,32 @@ +4.000000 +4.000000 +4.000000 +4.000000 +16384.000000 +16384.000000 +16384.000000 +16384.000000 +4.000000 +4.000000 +4.000000 +4.000000 +16384.000000 +16384.000000 +16384.000000 +16384.000000 +4.000000 +4.000000 +4.000000 +4.000000 +16384.000000 +16384.000000 +16384.000000 +16384.000000 +4.000000 +4.000000 +4.000000 +4.000000 +16384.000000 +16384.000000 +16384.000000 +16384.000000 From 332440fcd5f273d53dd8479ade5611d9535b4775 Mon Sep 17 00:00:00 2001 From: Kevin Choi Date: Wed, 30 Apr 2025 11:44:41 -0500 Subject: [PATCH 2/4] Modified to test array inputs for float and exp parameters. --- External/HIP/builtin-logb-scalbn.hip | 131 ++++++++++++++++++--------- 1 file changed, 89 insertions(+), 42 deletions(-) diff --git a/External/HIP/builtin-logb-scalbn.hip b/External/HIP/builtin-logb-scalbn.hip index 87b82c7852..e89eace9a2 100644 --- a/External/HIP/builtin-logb-scalbn.hip +++ b/External/HIP/builtin-logb-scalbn.hip @@ -1,58 +1,105 @@ - -#define __device__ __attribute__((device)) - #include #include #include +#include + +// Simple error check macro +#define HIP_CHECK(call) \ + do { \ + hipError_t err = call; \ + if (err != hipSuccess) { \ + std::cerr << "HIP error: " << hipGetErrorString(err) \ + << " at " << __FILE__ << ":" << __LINE__ << std::endl; \ + std::exit(EXIT_FAILURE); \ + } \ + } while (0) + +__global__ void my_kernel(float a[], int alen, int exp[], int explen, float *t_res) { + for (int i = 0; i < alen; i++) { + t_res[4*i] = logbf(a[i]); + t_res[4*i + 1] = logb(a[i]); + t_res[4*i + 2] = __builtin_logbf(a[i]); + t_res[4*i + 3] = __builtin_logb(a[i]); + } -__global__ void my_kernel(float a) { - printf("%f\n", logbf(16.0f)); - printf("%f\n", logb(16.0)); - printf("%f\n", __builtin_logbf(16.0f)); - printf("%f\n", __builtin_logb(16.0)); - - printf("%f\n", scalbnf(16.0f, 10)); - printf("%f\n", scalbn(16.0, 10)); - printf("%f\n", __builtin_scalbnf(16.0f, 10)); - printf("%f\n", __builtin_scalbn(16.0, 10)); - - printf("%f\n", logbf(a)); - printf("%f\n", logb(a)); - printf("%f\n", __builtin_logbf(a)); - printf("%f\n", __builtin_logb(a)); - - printf("%f\n", scalbnf(a, 10)); - printf("%f\n", scalbn(a, 10)); - printf("%f\n", __builtin_scalbnf(a, 10)); - printf("%f\n", __builtin_scalbn(a, 10)); + for (int i = 0; i < alen; i++) { + for (int j = 0; j < explen; j++) { + t_res[4*alen + explen*i + 4*j] = scalbnf(a[i], exp[j]); + t_res[4*alen + explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + t_res[4*alen + explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + t_res[4*alen + explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + } + } } -void __attribute__((noinline)) test(float a) { - printf("%f\n", logbf(a)); - printf("%f\n", logb(a)); - printf("%f\n", __builtin_logbf(a)); - printf("%f\n", __builtin_logb(a)); +void __attribute__((noinline)) test(float a[], int alen, int exp[], int explen, float *h_res) { + for (int i = 0; i < alen; i++) { + h_res[4*i] = logbf(a[i]); + h_res[4*i + 1] = logb(a[i]); + h_res[4*i + 2] = __builtin_logbf(a[i]); + h_res[4*i + 3] = __builtin_logb(a[i]); + } - printf("%f\n", scalbnf(a, 10)); - printf("%f\n", scalbn(a, 10)); - printf("%f\n", __builtin_scalbnf(a, 10)); - printf("%f\n", __builtin_scalbn(a, 10)); + for (int i = 0; i < alen; i++) { + for (int j = 0; j < explen; j++) { + h_res[4*alen + explen*i + 4*j] = scalbnf(a[i], exp[j]); + h_res[4*alen + explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + h_res[4*alen + explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + h_res[4*alen + explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + } + } } int main(int argc, char **argv) { - my_kernel<<<1,1>>>(16.0f); + // Init input data + float a[] = {16.0f, 3.14f, 0.0f, -0.0f, INFINITY, NAN}; + int alen = sizeof(a) / sizeof(a[0]); + int exp[] = {10, 0, -5}; + int explen = sizeof(exp) / sizeof(exp[0]); + + // Compute on CPU + int res_len = 4 * alen + 4 * alen * explen; // logb + scalbn + int res_bsize = sizeof(float) * res_len; + float *h_res = (float *)malloc(res_bsize); + test(a, alen, exp, explen, h_res); + + // Make a copy for GPU + float *d_a; + int *d_exp; + float *t_res; + HIP_CHECK(hipMalloc((void**)&d_a, sizeof(a))); + HIP_CHECK(hipMalloc((void**)&d_exp, sizeof(exp))); + HIP_CHECK(hipMalloc((void**)&t_res, res_bsize)); + HIP_CHECK(hipMemcpy(d_a, a, sizeof(a), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemcpy(d_exp, exp, sizeof(exp), hipMemcpyHostToDevice)); + HIP_CHECK(hipMemset(t_res, 0, res_bsize)); + + // Launch a GPU kernel + my_kernel<<<1,1>>>(d_a, alen, d_exp, explen, t_res); - printf("%f\n", logbf(16.0f)); - printf("%f\n", logb(16.0)); - printf("%f\n", __builtin_logbf(16.0f)); - printf("%f\n", __builtin_logb(16.0)); + // Copy the device results to host + float *d_res = (float *)malloc(res_bsize); + HIP_CHECK(hipDeviceSynchronize()); + HIP_CHECK(hipMemcpy(d_res, t_res, res_bsize, hipMemcpyDeviceToHost)); - printf("%f\n", scalbnf(16.0f, 10)); - printf("%f\n", scalbn(16.0, 10)); - printf("%f\n", __builtin_scalbnf(16.0f, 10)); - printf("%f\n", __builtin_scalbn(16.0, 10)); + // Verify the results match CPU. + int errs = 0; + for(int i = 0; i < res_len; i++) { + if (fabs(h_res[i] - d_res[i]) > fabs(h_res[i] * 0.0001f)) { + //printf("found error i=%i h=%f d=%f\n", i, h_res[i], d_res[i]); + errs++; + } + } + if (errs != 0) + printf("%i errors\n", errs); + else + printf("PASSED!\n"); - test(16.0f); + free(h_res); + HIP_CHECK(hipFree(d_a)); + HIP_CHECK(hipFree(d_exp)); + HIP_CHECK(hipFree(t_res)); + free(d_res); return 0; } From 7ac3ec57cc1617f262b06f741e418ccbfd8e52f9 Mon Sep 17 00:00:00 2001 From: Kevin Choi Date: Wed, 30 Apr 2025 11:48:44 -0500 Subject: [PATCH 3/4] fix reference_output and return errs --- External/HIP/builtin-logb-scalbn.hip | 2 +- .../HIP/builtin-logb-scalbn.reference_output | 34 ++----------------- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/External/HIP/builtin-logb-scalbn.hip b/External/HIP/builtin-logb-scalbn.hip index e89eace9a2..fc7a8a9bd7 100644 --- a/External/HIP/builtin-logb-scalbn.hip +++ b/External/HIP/builtin-logb-scalbn.hip @@ -100,6 +100,6 @@ int main(int argc, char **argv) { HIP_CHECK(hipFree(d_exp)); HIP_CHECK(hipFree(t_res)); free(d_res); - return 0; + return errs; } diff --git a/External/HIP/builtin-logb-scalbn.reference_output b/External/HIP/builtin-logb-scalbn.reference_output index af4d6fd479..391efdf648 100644 --- a/External/HIP/builtin-logb-scalbn.reference_output +++ b/External/HIP/builtin-logb-scalbn.reference_output @@ -1,32 +1,2 @@ -4.000000 -4.000000 -4.000000 -4.000000 -16384.000000 -16384.000000 -16384.000000 -16384.000000 -4.000000 -4.000000 -4.000000 -4.000000 -16384.000000 -16384.000000 -16384.000000 -16384.000000 -4.000000 -4.000000 -4.000000 -4.000000 -16384.000000 -16384.000000 -16384.000000 -16384.000000 -4.000000 -4.000000 -4.000000 -4.000000 -16384.000000 -16384.000000 -16384.000000 -16384.000000 +PASSED! +exit 0 From 1c9043a3a3bdb220cb9f61ab8d8e9453d0c39f1a Mon Sep 17 00:00:00 2001 From: Kevin Choi Date: Thu, 1 May 2025 12:04:41 -0500 Subject: [PATCH 4/4] Fix indexing and uncomment printf on error --- External/HIP/builtin-logb-scalbn.hip | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/External/HIP/builtin-logb-scalbn.hip b/External/HIP/builtin-logb-scalbn.hip index fc7a8a9bd7..a923e5a51e 100644 --- a/External/HIP/builtin-logb-scalbn.hip +++ b/External/HIP/builtin-logb-scalbn.hip @@ -24,10 +24,10 @@ __global__ void my_kernel(float a[], int alen, int exp[], int explen, float *t_r for (int i = 0; i < alen; i++) { for (int j = 0; j < explen; j++) { - t_res[4*alen + explen*i + 4*j] = scalbnf(a[i], exp[j]); - t_res[4*alen + explen*i + 4*j + 1] = scalbn(a[i], exp[j]); - t_res[4*alen + explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); - t_res[4*alen + explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + t_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); } } } @@ -42,10 +42,10 @@ void __attribute__((noinline)) test(float a[], int alen, int exp[], int explen, for (int i = 0; i < alen; i++) { for (int j = 0; j < explen; j++) { - h_res[4*alen + explen*i + 4*j] = scalbnf(a[i], exp[j]); - h_res[4*alen + explen*i + 4*j + 1] = scalbn(a[i], exp[j]); - h_res[4*alen + explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); - h_res[4*alen + explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j] = scalbnf(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 1] = scalbn(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 2] = __builtin_scalbnf(a[i], exp[j]); + h_res[4*alen + 4*explen*i + 4*j + 3] = __builtin_scalbn(a[i], exp[j]); } } } @@ -86,7 +86,7 @@ int main(int argc, char **argv) { int errs = 0; for(int i = 0; i < res_len; i++) { if (fabs(h_res[i] - d_res[i]) > fabs(h_res[i] * 0.0001f)) { - //printf("found error i=%i h=%f d=%f\n", i, h_res[i], d_res[i]); + printf("found error i=%i h=%f d=%f\n", i, h_res[i], d_res[i]); errs++; } }