Skip to content

Commit 93c4e23

Browse files
CUDA: fix race condition in MMQ stream-k fixup (#13299)
1 parent 8afbd96 commit 93c4e23

File tree

1 file changed

+1
-0
lines changed

1 file changed

+1
-0
lines changed

ggml/src/ggml-cuda/mmq.cuh

+1
Original file line numberDiff line numberDiff line change
@@ -2958,6 +2958,7 @@ static __global__ void mul_mat_q_stream_k_fixup(
     for (int j = threadIdx.y*WARP_SIZE + threadIdx.x; j < mmq_x; j += nwarps*WARP_SIZE) {
         ids_dst_shared[j] = ids_dst[col_low + j];
     }
+    __syncthreads();
 
     const int offset_dst = it*mmq_y;
     dst += offset_dst;

0 commit comments

Comments
 (0)