From 2d463d9442619979967e6f328bc1dfc69a8ead7e Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Fri, 26 Jul 2024 16:48:14 +0000 Subject: [PATCH] [Bugfix][Kernel] Promote index over numel in segmented_max_reduction to int64_t --- csrc/quantization/fp8/common.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csrc/quantization/fp8/common.cu b/csrc/quantization/fp8/common.cu index 090f95d1bda71..6dae32b25f9c4 100644 --- a/csrc/quantization/fp8/common.cu +++ b/csrc/quantization/fp8/common.cu @@ -48,7 +48,7 @@ __global__ void segmented_max_reduction(float* __restrict__ scale, const scalar_t* __restrict__ input, int64_t num_elems) { __shared__ float cache[1024]; - int i = blockDim.x * blockIdx.x + threadIdx.x; + int64_t i = blockDim.x * blockIdx.x + threadIdx.x; // First store maximum for all values processes by // the current thread in cache[threadIdx.x]