diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -517,6 +517,13 @@
   } else
     return false;
 
+  // The 24-bit mul intrinsics yield only the low-order 32 bits of the
+  // product, so when `Size` > 32 they can only be used if the full result
+  // is known to fit in 32 bits.
+  if (Size > 32 &&
+      numBitsUnsigned(LHS, Size) + numBitsUnsigned(RHS, Size) > 32) {
+    return false;
+  }
+
   SmallVector<Value *, 4> LHSVals;
   SmallVector<Value *, 4> RHSVals;
   SmallVector<Value *, 4> ResultVals;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -173,10 +173,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
 ; SI-NEXT:    [[RHS24:%.*]] = ashr i64 [[LHS]], 40
-; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; SI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i64 [[MUL]]
 ;
 ; VI-LABEL: @smul24_i64(
@@ -184,10 +181,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
 ; VI-NEXT:    [[RHS24:%.*]] = ashr i64 [[LHS]], 40
-; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; VI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i64 [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_i64(
@@ -210,19 +204,13 @@
 ; SI-LABEL: @umul24_i64(
 ; SI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
 ; SI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
-; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; SI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i64 [[MUL]]
 ;
 ; VI-LABEL: @umul24_i64(
 ; VI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
 ; VI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
-; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; VI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i64 [[MUL]]
 ;
 ; DISABLED-LABEL: @umul24_i64(
@@ -423,10 +411,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
 ; SI-NEXT:    [[RHS24:%.*]] = ashr i33 [[LHS]], 9
-; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i33
+; SI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i33 [[MUL]]
 ;
 ; VI-LABEL: @smul24_i33(
@@ -434,10 +419,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
 ; VI-NEXT:    [[RHS24:%.*]] = ashr i33 [[LHS]], 9
-; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i33
+; VI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i33 [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_i33(
@@ -460,19 +442,13 @@
 ; SI-LABEL: @umul24_i33(
 ; SI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
 ; SI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
-; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i33
+; SI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i33 [[MUL]]
 ;
 ; VI-LABEL: @umul24_i33(
 ; VI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
 ; VI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
-; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i33
+; VI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i33 [[MUL]]
 ;
 ; DISABLED-LABEL: @umul24_i33(
@@ -551,20 +527,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
 ; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
-; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
-; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
-; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
-; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
-; SI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
-; SI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
-; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
-; SI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
-; SI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
-; SI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
-; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
-; SI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
-; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
-; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; SI-NEXT:    [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret <2 x i33> [[MUL]]
 ;
 ; VI-LABEL: @smul24_v2i33(
@@ -572,20 +535,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]], <i33 9, i33 9>
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]], <i33 9, i33 9>
 ; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[LHS]], <i33 9, i33 9>
-; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
-; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
-; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
-; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
-; VI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
-; VI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
-; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
-; VI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
-; VI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
-; VI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
-; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
-; VI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
-; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
-; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; VI-NEXT:    [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret <2 x i33> [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_v2i33(
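
For context, a minimal sketch of why the new `Size > 32` bail-out is needed (the IR below is illustrative only; the value names and constants are hypothetical and not part of the patch or the test file). The product of two 24-bit operands can occupy up to 48 bits, while llvm.amdgcn.mul.u24 / llvm.amdgcn.mul.i24 return only the low-order 32 bits:

  ; Mirrors @umul24_i64 with both inputs at their 24-bit maximum.
  %lhs24 = and i64 %lhs, 16777215     ; assume %lhs24 = 0xFFFFFF
  %rhs24 = and i64 %rhs, 16777215     ; assume %rhs24 = 0xFFFFFF
  %mul   = mul i64 %lhs24, %rhs24     ; true product 0xFFFFFE000001 (48 bits)

  ; The old expansion, shown in the removed CHECK lines, kept only the low
  ; 32 bits of that product:
  %tmp = call i32 @llvm.amdgcn.mul.u24(i32 16777215, i32 16777215)  ; 0xFE000001
  %bad = zext i32 %tmp to i64         ; 0x00000000FE000001, not the full product

With the guard in place, multiplies wider than 32 bits whose operands may use all 24 significant bits are left as plain mul, which is what the updated CHECK lines above now expect.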