diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -498,12 +498,28 @@
   if (Size <= 16 && ST->has16BitInsts())
     return false;
 
+  Value *LHS = I.getOperand(0);
+  Value *RHS = I.getOperand(1);
+
+  // The 24-bit mul intrinsics yield only the low-order 32 bits of the result.
+  if (Size > 32) {
+    unsigned LHSNumBits = numBitsUnsigned(LHS, Size);
+    unsigned RHSNumBits = numBitsUnsigned(RHS, Size);
+
+    // The 24-bit mul instructions consider only the low-order 24 bits of
+    // their operands.
+    if (LHSNumBits > 24 || RHSNumBits > 24)
+      return false;
+
+    // The product's bit width must not exceed 32.
+    if (LHSNumBits + RHSNumBits > 32)
+      return false;
+  }
+
   // Prefer scalar if this could be s_mul_i32
   if (DA->isUniform(&I))
     return false;
 
-  Value *LHS = I.getOperand(0);
-  Value *RHS = I.getOperand(1);
   IRBuilder<> Builder(&I);
   Builder.SetCurrentDebugLocation(I.getDebugLoc());
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-mul24.ll
@@ -173,10 +173,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
 ; SI-NEXT:    [[RHS24:%.*]] = ashr i64 [[LHS]], 40
-; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; SI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i64 [[MUL]]
 ;
 ; VI-LABEL: @smul24_i64(
@@ -184,10 +181,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr i64 [[SHL_LHS]], 40
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl i64 [[RHS:%.*]], 40
 ; VI-NEXT:    [[RHS24:%.*]] = ashr i64 [[LHS]], 40
-; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i64
+; VI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i64 [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_i64(
@@ -210,19 +204,13 @@
 ; SI-LABEL: @umul24_i64(
 ; SI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
 ; SI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
-; SI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; SI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i64 [[MUL]]
 ;
 ; VI-LABEL: @umul24_i64(
 ; VI-NEXT:    [[LHS24:%.*]] = and i64 [[LHS:%.*]], 16777215
 ; VI-NEXT:    [[RHS24:%.*]] = and i64 [[RHS:%.*]], 16777215
-; VI-NEXT:    [[TMP1:%.*]] = trunc i64 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i64 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i64
+; VI-NEXT:    [[MUL:%.*]] = mul i64 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i64 [[MUL]]
 ;
 ; DISABLED-LABEL: @umul24_i64(
@@ -423,10 +411,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
 ; SI-NEXT:    [[RHS24:%.*]] = ashr i33 [[LHS]], 9
-; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i33
+; SI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i33 [[MUL]]
 ;
 ; VI-LABEL: @smul24_i33(
@@ -434,10 +419,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr i33 [[SHL_LHS]], 9
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl i33 [[RHS:%.*]], 9
 ; VI-NEXT:    [[RHS24:%.*]] = ashr i33 [[LHS]], 9
-; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = sext i32 [[TMP3]] to i33
+; VI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i33 [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_i33(
@@ -460,19 +442,13 @@
 ; SI-LABEL: @umul24_i33(
 ; SI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
 ; SI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
-; SI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; SI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; SI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; SI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i33
+; SI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret i33 [[MUL]]
 ;
 ; VI-LABEL: @umul24_i33(
 ; VI-NEXT:    [[LHS24:%.*]] = and i33 [[LHS:%.*]], 16777215
 ; VI-NEXT:    [[RHS24:%.*]] = and i33 [[RHS:%.*]], 16777215
-; VI-NEXT:    [[TMP1:%.*]] = trunc i33 [[LHS24]] to i32
-; VI-NEXT:    [[TMP2:%.*]] = trunc i33 [[RHS24]] to i32
-; VI-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.mul.u24(i32 [[TMP1]], i32 [[TMP2]])
-; VI-NEXT:    [[MUL:%.*]] = zext i32 [[TMP3]] to i33
+; VI-NEXT:    [[MUL:%.*]] = mul i33 [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret i33 [[MUL]]
 ;
 ; DISABLED-LABEL: @umul24_i33(
@@ -551,20 +527,7 @@
 ; SI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]],
 ; SI-NEXT:    [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]],
 ; SI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[LHS]],
-; SI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
-; SI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
-; SI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
-; SI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
-; SI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
-; SI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
-; SI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
-; SI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
-; SI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
-; SI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
-; SI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
-; SI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
-; SI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
-; SI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; SI-NEXT:    [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
 ; SI-NEXT:    ret <2 x i33> [[MUL]]
 ;
 ; VI-LABEL: @smul24_v2i33(
@@ -572,20 +535,7 @@
 ; VI-NEXT:    [[LHS24:%.*]] = ashr <2 x i33> [[SHL_LHS]],
 ; VI-NEXT:    [[LSHR_RHS:%.*]] = shl <2 x i33> [[RHS:%.*]],
 ; VI-NEXT:    [[RHS24:%.*]] = ashr <2 x i33> [[LHS]],
-; VI-NEXT:    [[TMP1:%.*]] = extractelement <2 x i33> [[LHS24]], i64 0
-; VI-NEXT:    [[TMP2:%.*]] = extractelement <2 x i33> [[LHS24]], i64 1
-; VI-NEXT:    [[TMP3:%.*]] = extractelement <2 x i33> [[RHS24]], i64 0
-; VI-NEXT:    [[TMP4:%.*]] = extractelement <2 x i33> [[RHS24]], i64 1
-; VI-NEXT:    [[TMP5:%.*]] = trunc i33 [[TMP1]] to i32
-; VI-NEXT:    [[TMP6:%.*]] = trunc i33 [[TMP3]] to i32
-; VI-NEXT:    [[TMP7:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP5]], i32 [[TMP6]])
-; VI-NEXT:    [[TMP8:%.*]] = sext i32 [[TMP7]] to i33
-; VI-NEXT:    [[TMP9:%.*]] = trunc i33 [[TMP2]] to i32
-; VI-NEXT:    [[TMP10:%.*]] = trunc i33 [[TMP4]] to i32
-; VI-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.mul.i24(i32 [[TMP9]], i32 [[TMP10]])
-; VI-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i33
-; VI-NEXT:    [[TMP13:%.*]] = insertelement <2 x i33> undef, i33 [[TMP8]], i64 0
-; VI-NEXT:    [[MUL:%.*]] = insertelement <2 x i33> [[TMP13]], i33 [[TMP12]], i64 1
+; VI-NEXT:    [[MUL:%.*]] = mul <2 x i33> [[LHS24]], [[RHS24]]
 ; VI-NEXT:    ret <2 x i33> [[MUL]]
 ;
 ; DISABLED-LABEL: @smul24_v2i33(
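
Note: for context, a minimal IR sketch of how the new Size > 32 guard decides which multiplies may still be narrowed to the mul24 intrinsics. The functions here are hypothetical and not taken from the patch or its test file; the operand bit widths are what numBitsUnsigned would derive from the masks.

; Still eligible: both operands fit in 24 bits and 24 + 8 = 32 result
; bits, so the 32-bit mul24 result, zero-extended, equals the full
; i64 product. The pass may still emit @llvm.amdgcn.mul.u24 here.
define i64 @product_fits(i64 %a, i64 %b) {
  %a24 = and i64 %a, 16777215   ; <= 24 known bits
  %b8 = and i64 %b, 255         ; <= 8 known bits
  %mul = mul i64 %a24, %b8
  ret i64 %mul
}

; Rejected by the new guard: 24 + 24 = 48 > 32, so the product can
; overflow the 32 bits that mul24 produces, and the mul is left alone.
define i64 @product_overflows(i64 %a, i64 %b) {
  %a24 = and i64 %a, 16777215
  %b24 = and i64 %b, 16777215
  %mul = mul i64 %a24, %b24     ; kept as a plain mul, as in the tests above
  ret i64 %mul
}

The second case is the miscompile this patch prevents: truncating such operands to i32, multiplying with mul24, and sign- or zero-extending back discards the high bits of the true product. The updated FileCheck lines reflect that these wide multiplies now stay as plain mul instructions.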