Index: llvm/include/llvm/Support/KnownBits.h
===================================================================
--- llvm/include/llvm/Support/KnownBits.h
+++ llvm/include/llvm/Support/KnownBits.h
@@ -97,6 +97,12 @@
   /// Returns true if this value is known to be non-negative.
   bool isNonNegative() const { return Zero.isSignBitSet(); }
 
+  /// Returns true if this value is known to be positive, i.e. the sign bit is
+  /// known to be zero and at least one bit is known to be one.
+  bool isStrictlyPositive() const {
+    return Zero.isSignBitSet() && !One.isNullValue();
+  }
+
   /// Make this value negative.
   void makeNegative() {
     One.setSignBit();
Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4439,7 +4439,6 @@
     LHSKnown = LHSKnown.trunc(24);
     RHSKnown = RHSKnown.trunc(24);
 
-    bool Negative = false;
     if (Opc == AMDGPUISD::MUL_I24) {
       unsigned LHSValBits = 24 - LHSKnown.countMinSignBits();
       unsigned RHSValBits = 24 - RHSKnown.countMinSignBits();
@@ -4447,16 +4446,19 @@
       if (MaxValBits >= 32)
         break;
       bool LHSNegative = LHSKnown.isNegative();
-      bool LHSPositive = LHSKnown.isNonNegative();
+      bool LHSNonNegative = LHSKnown.isNonNegative();
+      bool LHSPositive = LHSKnown.isStrictlyPositive();
       bool RHSNegative = RHSKnown.isNegative();
-      bool RHSPositive = RHSKnown.isNonNegative();
-      if ((!LHSNegative && !LHSPositive) || (!RHSNegative && !RHSPositive))
-        break;
-      Negative = (LHSNegative && RHSPositive) || (LHSPositive && RHSNegative);
-      if (Negative)
-        Known.One.setHighBits(32 - MaxValBits);
-      else
+      bool RHSNonNegative = RHSKnown.isNonNegative();
+      bool RHSPositive = RHSKnown.isStrictlyPositive();
+
+      // The product is non-negative when the operand signs agree (a zero
+      // operand yields zero), and strictly negative only when one operand is
+      // negative and the other is known to be non-zero and positive.
+      if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
         Known.Zero.setHighBits(32 - MaxValBits);
+      else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
+        Known.One.setHighBits(32 - MaxValBits);
     } else {
       unsigned LHSValBits = 24 - LHSKnown.countMinLeadingZeros();
       unsigned RHSValBits = 24 - RHSKnown.countMinLeadingZeros();
Index: llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-code-object-v3 -O2 -amdgpu-function-calls=0 < %s | FileCheck --check-prefix=GCN %s
+
+; %tid may be zero, so the product %1 is not known to be negative; its high
+; bits must not be treated as known ones, otherwise the masked element offset
+; would fold to the constant -32 (byte offset -128).
+
+; GCN-LABEL: {{^}}test_mul24_knownbits_kernel:
+; GCN-NOT: -128
+; Function Attrs: alwaysinline convergent norecurse nounwind
+define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 {
+entry:
+  %0 = tail call i32 @llvm.amdgcn.workitem.id.x() #28, !range !4
+  %tid = and i32 %0, 3
+  %1 = mul nsw i32 %tid, -5
+  %v1 = and i32 %1, -32
+  %v2 = sext i32 %v1 to i64
+  %v3 = getelementptr inbounds float, float addrspace(1)* %p, i64 %v2
+  store float 0.000, float addrspace(1)* %v3, align 4
+  ret void
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare i32 @llvm.amdgcn.workitem.id.x() #20
+
+!4 = !{i32 0, i32 1024}