diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2822,6 +2822,7 @@ static SDValue simplifyI24(SDNode *Node24, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); @@ -2835,11 +2836,11 @@ APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); - // First try to simplify using GetDemandedBits which allows the operands to - // have other uses, but will only perform simplifications that involve - // bypassing some nodes for this user. - SDValue DemandedLHS = DAG.GetDemandedBits(LHS, Demanded); - SDValue DemandedRHS = DAG.GetDemandedBits(RHS, Demanded); + // First try to simplify using SimplifyMultipleUseDemandedBits which allows + // the operands to have other uses, but will only perform simplifications that + // involve bypassing some nodes for this user. + SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG); + SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG); if (DemandedLHS || DemandedRHS) return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), DemandedLHS ? DemandedLHS : LHS, @@ -2847,7 +2848,6 @@ // Now try SimplifyDemandedBits which can simplify the nodes used by our // operands if this node is the only user. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) return SDValue(Node24, 0); if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py - ; RUN: llc -mtriple amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GCN %s + define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(float addrspace(1)* %p) #4 { ; GCN-LABEL: test_mul24_knownbits_kernel: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: v_and_b32_e32 v0, 3, v0 -; GCN-NEXT: v_mul_i32_i24_e32 v0, 0xfffffb, v0 +; GCN-NEXT: v_mul_i32_i24_e32 v0, -5, v0 ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GCN-NEXT: v_and_b32_e32 v0, 0xffffffe0, v0 ; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll --- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll +++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll @@ -125,7 +125,7 @@ ; GCN-NEXT: v_mov_b32_e32 v4, v2 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_or_b32_e32 v0, 0x800000, v1 -; GCN-NEXT: v_mul_i32_i24_e32 v0, 0xfffff9, v0 +; GCN-NEXT: v_mul_i32_i24_e32 v0, -7, v0 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v0 ; GCN-NEXT: buffer_store_dwordx2 v[1:2], v[3:4], s[0:3], 0 addr64 ; GCN-NEXT: s_endpgm