diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3246,7 +3246,15 @@
     SDValue Amt = Src.getOperand(1);
     KnownBits Known = DAG.computeKnownBits(Amt);
     unsigned Size = VT.getScalarSizeInBits();
-    if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
+
+    // When the shift amount is known:
+    // - For left shifts, do the transform if ShiftAmt <= Size.
+    // - For right shifts, do it if ShiftAmt <= (32 - Size) to avoid
+    //   losing information stored in the high bits when truncating.
+    const unsigned MaxCstSize = (Src.getOpcode() == ISD::SHL)
+                                    ? Size
+                                    : (32 - VT.getScalarSizeInBits());
+    if ((Known.isConstant() && Known.getConstant().ule(MaxCstSize)) ||
         (Known.countMaxActiveBits() <= Log2_32(Size))) {
       EVT MidVT = VT.isVector() ?
         EVT::getVectorVT(*DAG.getContext(), MVT::i32,
diff --git a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
--- a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
@@ -149,3 +149,19 @@
   %trunc = trunc i64 %shift to i16
   ret i16 %trunc
 }
+
+define i32 @trunc_srl_i64_25_to_i26(i64 %x) {
+; GCN-LABEL: trunc_srl_i64_25_to_i26:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 0xa000000, v0
+; GCN-NEXT:    v_alignbit_b32 v0, 0, v0, 25
+; GCN-NEXT:    v_add_u32_e32 v0, 55, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
+  %shift = lshr i64 %value.knownbits2, 25
+  %trunc = trunc i64 %shift to i26
+  %add = add i26 %trunc, 55
+  %ext = zext i26 %add to i32
+  ret i32 %ext
+}
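
Why the (32 - Size) bound: the combine rewrites trunc iN (srl i64 x, c) into trunc iN (srl i32 (trunc i32 x), c). The iN result needs bits [c, c+N) of x, but the inner truncation only keeps bits [0, 32), so the rewrite is lossless only when c + N <= 32, i.e. c <= 32 - Size. Left shifts tolerate the looser Size bound because the low 32 bits of (x << c) depend only on the low 32 bits of x. Below is a minimal standalone C++ sketch, not part of the patch (the helper names shiftThenTrunc64 and truncThenShift32 are made up for illustration), that checks the two forms against each other:

    #include <cassert>
    #include <cstdint>

    // Original form: shift in 64 bits, then keep the low `Size` bits.
    static uint64_t shiftThenTrunc64(uint64_t X, unsigned Amt, unsigned Size) {
      return (X >> Amt) & ((uint64_t(1) << Size) - 1);
    }

    // Combined form: truncate to 32 bits first, shift, then keep `Size` bits.
    static uint64_t truncThenShift32(uint64_t X, unsigned Amt, unsigned Size) {
      uint32_t Lo = static_cast<uint32_t>(X);
      return (Lo >> Amt) & ((uint64_t(1) << Size) - 1);
    }

    int main() {
      const unsigned Size = 26; // i26 result, as in the new test.

      // Amt = 25 exceeds 32 - Size = 6: bit 32 of X lands inside the i26
      // result, so the 32-bit form drops it.
      assert(shiftThenTrunc64(uint64_t(1) << 32, 25, Size) == 0x80);
      assert(truncThenShift32(uint64_t(1) << 32, 25, Size) == 0);

      // For any Amt <= 32 - Size the two forms agree on every input.
      for (unsigned Amt = 0; Amt <= 32 - Size; ++Amt)
        assert(shiftThenTrunc64(0x123456789ABCDEFULL, Amt, Size) ==
               truncThenShift32(0x123456789ABCDEFULL, Amt, Size));
      return 0;
    }

Built with any C++11 compiler (without -DNDEBUG), the asserts pass, matching the bound the patch enforces for right shifts with a known constant amount.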