diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3246,8 +3246,15 @@
       SDValue Amt = Src.getOperand(1);
       KnownBits Known = DAG.computeKnownBits(Amt);
       unsigned Size = VT.getScalarSizeInBits();
-      if ((Known.isConstant() && Known.getConstant().ule(Size)) ||
-          (Known.countMaxActiveBits() <= Log2_32(Size))) {
+
+      // When the shift amount is known:
+      // - For left shifts, do the transform as long as the shift
+      //   amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
+      // - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
+      //   losing information stored in the high bits when truncating.
+      const unsigned MaxCstSize =
+          (Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
+      if (Known.getMaxValue().ule(MaxCstSize)) {
         EVT MidVT = VT.isVector() ? EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                                      VT.getVectorNumElements())
                                   : MVT::i32;
diff --git a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
--- a/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-shift-shrink.ll
@@ -128,8 +128,8 @@
 ; GCN-LABEL: trunc_srl_i64_var_mask16_to_i16:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_and_b32_e32 v2, 16, v2
-; GCN-NEXT:    v_lshrrev_b64 v[0:1], v2, v[0:1]
+; GCN-NEXT:    v_and_b32_e32 v1, 16, v2
+; GCN-NEXT:    v_lshrrev_b32_e32 v0, v1, v0
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
   %amt.masked = and i64 %amt, 16
   %shift = lshr i64 %x, %amt.masked
@@ -149,3 +149,19 @@
   %trunc = trunc i64 %shift to i16
   ret i16 %trunc
 }
+
+define i32 @trunc_srl_i64_25_to_i26(i64 %x) {
+; GCN-LABEL: trunc_srl_i64_25_to_i26:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_and_b32_e32 v0, 0xa000000, v0
+; GCN-NEXT:    v_alignbit_b32 v0, 0, v0, 25
+; GCN-NEXT:    v_add_u32_e32 v0, 55, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %value.knownbits2 = and i64 %x, 167772160 ; 0xA000000
+  %shift = lshr i64 %value.knownbits2, 25
+  %trunc = trunc i64 %shift to i26
+  %add = add i26 %trunc, 55
+  %ext = zext i26 %add to i32
+  ret i32 %ext
+}