Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1723,7 +1723,8 @@ MatchData.Imm = ShiftAmt; unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one(); - return MinLeadingZeros >= ShiftAmt; + unsigned SrcTySize = MRI.getType(ExtSrc).getSizeInBits(); + return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize; } void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir @@ -17,18 +17,20 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C2]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C3]] ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[UDIV]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64)) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir @@ -327,3 +327,34 @@ %shl:_(s64) = G_SHL %extend, %shiftamt $vgpr0_vgpr1 = COPY %shl ... + +--- +name: do_not_shl_s32_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %zero:_(s16) = G_CONSTANT i16 0 + %extend:_(s32) = G_ZEXT %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir @@ -203,3 +203,34 @@ %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl ... + +--- +name: do_not_shl_s32_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %zero:_(s16) = G_CONSTANT i16 0 + %extend:_(s32) = G_ZEXT %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -876,8 +876,8 @@ ; ; GFX8-LABEL: s_sext_inreg_v4i16_14: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b32 s0, 0xffff -; GFX8-NEXT: s_mov_b32 s1, 0xffff +; GFX8-NEXT: s_mov_b32 s0, 0 +; GFX8-NEXT: s_mov_b32 s1, 0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_v4i16_14: @@ -965,10 +965,10 @@ ; GFX8-LABEL: v_sext_inreg_v8i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v8i16_11: