Index: llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1701,9 +1701,9 @@ !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc)))) return false; - // TODO: Should handle vector splat. Register RHS = MI.getOperand(2).getReg(); - auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI); + MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS); + auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI); if (!MaybeShiftAmtVal) return false; @@ -1718,12 +1718,13 @@ return false; } - int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue(); + int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue(); MatchData.Reg = ExtSrc; MatchData.Imm = ShiftAmt; unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one(); - return MinLeadingZeros >= ShiftAmt; + unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits(); + return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize; } void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI, Index: llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-divrem-insertpt-crash.mir @@ -17,18 +17,20 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_BRCOND [[DEF]](s1), %bb.2 ; CHECK-NEXT: G_BR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C1]](s32) - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C2]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; CHECK-NEXT: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[C3]] ; CHECK-NEXT: [[UDIV:%[0-9]+]]:_(s64) = G_UDIV [[FREEZE]], [[C]] - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[UDIV]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C1]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SHL]], [[UDIV]] ; CHECK-NEXT: G_STORE [[ADD]](s64), [[COPY]](p0) :: (store (s64)) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.postlegal.mir @@ -327,3 +327,105 @@ %shl:_(s64) = G_SHL %extend, %shiftamt $vgpr0_vgpr1 = COPY %shl ... + +--- +name: do_not_shl_s32_zero_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %zero:_(s16) = G_CONSTANT i16 0 + %extend:_(s32) = G_ZEXT %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... + +--- +name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16) + ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) + ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16) + ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) + ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + %zero:_(s16) = G_CONSTANT i16 0 + %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16) + %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>) + %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector + $vgpr0_vgpr1 = COPY %shl +... + +--- +name: do_not_shl_s32_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 + ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %argument:_(s32) = COPY $vgpr0 + %truncate:_(s16) = G_TRUNC %argument:_(s32) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %extend:_(s32) = G_ZEXT %truncate:_(s16) + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shl-from-extend-narrow.prelegal.mir @@ -134,10 +134,10 @@ ; GFX6-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 ; GFX6-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16) ; GFX6-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec - ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %masked(<2 x s16>) - ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX6-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvec(<2 x s32>) + ; GFX6-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; GFX6-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>) + ; GFX6-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>) ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) ; GFX9-LABEL: name: narrow_shl_v2s32_by_2_from_zext_v2s16 ; GFX9: liveins: $vgpr0 @@ -146,10 +146,10 @@ ; GFX9-NEXT: %masklow14:_(s16) = G_CONSTANT i16 16383 ; GFX9-NEXT: %masklow14vec:_(<2 x s16>) = G_BUILD_VECTOR %masklow14(s16), %masklow14(s16) ; GFX9-NEXT: %masked:_(<2 x s16>) = G_AND %narrow, %masklow14vec - ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %masked(<2 x s16>) - ; GFX9-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 - ; GFX9-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvec(<2 x s32>) + ; GFX9-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL %masked, [[BUILD_VECTOR]](<2 x s16>) + ; GFX9-NEXT: %shl:_(<2 x s32>) = G_ZEXT [[SHL]](<2 x s16>) ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) %narrow:_(<2 x s16>) = COPY $vgpr0 %masklow14:_(s16) = G_CONSTANT i16 16383 @@ -176,10 +176,10 @@ ; GFX6-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 ; GFX6-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32) ; GFX6-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec - ; GFX6-NEXT: %extend:_(<2 x s64>) = G_ANYEXT %masked(<2 x s32>) ; GFX6-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 ; GFX6-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX6-NEXT: %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec(<2 x s32>) + ; GFX6-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>) + ; GFX6-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>) ; GFX6-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>) ; GFX9-LABEL: name: narrow_shl_v2s64_by_2_from_anyext_v2s32 ; GFX9: liveins: $vgpr0_vgpr1 @@ -188,10 +188,10 @@ ; GFX9-NEXT: %masklow30:_(s32) = G_CONSTANT i32 1073741823 ; GFX9-NEXT: %masklow30vec:_(<2 x s32>) = G_BUILD_VECTOR %masklow30(s32), %masklow30(s32) ; GFX9-NEXT: %masked:_(<2 x s32>) = G_AND %narrow, %masklow30vec - ; GFX9-NEXT: %extend:_(<2 x s64>) = G_ANYEXT %masked(<2 x s32>) ; GFX9-NEXT: %shiftamt:_(s32) = G_CONSTANT i32 2 ; GFX9-NEXT: %shiftamtvec:_(<2 x s32>) = G_BUILD_VECTOR %shiftamt(s32), %shiftamt(s32) - ; GFX9-NEXT: %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec(<2 x s32>) + ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL %masked, %shiftamtvec(<2 x s32>) + ; GFX9-NEXT: %shl:_(<2 x s64>) = G_ZEXT [[SHL]](<2 x s32>) ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl(<2 x s64>) %narrow:_(<2 x s32>) = COPY $vgpr0_vgpr1 %masklow30:_(s32) = G_CONSTANT i32 1073741823 @@ -203,3 +203,105 @@ %shl:_(<2 x s64>) = G_SHL %extend, %shiftamtvec $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %shl ... + +--- +name: do_not_shl_s32_zero_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_zero_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %zero:_(s16) = G_CONSTANT i16 0 + %extend:_(s32) = G_ZEXT %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... + +--- +name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 + ; GFX6: liveins: $vgpr0, $vgpr1 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16) + ; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) + ; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) + ; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + ; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16 + ; GFX9: liveins: $vgpr0, $vgpr1 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0 + ; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16) + ; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>) + ; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>) + ; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>) + %zero:_(s16) = G_CONSTANT i16 0 + %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt, %shiftamt:_(s16) + %extend:_(<2 x s32>) = G_ZEXT %zerovector:_(<2 x s16>) + %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector + $vgpr0_vgpr1 = COPY %shl +... + +--- +name: do_not_shl_s32_by_16_from_zext_s16 +tracksRegLiveness: true +legalized: true + +body: | + bb.0: + liveins: $vgpr0 + ; GFX6-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX6: liveins: $vgpr0 + ; GFX6-NEXT: {{ $}} + ; GFX6-NEXT: %argument:_(s32) = COPY $vgpr0 + ; GFX6-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) + ; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX6-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) + ; GFX6-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX6-NEXT: $vgpr0 = COPY %shl(s32) + ; GFX9-LABEL: name: do_not_shl_s32_by_16_from_zext_s16 + ; GFX9: liveins: $vgpr0 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: %argument:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: %truncate:_(s16) = G_TRUNC %argument(s32) + ; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16 + ; GFX9-NEXT: %extend:_(s32) = G_ZEXT %truncate(s16) + ; GFX9-NEXT: %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + ; GFX9-NEXT: $vgpr0 = COPY %shl(s32) + %argument:_(s32) = COPY $vgpr0 + %truncate:_(s16) = G_TRUNC %argument:_(s32) + %shiftamt:_(s16) = G_CONSTANT i16 16 + %extend:_(s32) = G_ZEXT %truncate:_(s16) + %shl:_(s32) = G_SHL %extend, %shiftamt(s16) + $vgpr0 = COPY %shl +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll @@ -876,8 +876,8 @@ ; ; GFX8-LABEL: s_sext_inreg_v4i16_14: ; GFX8: ; %bb.0: -; GFX8-NEXT: s_mov_b32 s0, 0xffff -; GFX8-NEXT: s_mov_b32 s1, 0xffff +; GFX8-NEXT: s_mov_b32 s0, 0 +; GFX8-NEXT: s_mov_b32 s1, 0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: s_sext_inreg_v4i16_14: @@ -965,10 +965,10 @@ ; GFX8-LABEL: v_sext_inreg_v8i16_11: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_sext_inreg_v8i16_11: Index: llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -434,12 +434,10 @@ ; GCN-NEXT: s_brev_b32 s2, -4 ; GCN-NEXT: s_mov_b32 s3, s2 ; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: s_lshl_b32 s2, s1, 2 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_mov_b32 s3, 0 -; GCN-NEXT: s_mov_b32 s2, s0 -; GCN-NEXT: s_mov_b32 s4, s1 -; GCN-NEXT: s_mov_b32 s5, s3 -; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 -; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 2 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_v2i64_zext_v2i32: @@ -448,11 +446,9 @@ ; GFX10PLUS-NEXT: s_mov_b32 s3, s2 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] ; GFX10PLUS-NEXT: s_mov_b32 s3, 0 -; GFX10PLUS-NEXT: s_mov_b32 s2, s0 -; GFX10PLUS-NEXT: s_mov_b32 s4, s1 -; GFX10PLUS-NEXT: s_mov_b32 s5, s3 -; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 -; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 2 +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10PLUS-NEXT: s_lshl_b32 s2, s1, 2 +; GFX10PLUS-NEXT: s_mov_b32 s1, 0 ; GFX10PLUS-NEXT: ; return to shader part epilog %and = and <2 x i32> %x, %ext = zext <2 x i32> %and to <2 x i64> @@ -461,59 +457,37 @@ } define <2 x i64> @v_shl_v2i64_zext_v2i32(<2 x i32> %x) { -; GFX7-LABEL: v_shl_v2i64_zext_v2i32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1 -; GFX7-NEXT: v_mov_b32_e32 v1, 0 -; GFX7-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 -; GFX7-NEXT: v_mov_b32_e32 v3, v1 -; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX7-NEXT: v_lshl_b64 v[2:3], v[2:3], 2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_shl_v2i64_zext_v2i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1 -; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 -; GFX8-NEXT: v_mov_b32_e32 v3, v1 -; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX8-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_shl_v2i64_zext_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v2, 0x3fffffff, v1 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 -; GFX9-NEXT: v_mov_b32_e32 v3, v1 -; GFX9-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_shl_v2i64_zext_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 +; GCN-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: v_shl_v2i64_zext_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1 ; GFX10-NEXT: v_mov_b32_e32 v3, 0 -; GFX10-NEXT: v_and_b32_e32 v2, 0x3fffffff, v0 -; GFX10-NEXT: v_and_b32_e32 v4, 0x3fffffff, v1 -; GFX10-NEXT: v_mov_b32_e32 v5, v3 -; GFX10-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3] -; GFX10-NEXT: v_lshlrev_b64 v[2:3], 2, v[4:5] +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: v_shl_v2i64_zext_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v2, 0x3fffffff, v0 -; GFX11-NEXT: v_dual_mov_b32 v5, v3 :: v_dual_and_b32 v4, 0x3fffffff, v1 -; GFX11-NEXT: v_lshlrev_b64 v[0:1], 2, v[2:3] -; GFX11-NEXT: v_lshlrev_b64 v[2:3], 2, v[4:5] +; GFX11-NEXT: v_and_b32_e32 v0, 0x3fffffff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0x3fffffff, v1 +; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1 +; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i32> %x, %ext = zext <2 x i32> %and to <2 x i64> @@ -527,12 +501,10 @@ ; GCN-NEXT: s_brev_b32 s2, -8 ; GCN-NEXT: s_mov_b32 s3, s2 ; GCN-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] -; GCN-NEXT: s_ashr_i32 s3, s0, 31 -; GCN-NEXT: s_mov_b32 s2, s0 -; GCN-NEXT: s_ashr_i32 s5, s1, 31 -; GCN-NEXT: s_mov_b32 s4, s1 -; GCN-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 -; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 2 +; GCN-NEXT: s_lshl_b32 s0, s0, 2 +; GCN-NEXT: s_lshl_b32 s2, s1, 2 +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_v2i64_sext_v2i32: @@ -540,12 +512,10 @@ ; GFX10PLUS-NEXT: s_brev_b32 s2, -8 ; GFX10PLUS-NEXT: s_mov_b32 s3, s2 ; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] -; GFX10PLUS-NEXT: s_ashr_i32 s3, s0, 31 -; GFX10PLUS-NEXT: s_mov_b32 s2, s0 -; GFX10PLUS-NEXT: s_ashr_i32 s5, s1, 31 -; GFX10PLUS-NEXT: s_mov_b32 s4, s1 -; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[2:3], 2 -; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[4:5], 2 +; GFX10PLUS-NEXT: s_mov_b32 s3, 0 +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 2 +; GFX10PLUS-NEXT: s_lshl_b32 s2, s1, 2 +; GFX10PLUS-NEXT: s_mov_b32 s1, 0 ; GFX10PLUS-NEXT: ; return to shader part epilog %and = and <2 x i32> %x, %ext = sext <2 x i32> %and to <2 x i64> @@ -554,50 +524,38 @@ } define <2 x i64> @v_shl_v2i64_sext_v2i32(<2 x i32> %x) { -; GFX7-LABEL: v_shl_v2i64_sext_v2i32: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1 -; GFX7-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX7-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX7-NEXT: v_lshl_b64 v[0:1], v[0:1], 2 -; GFX7-NEXT: v_lshl_b64 v[2:3], v[2:3], 2 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_shl_v2i64_sext_v2i32: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1 -; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX8-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX8-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] -; GFX8-NEXT: s_setpc_b64 s[30:31] +; GCN-LABEL: v_shl_v2i64_sext_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 +; GCN-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1 +; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GCN-NEXT: v_lshlrev_b32_e32 v2, 2, v1 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: v_shl_v2i64_sext_v2i32: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 -; GFX9-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1 -; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX9-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX9-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX10-LABEL: v_shl_v2i64_sext_v2i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 +; GFX10-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1 +; GFX10-NEXT: v_mov_b32_e32 v1, 0 +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX10PLUS-LABEL: v_shl_v2i64_sext_v2i32: -; GFX10PLUS: ; %bb.0: -; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 -; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0x1fffffff, v1 -; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 -; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2 -; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1] -; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3] -; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: v_shl_v2i64_sext_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_and_b32_e32 v0, 0x1fffffff, v0 +; GFX11-NEXT: v_and_b32_e32 v1, 0x1fffffff, v1 +; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_lshlrev_b32 v2, 2, v1 +; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i32> %x, %ext = sext <2 x i32> %and to <2 x i64> %shl = shl <2 x i64> %ext, @@ -680,9 +638,10 @@ ; GFX7-NEXT: s_or_b32 s0, s1, s0 ; GFX7-NEXT: s_and_b32 s0, s0, 0x3fff3fff ; GFX7-NEXT: s_lshr_b32 s1, s0, 16 -; GFX7-NEXT: s_and_b32 s0, s0, 0xffff ; GFX7-NEXT: s_lshl_b32 s0, s0, 2 ; GFX7-NEXT: s_lshl_b32 s1, s1, 2 +; GFX7-NEXT: s_and_b32 s0, s0, 0xffff +; GFX7-NEXT: s_and_b32 s1, s1, 0xffff ; GFX7-NEXT: ; return to shader part epilog ; ; GFX8-LABEL: s_shl_v2i32_zext_v2i16: @@ -700,18 +659,22 @@ ; GFX9: ; %bb.0: ; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff3fff ; GFX9-NEXT: s_lshr_b32 s1, s0, 16 -; GFX9-NEXT: s_and_b32 s0, s0, 0xffff -; GFX9-NEXT: s_lshl_b32 s0, s0, 2 +; GFX9-NEXT: s_lshl_b32 s0, s0, 0x20002 ; GFX9-NEXT: s_lshl_b32 s1, s1, 2 +; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 +; GFX9-NEXT: s_lshr_b32 s1, s0, 16 +; GFX9-NEXT: s_and_b32 s0, s0, 0xffff ; GFX9-NEXT: ; return to shader part epilog ; ; GFX10PLUS-LABEL: s_shl_v2i32_zext_v2i16: ; GFX10PLUS: ; %bb.0: ; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0x3fff3fff -; GFX10PLUS-NEXT: s_and_b32 s1, s0, 0xffff -; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16 -; GFX10PLUS-NEXT: s_lshl_b32 s0, s1, 2 -; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, 2 +; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16 +; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x20002 +; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 2 +; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s0, s1 +; GFX10PLUS-NEXT: s_and_b32 s0, s1, 0xffff +; GFX10PLUS-NEXT: s_lshr_b32 s1, s1, 16 ; GFX10PLUS-NEXT: ; return to shader part epilog %and = and <2 x i16> %x, %ext = zext <2 x i16> %and to <2 x i32> @@ -733,9 +696,10 @@ ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0 ; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: v_shl_v2i32_zext_v2i16: @@ -743,39 +707,28 @@ ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0 ; GFX8-NEXT: v_mov_b32_e32 v2, 2 -; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, 2, v1 +; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_shl_v2i32_zext_v2i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_mov_b32 s4, 2 -; GFX9-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX9-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0 +; GFX9-NEXT: v_pk_lshlrev_b16 v0, 2, v0 op_sel_hi:[0,1] +; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: v_shl_v2i32_zext_v2i16: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_and_b32_e32 v1, 0x3fff3fff, v0 -; GFX10-NEXT: s_mov_b32 s4, 2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_shl_v2i32_zext_v2i16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0 -; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v1 -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10PLUS-LABEL: v_shl_v2i32_zext_v2i16: +; GFX10PLUS: ; %bb.0: +; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0x3fff3fff, v0 +; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 2, v0 op_sel_hi:[0,1] +; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v1 +; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v1, 16, v1 +; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] %and = and <2 x i16> %x, %ext = zext <2 x i16> %and to <2 x i32> %shl = shl <2 x i32> %ext,