Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1520,6 +1520,51 @@ } } +static unsigned minMaxToExtend(unsigned Opc) { + switch (Opc) { + case TargetOpcode::G_SMIN: + case TargetOpcode::G_SMAX: + return TargetOpcode::G_SEXT; + case TargetOpcode::G_UMIN: + case TargetOpcode::G_UMAX: + return TargetOpcode::G_ZEXT; + default: + llvm_unreachable("not in integer min/max"); + } +} + +// Emit a legalized extension from <2 x s16> to 2 32-bit components, avoiding +// any illegal vector extend or unmerge operations. +static std::pair<Register, Register> +unpackV2S16ToS32(MachineIRBuilder &B, Register Src, unsigned ExtOpcode) { + const LLT S32 = LLT::scalar(32); + auto Bitcast = B.buildBitcast(S32, Src); + + if (ExtOpcode == TargetOpcode::G_SEXT) { + auto ExtLo = B.buildSExtInReg(S32, Bitcast, 16); + auto ShiftHi = B.buildAShr(S32, Bitcast, B.buildConstant(S32, 16)); + return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0)); + } + + auto ShiftHi = B.buildLShr(S32, Bitcast, B.buildConstant(S32, 16)); + if (ExtOpcode == TargetOpcode::G_ZEXT) { + auto ExtLo = B.buildAnd(S32, Bitcast, B.buildConstant(S32, 0xffff)); + return std::make_pair(ExtLo.getReg(0), ShiftHi.getReg(0)); + } + + assert(ExtOpcode == TargetOpcode::G_ANYEXT); + return std::make_pair(Bitcast.getReg(0), ShiftHi.getReg(0)); +} + +static MachineInstr *buildExpandedScalarMinMax(MachineIRBuilder &B, + CmpInst::Predicate Pred, + Register Dst, Register Src0, + Register Src1) { + const LLT CmpType = LLT::scalar(32); + auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1); + return B.buildSelect(Dst, Cmp, Src0, Src1); +} + // FIXME: Duplicated from LegalizerHelper, except changing the boolean type. 
void AMDGPURegisterBankInfo::lowerScalarMinMax(MachineIRBuilder &B, MachineInstr &MI) const { @@ -1528,12 +1573,10 @@ Register Src1 = MI.getOperand(2).getReg(); const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); - LLT CmpType = LLT::scalar(32); - - auto Cmp = B.buildICmp(Pred, CmpType, Src0, Src1); - B.buildSelect(Dst, Cmp, Src0, Src1); + MachineInstr *Sel = buildExpandedScalarMinMax(B, Pred, Dst, Src0, Src1); - B.getMRI()->setRegBank(Cmp.getReg(0), AMDGPU::SGPRRegBank); + Register CmpReg = Sel->getOperand(1).getReg(); + B.getMRI()->setRegBank(CmpReg, AMDGPU::SGPRRegBank); MI.eraseFromParent(); } @@ -2088,10 +2131,44 @@ // Turn scalar min/max into a compare and select. LLT Ty = MRI.getType(DstReg); - LLT S32 = LLT::scalar(32); - LLT S16 = LLT::scalar(16); + const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + const LLT V2S16 = LLT::vector(2, 16); + + if (Ty == V2S16) { + ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); + GISelObserverWrapper Observer(&ApplySALU); + B.setChangeObserver(Observer); - if (Ty == S16) { + // Need to widen to s32, and expand as cmp + select, and avoid producing + // illegal vector extends or unmerges that would need further + // legalization. + // + // TODO: Should we just readfirstlane? That should probably be handled + // with a UniformVGPR register bank that wouldn't need special + // consideration here. 
+ + Register Dst = MI.getOperand(0).getReg(); + Register Src0 = MI.getOperand(1).getReg(); + Register Src1 = MI.getOperand(2).getReg(); + + Register WideSrc0Lo, WideSrc0Hi; + Register WideSrc1Lo, WideSrc1Hi; + + unsigned ExtendOp = minMaxToExtend(MI.getOpcode()); + + std::tie(WideSrc0Lo, WideSrc0Hi) = unpackV2S16ToS32(B, Src0, ExtendOp); + std::tie(WideSrc1Lo, WideSrc1Hi) = unpackV2S16ToS32(B, Src1, ExtendOp); + + Register Lo = MRI.createGenericVirtualRegister(S32); + Register Hi = MRI.createGenericVirtualRegister(S32); + const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode()); + buildExpandedScalarMinMax(B, Pred, Lo, WideSrc0Lo, WideSrc1Lo); + buildExpandedScalarMinMax(B, Pred, Hi, WideSrc0Hi, WideSrc1Hi); + + B.buildBuildVectorTrunc(Dst, {Lo, Hi}); + MI.eraseFromParent(); + } else if (Ty == S16) { ApplyRegBankMapping ApplySALU(*this, MRI, &AMDGPU::SGPRRegBank); GISelObserverWrapper Observer(&ApplySALU); LegalizerHelper Helper(*MF, Observer, B); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smax.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s --- name: smax_s32_ss @@ -158,3 +158,93 @@ $vgpr0 = COPY %5 ... 
+ +--- +name: smax_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: smax_v2s16_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) + ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(sgt), [[ASHR]](s32), [[ASHR1]] + ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_SMAX %0, %1 + $sgpr0 = COPY %2 +... 
+ +--- +name: smax_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: smax_v2s16_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY2]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: smax_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: smax_v2s16_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY2]] + ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: smax_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: smax_v2s16_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[SMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_SMAX [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SMAX %0, %1 + $vgpr0 = COPY %2 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-smin.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s --- name: smin_s32_ss @@ -161,3 +161,93 @@ $vgpr0 = COPY %5 ... + +--- +name: smin_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: smin_v2s16_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[SEXT_INREG:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST]], 16 + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[ASHR:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[SEXT_INREG1:%[0-9]+]]:sgpr(s32) = G_SEXT_INREG [[BITCAST1]], 16 + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[ASHR1:%[0-9]+]]:sgpr(s32) = G_ASHR [[BITCAST1]], [[C1]](s32) + ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]] + ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[SEXT_INREG]], [[SEXT_INREG1]] + ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(slt), 
[[ASHR]](s32), [[ASHR1]] + ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[ASHR]], [[ASHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_SMIN %0, %1 + $sgpr0 = COPY %2 +... + +--- +name: smin_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: smin_v2s16_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY2]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: smin_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: smin_v2s16_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY2]] + ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: smin_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: smin_v2s16_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[SMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_SMIN [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[SMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SMIN %0, %1 + $vgpr0 = COPY %2 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umax.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s --- name: umax_s32_ss @@ -161,3 +161,95 @@ $vgpr0 = COPY %5 ... 
+ +--- +name: umax_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: umax_v2s16_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] + ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[AND]](s32), [[AND1]] + ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]] + ; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ugt), [[LSHR]](s32), [[LSHR1]] + ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_UMAX %0, %1 + $sgpr0 = COPY %2 +... 
+ +--- +name: umax_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: umax_v2s16_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY2]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: umax_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: umax_v2s16_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY2]] + ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: umax_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: umax_v2s16_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[UMAX:%[0-9]+]]:vgpr(<2 x s16>) = G_UMAX [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMAX]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_UMAX %0, %1 + $vgpr0 = COPY %2 +... 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-umin.mir @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-fast -o - %s | FileCheck %s +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=regbankselect -verify-machineinstrs -regbankselect-greedy -o - %s | FileCheck %s --- name: umin_s32_ss @@ -165,3 +165,95 @@ $vgpr0 = COPY %5 ... + +--- +name: umin_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + + ; CHECK-LABEL: name: umin_v2s16_ss + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST]], [[C1]] + ; CHECK: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C2]](s32) + ; CHECK: [[C3:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 65535 + ; CHECK: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[BITCAST1]], [[C3]] + ; CHECK: [[ICMP:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[AND]](s32), [[AND1]] + ; CHECK: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP]](s32), [[AND]], [[AND1]] + 
; CHECK: [[ICMP1:%[0-9]+]]:sgpr(s32) = G_ICMP intpred(ult), [[LSHR]](s32), [[LSHR1]] + ; CHECK: [[SELECT1:%[0-9]+]]:sgpr(s32) = G_SELECT [[ICMP1]](s32), [[LSHR]], [[LSHR1]] + ; CHECK: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SELECT]](s32), [[SELECT1]](s32) + ; CHECK: $sgpr0 = COPY [[BUILD_VECTOR_TRUNC]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_UMIN %0, %1 + $sgpr0 = COPY %2 +... + +--- +name: umin_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: umin_v2s16_sv + ; CHECK: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY2]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: umin_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + + ; CHECK-LABEL: name: umin_v2s16_vs + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY2]] + ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: umin_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; CHECK-LABEL: name: umin_v2s16_vv + ; CHECK: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK: [[UMIN:%[0-9]+]]:vgpr(<2 x s16>) = G_UMIN [[COPY]], [[COPY1]] + ; CHECK: $vgpr0 = COPY [[UMIN]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_UMIN %0, %1 + $vgpr0 = COPY %2 +...