Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -87,7 +87,7 @@
   bool selectG_CONSTANT(MachineInstr &I) const;
   bool selectG_AND_OR_XOR(MachineInstr &I) const;
   bool selectG_ADD_SUB(MachineInstr &I) const;
-  bool selectG_UADDO_USUBO(MachineInstr &I) const;
+  bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;
   bool selectG_EXTRACT(MachineInstr &I) const;
   bool selectG_MERGE_VALUES(MachineInstr &I) const;
   bool selectG_UNMERGE_VALUES(MachineInstr &I) const;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -386,21 +386,26 @@
   return true;
 }
 
-bool AMDGPUInstructionSelector::selectG_UADDO_USUBO(MachineInstr &I) const {
+bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
+    MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
   MachineFunction *MF = BB->getParent();
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const DebugLoc &DL = I.getDebugLoc();
   Register Dst0Reg = I.getOperand(0).getReg();
   Register Dst1Reg = I.getOperand(1).getReg();
-  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO;
+  const bool IsAdd = I.getOpcode() == AMDGPU::G_UADDO ||
+                     I.getOpcode() == AMDGPU::G_UADDE;
+  const bool HasCarryIn = I.getOpcode() == AMDGPU::G_UADDE ||
+                          I.getOpcode() == AMDGPU::G_USUBE;
 
   if (isVCC(Dst1Reg, MRI)) {
-    // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
-    // carry out despite the _i32 name. These were renamed in VI to _U32.
-    // FIXME: We should probably rename the opcodes here.
-    unsigned NewOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
-    I.setDesc(TII.get(NewOpc));
+    // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned
+    // carry out despite the _i32 name. These were renamed in VI to _U32.
+    // FIXME: We should probably rename the opcodes here.
+    unsigned NoCarryOpc = IsAdd ? AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;
+    unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
+    I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
     I.addOperand(*MF, MachineOperand::CreateReg(AMDGPU::EXEC, false, true));
     I.addOperand(*MF, MachineOperand::CreateImm(0));
     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
@@ -408,8 +413,16 @@
 
   Register Src0Reg = I.getOperand(2).getReg();
   Register Src1Reg = I.getOperand(3).getReg();
-  unsigned NewOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
-  BuildMI(*BB, &I, DL, TII.get(NewOpc), Dst0Reg)
+
+  if (HasCarryIn) {
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
+      .addReg(I.getOperand(4).getReg());
+  }
+
+  unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
+  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
+
+  BuildMI(*BB, &I, DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
     .add(I.getOperand(2))
     .add(I.getOperand(3));
   BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), Dst1Reg)
@@ -423,6 +436,11 @@
       !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, MRI))
     return false;
 
+  if (HasCarryIn &&
+      !RBI.constrainGenericRegister(I.getOperand(4).getReg(),
+                                    AMDGPU::SReg_32RegClass, MRI))
+    return false;
+
   I.eraseFromParent();
   return true;
 }
@@ -1629,7 +1647,9 @@
     return selectG_ADD_SUB(I);
   case TargetOpcode::G_UADDO:
   case TargetOpcode::G_USUBO:
-    return selectG_UADDO_USUBO(I);
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_USUBE:
+    return selectG_UADDO_USUBO_UADDE_USUBE(I);
   case TargetOpcode::G_INTTOPTR:
   case TargetOpcode::G_BITCAST:
   case TargetOpcode::G_PTRTOINT:
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: uadde_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: uadde_s32_s1_vsv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: uadde_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: uadde_s32_s1_vvs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-uadde.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: uadde_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    ; WAVE64-LABEL: name: uadde_s32_s1_sss
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY3]]
+    ; WAVE64: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY4]]
+    ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE64: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+    ; WAVE32-LABEL: name: uadde_s32_s1_sss
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY3]]
+    ; WAVE32: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY4]]
+    ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE32: S_ENDPGM 0, implicit [[S_ADDC_U32_]], implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = G_CONSTANT i32 0
+    %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+    %5:sgpr(s32), %6:sgpr(s32) = G_UADDE %0, %1, %4
+    %7:sgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: uadde_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; WAVE64-LABEL: name: uadde_s32_s1_vvv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    ; WAVE32-LABEL: name: uadde_s32_s1_vvv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_ADDC_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_ADDC_U32_e64_1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_ADDC_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_UADDE %0, %1, %4
+    %7:vgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.gfx10.mir
@@ -0,0 +1,70 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s
+
+# These violate the constant bus restriction pre-gfx10
+
+---
+name: usube_s32_s1_vsv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: usube_s32_s1_vsv
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = COPY $vgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
+
+---
+name: usube_s32_s1_vvs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $vgpr0
+
+    ; GFX10-LABEL: name: usube_s32_s1_vvs
+    ; GFX10: $vcc_hi = IMPLICIT_DEF
+    ; GFX10: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GFX10: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GFX10: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; GFX10: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GFX10: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    ; GFX10: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[V_MOV_B32_e32_2]], 0, [[V_MOV_B32_e32_1]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; GFX10: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:sgpr(s32) = COPY $sgpr0
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_CONSTANT i32 0
+    %8:vgpr(s32) = G_CONSTANT i32 1
+    %9:vgpr(s32) = G_SELECT %6, %7, %8
+    S_ENDPGM 0, implicit %5, implicit %9
+...
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-usube.mir
@@ -0,0 +1,89 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE64 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=WAVE32 %s
+
+---
+name: usube_s32_s1_sss
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2
+
+    ; WAVE64-LABEL: name: usube_s32_s1_sss
+    ; WAVE64: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE64: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE64: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE64: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY3]]
+    ; WAVE64: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE64: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE64: $scc = COPY [[COPY4]]
+    ; WAVE64: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE64: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+    ; WAVE32-LABEL: name: usube_s32_s1_sss
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; WAVE32: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; WAVE32: S_CMP_EQ_U32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; WAVE32: [[COPY3:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY3]]
+    ; WAVE32: [[S_SUBB_U32_:%[0-9]+]]:sreg_32 = S_SUBB_U32 [[COPY]], [[COPY1]], implicit-def $scc, implicit $scc
+    ; WAVE32: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; WAVE32: $scc = COPY [[COPY4]]
+    ; WAVE32: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY]], [[COPY1]], implicit $scc
+    ; WAVE32: S_ENDPGM 0, implicit [[S_SUBB_U32_]], implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = G_CONSTANT i32 0
+    %4:sgpr(s32) = G_ICMP intpred(eq), %2, %3
+    %5:sgpr(s32), %6:sgpr(s32) = G_USUBE %0, %1, %4
+    %7:sgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...
+
+---
+name: usube_s32_s1_vvv
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; WAVE64-LABEL: name: usube_s32_s1_vvv
+    ; WAVE64: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE64: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE64: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE64: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE64: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE64: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE64: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; WAVE64: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    ; WAVE32-LABEL: name: usube_s32_s1_vvv
+    ; WAVE32: $vcc_hi = IMPLICIT_DEF
+    ; WAVE32: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; WAVE32: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; WAVE32: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; WAVE32: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; WAVE32: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[V_MOV_B32_e32_]], implicit $exec
+    ; WAVE32: [[V_SUBB_U32_e64_:%[0-9]+]]:vgpr_32, [[V_SUBB_U32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_SUBB_U32_e64 [[COPY]], [[COPY1]], [[V_CMP_EQ_U32_e64_]], 0, implicit $exec
+    ; WAVE32: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[COPY]], [[V_SUBB_U32_e64_1]], implicit $exec
+    ; WAVE32: S_ENDPGM 0, implicit [[V_SUBB_U32_e64_]], implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = G_CONSTANT i32 0
+    %4:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %5:vgpr(s32), %6:vcc(s1) = G_USUBE %0, %1, %4
+    %7:vgpr(s32) = G_SELECT %6, %0, %1
+    S_ENDPGM 0, implicit %5, implicit %7
+...