Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -252,7 +252,6 @@ bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; - bool SelectVOP3Mods_f32(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const; bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; @@ -2430,15 +2429,6 @@ return isNoNanSrc(Src); } -bool AMDGPUDAGToDAGISel::SelectVOP3Mods_f32(SDValue In, SDValue &Src, - SDValue &SrcMods) const { - if (In.getValueType() == MVT::f32) - return SelectVOP3Mods(In, Src, SrcMods); - Src = In; - SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);; - return true; -} - bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) return false; Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -1317,6 +1317,9 @@ } bool AMDGPUInstructionSelector::selectG_SELECT(MachineInstr &I) const { + if (selectImpl(I, *CoverageInfo)) + return true; + MachineBasicBlock *BB = I.getParent(); const DebugLoc &DL = I.getDebugLoc(); Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1278,8 +1278,8 @@ def VOP3NoMods : ComplexPattern; // VOP3Mods, but the input source is known to never be NaN. 
def VOP3Mods_nnan : ComplexPattern; -// VOP3Mods, but only allowed for f32 operands. -def VOP3Mods_f32 : ComplexPattern; +// VOP3Mods, but only allowed for i32/f32 operands. +def VOP3Mods_f32 : ComplexPattern; def VOP3OMods : ComplexPattern; Index: llvm/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstructions.td +++ llvm/lib/Target/AMDGPU/SIInstructions.td @@ -888,18 +888,22 @@ let SubtargetPredicate = Has16BitInsts; } -multiclass SelectPat { - def : GCNPat < - (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods), - (VOP3Mods_f32 vt:$src2, i32:$src2_mods))), - (V_CNDMASK_B32_e64 $src2_mods, $src2, $src1_mods, $src1, $src0) - >; -} +class VOPSelectModsPat : GCNPat < + (vt (select i1:$src0, (VOP3Mods_f32 vt:$src1, i32:$src1_mods), + (VOP3Mods_f32 vt:$src2, i32:$src2_mods))), + (V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2, + FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0) +>; + +class VOPSelectPat : GCNPat < + (vt (select i1:$src0, vt:$src1, vt:$src2)), + (V_CNDMASK_B32_e64 0, VSrc_b32:$src2, 0, VSrc_b32:$src1, SSrc_i1:$src0) +>; -defm : SelectPat ; -defm : SelectPat ; -defm : SelectPat ; -defm : SelectPat ; +def : VOPSelectModsPat ; +def : VOPSelectModsPat ; +def : VOPSelectPat ; +def : VOPSelectPat ; let AddedComplexity = 1 in { def : GCNPat < Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-select.mir @@ -344,3 +344,219 @@ S_ENDPGM 0, implicit %5 ... 
+
+# Fold fneg/fabs source modifiers into the VOP select (V_CNDMASK_B32_e64).
+---
+name: select_s32_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 1, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FNEG %2
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(s32) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_vcc_fneg_rhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_rhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 1, [[COPY3]], 0, [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FNEG %3
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(s32) = G_SELECT %5, %2, %4
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_vcc_fneg_fabs_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s32_vcc_fneg_fabs_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY2]], 3, [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s32) = G_FABS %3
+    %5:vgpr(s32) = G_FNEG %4
+    %6:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %7:vgpr(s32) = G_SELECT %6, %5, %2
+    S_ENDPGM 0, implicit %7
+
+...
+
+# Make sure we don't try to fold source modifiers into a non-32-bit value.
+---
+name: select_s16_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_s16_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32768
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY]], implicit $exec
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY2]], [[COPY3]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY1]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(s32) = COPY $vgpr2
+    %3:vgpr(s32) = COPY $vgpr3
+    %4:vgpr(s16) = G_TRUNC %0
+    %5:vgpr(s16) = G_TRUNC %1
+    %6:vgpr(s16) = G_FNEG %4
+    %7:vcc(s1) = G_ICMP intpred(eq), %2, %3
+    %8:vgpr(s16) = G_SELECT %7, %6, %5
+    S_ENDPGM 0, implicit %8
+
+...
+
+# Make sure we don't try to fold source modifiers into a vector select; the fneg must stay a separate V_XOR_B32.
+---
+name: select_v2s16_vcc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+    ; GCN-LABEL: name: select_v2s16_vcc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147516416
+    ; GCN: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[S_MOV_B32_]], [[COPY2]], implicit $exec
+    ; GCN: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[COPY]], [[COPY1]], implicit $exec
+    ; GCN: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[COPY3]], 0, [[V_XOR_B32_e32_]], [[V_CMP_EQ_U32_e64_]], implicit $exec
+    ; GCN: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = COPY $vgpr1
+    %2:vgpr(<2 x s16>) = COPY $vgpr2
+    %3:vgpr(<2 x s16>) = COPY $vgpr3
+    %4:vgpr(<2 x s16>) = G_FNEG %2
+    %5:vcc(s1) = G_ICMP intpred(eq), %0, %1
+    %6:vgpr(<2 x s16>) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+# Make sure we don't try to fold source modifiers into a scalar select; the fneg stays an S_XOR_B32.
+
+---
+name: select_s32_scc_fneg_lhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    ; GCN-LABEL: name: select_s32_scc_fneg_lhs
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+    ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY4]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_XOR_B32_]], [[COPY3]], implicit $scc
+    ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = COPY $sgpr3
+    %4:sgpr(s32) = G_FNEG %2
+    %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+    %6:sgpr(s32) = G_SELECT %5, %4, %3
+    S_ENDPGM 0, implicit %6
+
+...
+
+---
+name: select_s32_scc_fneg_rhs
+legalized: true
+regBankSelected: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
+
+    ; GCN-LABEL: name: select_s32_scc_fneg_rhs
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
+    ; GCN: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483648
+    ; GCN: [[S_XOR_B32_:%[0-9]+]]:sreg_32 = S_XOR_B32 [[COPY3]], [[S_MOV_B32_]], implicit-def $scc
+    ; GCN: S_CMP_EQ_U32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; GCN: [[COPY4:%[0-9]+]]:sreg_32 = COPY $scc
+    ; GCN: $scc = COPY [[COPY4]]
+    ; GCN: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[COPY2]], [[S_XOR_B32_]], implicit $scc
+    ; GCN: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = COPY $sgpr1
+    %2:sgpr(s32) = COPY $sgpr2
+    %3:sgpr(s32) = COPY $sgpr3
+    %4:sgpr(s32) = G_FNEG %3
+    %5:sgpr(s32) = G_ICMP intpred(eq), %0, %1
+    %6:sgpr(s32) = G_SELECT %5, %2, %4
+    S_ENDPGM 0, implicit %6
+
+...