Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8008,7 +8008,8 @@
     return false;

   const auto optimizeCmpAnd = [&CmpInstr, SrcReg, CmpValue, MRI,
-                               this](int64_t ExpectedValue) -> bool {
+                               this](int64_t ExpectedValue,
+                                     unsigned SrcSize) -> bool {
     // s_cmp_eq_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_eq_i32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
     // s_cmp_ge_u32 (s_and_b32 $src, 1), 1 => s_and_b32 $src, 1
@@ -8019,10 +8020,9 @@
     // s_cmp_gt_u32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
     // s_cmp_gt_i32 (s_and_b32 $src, 1), 0 => s_and_b32 $src, 1
     // s_cmp_lg_u64 (s_and_b64 $src, 1), 0 => s_and_b64 $src, 1
-
-    // TODO: Fold this into s_bitcmp* if result of an AND is unused.
-    // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
-    //       process any power of 2.
+    //
+    // If the result of the AND is unused except in the compare:
+    // s_and_b(32|64) $src, 1 => s_bitcmp1_b(32|64) $src, 0

     if (CmpValue != ExpectedValue)
       return false;
@@ -8035,8 +8035,10 @@
         Def->getOpcode() != AMDGPU::S_AND_B64)
       return false;

-    if ((!Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 1) &&
-        (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1))
+    MachineOperand *SrcOp = &Def->getOperand(1);
+    if (SrcOp->isImm() && SrcOp->getImm() == 1)
+      SrcOp = &Def->getOperand(2);
+    else if (!Def->getOperand(2).isImm() || Def->getOperand(2).getImm() != 1)
       return false;

     for (auto I = std::next(Def->getIterator()), E = CmpInstr.getIterator();
@@ -8050,6 +8052,23 @@
       SccDef->setIsDead(false);
     CmpInstr.eraseFromParent();

+    if (!MRI->use_nodbg_empty(Def->getOperand(0).getReg()))
+      return true;
+
+    // Replace an AND whose result is unused with an S_BITCMP.
+    // TODO: If s_bitcmp can be used we are not limited to 1 and 0 but can
+    //       process any power of 2.
+    MachineBasicBlock *MBB = Def->getParent();
+
+    // TODO: Reversed conditions can use S_BITCMP0_*.
+    unsigned NewOpc = (SrcSize == 32) ? AMDGPU::S_BITCMP1_B32
+                                      : AMDGPU::S_BITCMP1_B64;
+
+    BuildMI(*MBB, Def, Def->getDebugLoc(), get(NewOpc))
+        .add(*SrcOp)
+        .addImm(0);
+    Def->eraseFromParent();
+
     return true;
   };

@@ -8060,22 +8079,24 @@
   case AMDGPU::S_CMP_EQ_U32:
   case AMDGPU::S_CMP_EQ_I32:
   case AMDGPU::S_CMP_GE_U32:
   case AMDGPU::S_CMP_GE_I32:
-  case AMDGPU::S_CMP_EQ_U64:
   case AMDGPU::S_CMPK_EQ_U32:
   case AMDGPU::S_CMPK_EQ_I32:
   case AMDGPU::S_CMPK_GE_U32:
   case AMDGPU::S_CMPK_GE_I32:
-    return optimizeCmpAnd(1);
+    return optimizeCmpAnd(1, 32);
+  case AMDGPU::S_CMP_EQ_U64:
+    return optimizeCmpAnd(1, 64);
   case AMDGPU::S_CMP_LG_U32:
   case AMDGPU::S_CMP_LG_I32:
   case AMDGPU::S_CMP_GT_U32:
   case AMDGPU::S_CMP_GT_I32:
-  case AMDGPU::S_CMP_LG_U64:
   case AMDGPU::S_CMPK_LG_U32:
   case AMDGPU::S_CMPK_LG_I32:
   case AMDGPU::S_CMPK_GT_U32:
   case AMDGPU::S_CMPK_GT_I32:
-    return optimizeCmpAnd(0);
+    return optimizeCmpAnd(0, 32);
+  case AMDGPU::S_CMP_LG_U64:
+    return optimizeCmpAnd(0, 64);
   }

   return false;
Index: llvm/test/CodeGen/AMDGPU/optimize-compare.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/optimize-compare.mir
+++ llvm/test/CodeGen/AMDGPU/optimize-compare.mir
@@ -8,7 +8,40 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
+  ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN: successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN: S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = COPY $sgpr0
+    %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...
+
+---
+name: and_1_cmp_eq_u32_1_used_and
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_1_used_and
+  ; GCN: bb.0:
+  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
   ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_NOP 0, implicit [[S_AND_B32_]]
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -21,6 +54,7 @@

     %0:sreg_32 = COPY $sgpr0
     %1:sreg_32 = S_AND_B32 1, killed %0, implicit-def dead $scc
+    S_NOP 0, implicit %1
     S_CMP_EQ_U32 killed %1:sreg_32, 1, implicit-def $scc
     S_CBRANCH_SCC0 %bb.2, implicit $scc
     S_BRANCH %bb.1
@@ -40,7 +74,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -177,7 +211,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[COPY]], 1, implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -446,7 +480,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -478,7 +512,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -510,7 +544,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -542,7 +576,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -607,7 +641,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -639,7 +673,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -671,7 +705,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-  ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -703,7 +737,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-  ; GCN: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B64 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -735,7 +769,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -767,7 +801,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -799,7 +833,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -831,7 +865,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -863,7 +897,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -895,7 +929,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -927,7 +961,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -959,7 +993,7 @@
   ; GCN: bb.0:
   ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-  ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 1, killed [[COPY]], implicit-def $scc
+  ; GCN: S_BITCMP1_B32 killed [[COPY]], 0, implicit-def $scc
   ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
   ; GCN: S_BRANCH %bb.1
   ; GCN: bb.1:
@@ -983,3 +1017,33 @@
     S_ENDPGM 0

 ...
+
+---
+name: and_1_cmp_eq_u32_1_imm_src
+body: |
+  ; GCN-LABEL: name: and_1_cmp_eq_u32_1_imm_src
+  ; GCN: bb.0:
+  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN: S_BITCMP1_B32 11, 0, implicit-def $scc
+  ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: S_BRANCH %bb.1
+  ; GCN: bb.1:
+  ; GCN: successors: %bb.2(0x80000000)
+  ; GCN: bb.2:
+  ; GCN: S_ENDPGM 0
+  bb.0:
+    successors: %bb.1(0x40000000), %bb.2(0x40000000)
+    liveins: $sgpr0, $vgpr0_vgpr1
+
+    %0:sreg_32 = S_AND_B32 1, 11, implicit-def dead $scc
+    S_CMP_EQ_U32 killed %0:sreg_32, 1, implicit-def $scc
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+  bb.2:
+    S_ENDPGM 0
+
+...