Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1011,7 +1011,7 @@
   // By default, we'll try and emit a CSEL.
   unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
   bool Optimized = false;
-  auto TryOptNegIntoSelect = [&Opc, &False, Is32Bit, &MRI]() {
+  auto TryFoldBinOpIntoSelect = [&Opc, &False, Is32Bit, &MRI]() {
     // Attempt to fold:
     //
     // sub = G_SUB 0, x
@@ -1019,10 +1019,27 @@
     //
     // Into:
     // select = CSNEG true, x, cc
-    if (!mi_match(False, MRI, m_Neg(m_Reg(False))))
-      return false;
-    Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
-    return true;
+    Register MatchReg;
+    if (mi_match(False, MRI, m_Neg(m_Reg(MatchReg)))) {
+      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
+      False = MatchReg;
+      return true;
+    }
+
+    // Attempt to fold:
+    //
+    // xor = G_XOR x, -1
+    // select = G_SELECT cc, true, xor
+    //
+    // Into:
+    // select = CSINV true, x, cc
+    if (mi_match(False, MRI, m_Not(m_Reg(MatchReg)))) {
+      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
+      False = MatchReg;
+      return true;
+    }
+
+    return false;
   };
 
   // Helper lambda which tries to use CSINC/CSINV for the instruction when its
@@ -1100,7 +1117,7 @@
     return false;
   };
 
-  Optimized |= TryOptNegIntoSelect();
+  Optimized |= TryFoldBinOpIntoSelect();
   Optimized |= TryOptSelectCst();
   auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
   constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
Index: llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/select-select.mir
@@ -437,3 +437,98 @@
     %select:gpr(s32) = G_SELECT %cond(s1), %t, %sub
     $w0 = COPY %select(s32)
     RET_ReallyLR implicit $w0
+...
+---
+name:            csinv_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
+
+    ; CHECK-LABEL: name: csinv_s32
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %reg0:gpr32 = COPY $w0
+    ; CHECK: %reg1:gpr32 = COPY $w1
+    ; CHECK: %t:gpr32 = COPY $w2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %reg0, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr32 = CSINVWr %t, %reg1, 1, implicit $nzcv
+    ; CHECK: $w0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $w0
+    %reg0:gpr(s32) = COPY $w0
+    %reg1:gpr(s32) = COPY $w1
+    %cond:gpr(s1) = G_TRUNC %reg0(s32)
+    %t:gpr(s32) = COPY $w2
+    %negative_one:gpr(s32) = G_CONSTANT i32 -1
+    %xor:gpr(s32) = G_XOR %reg1(s32), %negative_one
+    %select:gpr(s32) = G_SELECT %cond(s1), %t, %xor
+    $w0 = COPY %select(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            csinv_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; G_SELECT cc, true, (G_XOR x, -1) -> CSINV true, x, cc
+
+    ; CHECK-LABEL: name: csinv_s64
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %cond:gpr32 = COPY %reg0.sub_32
+    ; CHECK: %t:gpr64 = COPY $x2
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr64 = CSINVXr %t, %reg1, 1, implicit $nzcv
+    ; CHECK: $x0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %cond:gpr(s1) = G_TRUNC %reg0(s64)
+    %t:gpr(s64) = COPY $x2
+    %negative_one:gpr(s64) = G_CONSTANT i64 -1
+    %xor:gpr(s64) = G_XOR %reg1(s64), %negative_one
+    %select:gpr(s64) = G_SELECT %cond(s1), %t, %xor
+    $x0 = COPY %select(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            xor_not_negative_one
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; zext(s32 -1) != s64 -1, so we can't fold it away.
+
+    ; CHECK-LABEL: name: xor_not_negative_one
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %reg0:gpr64 = COPY $x0
+    ; CHECK: %reg1:gpr64 = COPY $x1
+    ; CHECK: %cond:gpr32 = COPY %reg0.sub_32
+    ; CHECK: %t:gpr64 = COPY $x2
+    ; CHECK: %negative_one:gpr32 = MOVi32imm -1
+    ; CHECK: %zext:gpr64 = SUBREG_TO_REG 0, %negative_one, %subreg.sub_32
+    ; CHECK: %xor:gpr64 = EORXrr %reg1, %zext
+    ; CHECK: [[ANDSWri:%[0-9]+]]:gpr32 = ANDSWri %cond, 0, implicit-def $nzcv
+    ; CHECK: %select:gpr64 = CSELXr %t, %xor, 1, implicit $nzcv
+    ; CHECK: $x0 = COPY %select
+    ; CHECK: RET_ReallyLR implicit $x0
+    %reg0:gpr(s64) = COPY $x0
+    %reg1:gpr(s64) = COPY $x1
+    %cond:gpr(s1) = G_TRUNC %reg0(s64)
+    %t:gpr(s64) = COPY $x2
+    %negative_one:gpr(s32) = G_CONSTANT i32 -1
+    %zext:gpr(s64) = G_ZEXT %negative_one(s32)
+    %xor:gpr(s64) = G_XOR %reg1(s64), %zext
+    %select:gpr(s64) = G_SELECT %cond(s1), %t, %xor
+    $x0 = COPY %select(s64)
+    RET_ReallyLR implicit $x0
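
For context, here is a minimal C-level sketch of a pattern this fold should now select to a single csinv instead of eor + csel. This example is not part of the patch; the function name and the exact registers in the expected codegen are illustrative assumptions.

    /* GlobalISel lowers ~x to G_XOR x, -1, so the false arm of the
       select matches the new m_Not pattern and folds into CSINV. */
    int select_not(int c, int t, int x) {
      return c ? t : ~x;
    }

    /* Roughly expected AArch64 codegen (register choice may differ):
         tst   w0, #0x1
         csinv w0, w1, w2, ne */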