Index: lib/Target/AArch64/AArch64FastISel.cpp
===================================================================
--- lib/Target/AArch64/AArch64FastISel.cpp
+++ lib/Target/AArch64/AArch64FastISel.cpp
@@ -2275,7 +2275,6 @@
   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
 
-  AArch64CC::CondCode CC = AArch64CC::NE;
   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
     if (CI->hasOneUse() && isValueAvailable(CI)) {
       // Try to optimize or fold the cmp.
@@ -2307,7 +2306,7 @@
 
       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
       // instruction.
-      CC = getCompareCC(Predicate);
+      AArch64CC::CondCode CC = getCompareCC(Predicate);
       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
       switch (Predicate) {
       default:
@@ -2338,37 +2337,6 @@
       finishCondBranch(BI->getParent(), TBB, FBB);
       return true;
     }
-  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
-    MVT SrcVT;
-    if (TI->hasOneUse() && isValueAvailable(TI) &&
-        isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) {
-      unsigned CondReg = getRegForValue(TI->getOperand(0));
-      if (!CondReg)
-        return false;
-      bool CondIsKill = hasTrivialKill(TI->getOperand(0));
-
-      // Issue an extract_subreg to get the lower 32-bits.
-      if (SrcVT == MVT::i64) {
-        CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill,
-                                             AArch64::sub_32);
-        CondIsKill = true;
-      }
-
-      unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1);
-      assert(ANDReg && "Unexpected AND instruction emission failure.");
-      emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
-      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
-        std::swap(TBB, FBB);
-        CC = AArch64CC::EQ;
-      }
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
-          .addImm(CC)
-          .addMBB(TBB);
-
-      finishCondBranch(BI->getParent(), TBB, FBB);
-      return true;
-    }
   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
     uint64_t Imm = CI->getZExtValue();
     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
@@ -2383,20 +2351,23 @@
     } else
       FuncInfo.MBB->addSuccessorWithoutProb(Target);
     return true;
-  } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
-    // Fake request the condition, otherwise the intrinsic might be completely
-    // optimized away.
-    unsigned CondReg = getRegForValue(BI->getCondition());
-    if (!CondReg)
-      return false;
+  } else {
+    AArch64CC::CondCode CC = AArch64CC::NE;
+    if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+      // Fake request the condition, otherwise the intrinsic might be completely
+      // optimized away.
+      unsigned CondReg = getRegForValue(BI->getCondition());
+      if (!CondReg)
+        return false;
 
-    // Emit the branch.
-    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
-        .addImm(CC)
-        .addMBB(TBB);
+      // Emit the branch.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+          .addImm(CC)
+          .addMBB(TBB);
 
-    finishCondBranch(BI->getParent(), TBB, FBB);
-    return true;
+      finishCondBranch(BI->getParent(), TBB, FBB);
+      return true;
+    }
   }
 
   unsigned CondReg = getRegForValue(BI->getCondition());
@@ -2404,26 +2375,19 @@
     return false;
   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
 
-  // We've been divorced from our compare! Our block was split, and
-  // now our compare lives in a predecessor block. We musn't
-  // re-compare here, as the children of the compare aren't guaranteed
-  // live across the block boundary (we *could* check for this).
-  //
-  // Regardless, the compare has been done in the predecessor block,
-  // and it left a value for us in a virtual register. Ergo, we test
-  // the one-bit value left in the virtual register.
-  //
-  // FIXME: Optimize this with TBZW/TBZNW.
-  unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1);
-  assert(ANDReg && "Unexpected AND instruction emission failure.");
-  emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0);
-
+  // i1 conditions come as a i32 values, test the lowest bit with tb(n)z.
+  unsigned Opcode = AArch64::TBNZW;
   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
     std::swap(TBB, FBB);
-    CC = AArch64CC::EQ;
+    Opcode = AArch64::TBZW;
   }
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
-      .addImm(CC)
+
+  const MCInstrDesc &II = TII.get(Opcode);
+  unsigned ConstrainedCondReg
+    = constrainOperandRegClass(II, CondReg, II.getNumDefs());
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+      .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
+      .addImm(0)
       .addMBB(TBB);
 
   finishCondBranch(BI->getParent(), TBB, FBB);
Index: test/CodeGen/AArch64/arm64-fast-isel-br.ll
===================================================================
--- test/CodeGen/AArch64/arm64-fast-isel-br.ll
+++ test/CodeGen/AArch64/arm64-fast-isel-br.ll
@@ -94,9 +94,7 @@
   store i32 %c, i32* %c.addr, align 4
   store i64 %d, i64* %d.addr, align 8
   %0 = load i16, i16* %b.addr, align 2
-; CHECK: and w0, w0, #0x1
-; CHECK: cmp w0, #0
-; CHECK: b.eq LBB4_2
+; CHECK: tbz w0, #0, LBB4_2
   %conv = trunc i16 %0 to i1
   br i1 %conv, label %if.then, label %if.end
 
@@ -106,9 +104,7 @@
 
 if.end: ; preds = %if.then, %entry
   %1 = load i32, i32* %c.addr, align 4
-; CHECK: and w[[REG:[0-9]+]], w{{[0-9]+}}, #0x1
-; CHECK: cmp w[[REG]], #0
-; CHECK: b.eq LBB4_4
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4
   %conv1 = trunc i32 %1 to i1
   br i1 %conv1, label %if.then3, label %if.end4
 
@@ -118,8 +114,7 @@
 
 if.end4: ; preds = %if.then3, %if.end
   %2 = load i64, i64* %d.addr, align 8
-; CHECK: cmp w{{[0-9]+}}, #0
-; CHECK: b.eq LBB4_6
+; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6
   %conv5 = trunc i64 %2 to i1
   br i1 %conv5, label %if.then7, label %if.end8
 
@@ -139,9 +134,7 @@
 ; CHECK: trunc64
 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1
 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]]
-; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1
-; CHECK: cmp [[REG3]], #0
-; CHECK: b.eq LBB5_2
+; CHECK: tbz w[[REG2]], #0, LBB5_2
   %a = and i64 %foo, 1
   %b = trunc i64 %a to i1
   br i1 %b, label %if.then, label %if.else
Index: test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll
+++ test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll
@@ -4,8 +4,7 @@
 ; CHECK-LABEL: test
 ; CHECK: and [[REG1:w[0-9]+]], w8, #0x3
 ; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}}
-; CHECK: and [[REG2:w[0-9]+]], w8, #0x1
-; CHECK-NEXT: cmp [[REG2]], #0
+; CHECK-NEXT: tbz w9, #0,
   %1 = trunc i64 %a to i2
   %2 = trunc i64 %b to i1
 ; Force fast-isel to fall back to SDAG.
Index: test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
===================================================================
--- test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
+++ test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
@@ -44,9 +44,7 @@
 ; CHECK-NEXT: cmp w1, #0
 ; CHECK-NEXT: cset w9, eq
 ; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.ne
+; CHECK-NEXT: tbnz w8, #0,
 define i64 @test_or_unpredictable(i32 %a, i32 %b) {
 bb1:
   %0 = icmp eq i32 %a, 0
@@ -68,9 +66,7 @@
 ; CHECK-NEXT: cmp w1, #0
 ; CHECK-NEXT: cset w9, ne
 ; CHECK-NEXT: and w8, w8, w9
-; CHECK-NEXT: and w8, w8, #0x1
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: b.eq
+; CHECK-NEXT: tbz w8, #0,
 define i64 @test_and_unpredictable(i32 %a, i32 %b) {
 bb1:
   %0 = icmp ne i32 %a, 0