diff --git a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h --- a/llvm/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/llvm/include/llvm/CodeGen/AtomicExpandUtils.h @@ -57,7 +57,9 @@ /// [...] /// /// Returns true if the containing function was modified. -bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg); +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); } // end namespace llvm diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstSimplifyFolder.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/AtomicExpandUtils.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/TargetLowering.h" @@ -48,6 +49,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LowerAtomic.h" #include #include @@ -63,6 +65,16 @@ const TargetLowering *TLI = nullptr; const DataLayout *DL = nullptr; + // Atomic-expand pass creates new basic blocks at several places. + // The new control flow introduced by this pass can be simplified + // by running SimplifyCFG on such basic blocks when the pass finishes. + // CmpXchgLoopBlocks holds such basic blocks that + // needs simplification and atomic-expand runs SimplifyCFG on these + // blocks. For now, this simplification is carried out on CmpXchg Loop + // blocks only. Based on other use-cases, blocks that can benefit + // from this simplification can be added to this list. + SmallVector CmpXchgLoopBlocks; + public: static char ID; // Pass identification, replacement for typeid @@ -72,6 +84,10 @@ bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } + private: bool bracketInstWithFences(Instruction *I, AtomicOrdering Order); IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); @@ -104,7 +120,8 @@ IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref PerformOp, - CreateCmpXchgInstFun CreateCmpXchg); + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI); bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); @@ -123,7 +140,8 @@ friend bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg); + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks); }; // IRBuilder to be used for replacement atomic instructions. @@ -142,9 +160,11 @@ char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, - false) - +INITIALIZE_PASS_BEGIN(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", + false, false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } // Helper functions to retrieve the size of atomic instructions. 
@@ -190,6 +210,7 @@ return false; TLI = Subtarget->getTargetLowering(); DL = &F.getParent()->getDataLayout(); + CmpXchgLoopBlocks.clear(); SmallVector AtomicInsts; @@ -337,6 +358,12 @@ } else if (CASI) MadeChange |= tryExpandAtomicCmpXchg(CASI); } + + // Run CFG Simplification on CAS loop blocks. + auto TTI = &getAnalysis().getTTI(F); + for (BasicBlock *BB : CmpXchgLoopBlocks) + simplifyCFG(BB, *TTI); + return MadeChange; } @@ -602,7 +629,7 @@ << AI->getOperationName(AI->getOperation()) << " operation at " << MemScope << " memory scope"; }); - expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); + expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun, CmpXchgLoopBlocks); } return true; } @@ -873,7 +900,8 @@ if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) { OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment, MemOpOrder, SSID, - PerformPartwordOp, createCmpXchgInstFun); + PerformPartwordOp, createCmpXchgInstFun, + CmpXchgLoopBlocks); } else { assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC); OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr, @@ -1479,7 +1507,8 @@ IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, function_ref PerformOp, - CreateCmpXchgInstFun CreateCmpXchg) { + CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks) { LLVMContext &Ctx = Builder.getContext(); BasicBlock *BB = Builder.GetInsertBlock(); Function *F = BB->getParent(); @@ -1501,8 +1530,9 @@ // [...] BasicBlock *ExitBB = BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end"); + CmpXchgLoopBlocks.push_back(ExitBB); BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); - + CmpXchgLoopBlocks.push_back(LoopBB); // The split call above "helpfully" added a branch at the end of BB (to the // wrong place), but we want a load. It's easiest to just remove // the branch entirely. @@ -1559,8 +1589,9 @@ } // Note: This function is exposed externally by AtomicExpandUtils.h -bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, - CreateCmpXchgInstFun CreateCmpXchg) { +bool llvm::expandAtomicRMWToCmpXchg( + AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg, + SmallVector &CmpXchgLoopBlocks) { ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout()); Builder.setIsFPConstrained( AI->getFunction()->hasFnAttribute(Attribute::StrictFP)); @@ -1574,7 +1605,7 @@ return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded, AI->getValOperand()); }, - CreateCmpXchg); + CreateCmpXchg, CmpXchgLoopBlocks); AI->replaceAllUsesWith(Loaded); AI->eraseFromParent(); @@ -1722,9 +1753,10 @@ // CAS libcall, via a CAS loop, instead. if (!Success) { expandAtomicRMWToCmpXchg( - I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, - Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, - SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { + I, + [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded, + Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder, + SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) { // Create the CAS instruction normally... AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg( Addr, Loaded, NewVal, Alignment, MemOpOrder, @@ -1734,7 +1766,8 @@ // ...and then expand the CAS into a libcall. 
expandAtomicCASToLibcall(Pair); - }); + }, + CmpXchgLoopBlocks); } } diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -550,18 +550,6 @@ if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive) addPass(createSVEIntrinsicOptsPass()); - // Cmpxchg instructions are often used with a subsequent comparison to - // determine whether it succeeded. We can exploit existing control-flow in - // ldrex/strex loops to simplify this, but it needs tidying up. - if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) - addPass(createCFGSimplificationPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchRangeToICmp(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); - // Run LoopDataPrefetch // // Run this before LSR to remove the multiplies involved in computing the diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll @@ -12,7 +12,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp w8, w1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB0_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB0_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stxr w9, w2, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB0_1 @@ -63,7 +63,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp w8, w1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB1_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB1_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stxr w10, w9, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB1_1 @@ -117,7 +117,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp w8, w1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB2_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB2_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stlxr w9, w2, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB2_1 @@ -167,7 +167,7 @@ ; CHECK-NOLSE-O1-NEXT: ldxr x8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp x8, x1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB3_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB3_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stxr w9, x2, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB3_1 @@ -217,7 +217,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp x8, x1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB4_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB4_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stlxr w9, x2, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB4_1 @@ -267,7 +267,7 @@ ; CHECK-NOLSE-O1-NEXT: ldaxr x8, [x0] ; CHECK-NOLSE-O1-NEXT: cmp x8, x1 ; CHECK-NOLSE-O1-NEXT: b.ne LBB5_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB5_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stlxr w9, x2, [x0] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB5_1 @@ -2736,7 +2736,7 @@ ; 
CHECK-NOLSE-O1-NEXT: and w9, w0, #0xff ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxtb ; CHECK-NOLSE-O1-NEXT: b.ne LBB47_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB47_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stxrb w9, w2, [x8] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB47_1 @@ -2802,7 +2802,7 @@ ; CHECK-NOLSE-O1-NEXT: and w9, w0, #0xffff ; CHECK-NOLSE-O1-NEXT: cmp w9, w1, uxth ; CHECK-NOLSE-O1-NEXT: b.ne LBB48_4 -; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.trystore +; CHECK-NOLSE-O1-NEXT: ; %bb.2: ; %cmpxchg.fencedstore ; CHECK-NOLSE-O1-NEXT: ; in Loop: Header=BB48_1 Depth=1 ; CHECK-NOLSE-O1-NEXT: stxrh w9, w2, [x8] ; CHECK-NOLSE-O1-NEXT: cbnz w9, LBB48_1 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} @@ -57,7 +57,7 @@ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $w1, $x0, $x8, $x9 ; CHECK-NEXT: {{ $}} @@ -97,7 +97,7 @@ ; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $w1, $w2, $x0, $x8 ; CHECK-NEXT: {{ $}} @@ -136,7 +136,7 @@ ; CHECK-NEXT: $xzr = SUBSXrs renamable $x8, renamable $x1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} @@ -175,7 +175,7 @@ ; CHECK-NEXT: $xzr = SUBSXrs renamable $x8, renamable $x1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} @@ -214,7 +214,7 @@ ; CHECK-NEXT: $xzr = SUBSXrs renamable $x8, renamable $x1, 0, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $x0, $x1, $x2, $x8 ; CHECK-NEXT: {{ $}} @@ -1262,7 +1262,7 @@ ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 0, 
implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8 ; CHECK-NEXT: {{ $}} @@ -1305,7 +1305,7 @@ ; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 8, implicit-def $nzcv, pcsections !0 ; CHECK-NEXT: Bcc 1, %bb.4, implicit killed $nzcv, pcsections !0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.cmpxchg.trystore: + ; CHECK-NEXT: bb.2.cmpxchg.fencedstore: ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) ; CHECK-NEXT: liveins: $w1, $x0, $x2, $x8 ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll @@ -5,28 +5,33 @@ define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x20000000), %bb.4(0x60000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.3 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x20000000), %bb.4(0x60000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x2aaaaaab), %bb.3(0x55555555) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: 
%tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 @@ -44,29 +49,34 @@ define void @or_cond_select(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond_select ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x20000000), %bb.4(0x60000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT [[ICMP1]](s1), [[C2]], [[ICMP]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.3 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x20000000), %bb.4(0x60000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT [[ICMP1]](s1), [[C2]], [[ICMP]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x2aaaaaab), %bb.3(0x55555555) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 @@ -84,28 +94,33 @@ define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: and_cond ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.4(0x60000000), %bb.2(0x20000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: G_BRCOND 
[[ICMP2]](s1), %bb.4 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.3(0x55555555), %bb.2(0x2aaaaaab) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.4(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 @@ -123,29 +138,34 @@ define void @and_cond_select(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: and_cond_select ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.4(0x60000000), %bb.2(0x20000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT [[ICMP1]](s1), [[ICMP]], [[C2]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.3(0x55555555), %bb.2(0x2aaaaaab) - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.4(0x60000000), %bb.3(0x20000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP 
intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s1) = G_SELECT [[ICMP1]](s1), [[ICMP]], [[C2]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: G_BRCOND [[ICMP2]](s1), %bb.4 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x55555555), %bb.3(0x2aaaaaab) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 @@ -164,21 +184,24 @@ define void @or_cond_same_values_cmp(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond_same_values_cmp ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] - ; CHECK: G_BRCOND [[OR]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 5 %tmp3 = icmp slt i32 %X, 5 @@ -197,34 +220,41 @@ define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond_multiple_cases ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x10000000), %bb.5(0x70000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] - ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[OR]], [[ICMP2]] - ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP 
intpred(slt), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.3 - ; CHECK: G_BR %bb.5 - ; CHECK: bb.5.entry: - ; CHECK: successors: %bb.3(0x12492492), %bb.4(0x6db6db6e) - ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP4]](s1), %bb.3 - ; CHECK: G_BR %bb.4 - ; CHECK: bb.4.entry: - ; CHECK: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555) - ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] - ; CHECK: G_BRCOND [[ICMP5]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x10000000), %bb.5(0x70000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s1) = G_OR [[OR]], [[ICMP2]] + ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.entry: + ; CHECK-NEXT: successors: %bb.2(0x12492492), %bb.4(0x6db6db6e) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP4]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.entry: + ; CHECK-NEXT: successors: %bb.2(0x2aaaaaab), %bb.3(0x55555555) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK-NEXT: G_BRCOND [[ICMP5]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 5 %tmp3 = icmp slt i32 %X, 5 @@ -246,21 +276,24 @@ define void @or_cond_ne_null(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: or_cond_ne_null ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] - ; CHECK: G_BRCOND [[OR]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY 
$w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp ne i32 %X, 0 %tmp3 = icmp ne i32 %Y, 0 @@ -281,22 +314,25 @@ define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { ; CHECK-LABEL: name: unpredictable ; CHECK: bb.1.entry: - ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK: liveins: $w0, $w1, $w2 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] - ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] - ; CHECK: G_BRCOND [[OR]](s1), %bb.3 - ; CHECK: G_BR %bb.2 - ; CHECK: bb.2.common.ret: - ; CHECK: RET_ReallyLR - ; CHECK: bb.3.cond_true: - ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: liveins: $w0, $w1, $w2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK-NEXT: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK-NEXT: G_BR %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.cond_true: + ; CHECK-NEXT: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.UnifiedReturnBlock: + ; CHECK-NEXT: RET_ReallyLR entry: %tmp1 = icmp eq i32 %X, 0 %tmp3 = icmp slt i32 %Y, 5 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-hoisted-constants.ll @@ -11,7 +11,7 @@ define i32 @test(i32 %a, i1 %c) { ; TRANSLATED-LABEL: name: test ; TRANSLATED: bb.1.entry: - ; TRANSLATED-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; TRANSLATED-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; TRANSLATED-NEXT: liveins: $w0, $w1 ; TRANSLATED-NEXT: {{ $}} ; TRANSLATED-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 @@ -22,78 +22,75 @@ ; TRANSLATED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 100000 ; TRANSLATED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; TRANSLATED-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C]] - ; TRANSLATED-NEXT: G_BRCOND [[TRUNC1]](s1), %bb.3 - ; TRANSLATED-NEXT: G_BR %bb.2 - ; TRANSLATED-NEXT: {{ $}} - ; TRANSLATED-NEXT: 
bb.2.common.ret: - ; TRANSLATED-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.3, [[C1]](s32), %bb.1 - ; TRANSLATED-NEXT: $w0 = COPY [[PHI]](s32) - ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0 - ; TRANSLATED-NEXT: {{ $}} - ; TRANSLATED-NEXT: bb.3.cont: - ; TRANSLATED-NEXT: successors: %bb.2(0x80000000) + ; TRANSLATED-NEXT: G_BRCOND [[TRUNC1]](s1), %bb.2 + ; TRANSLATED-NEXT: G_BR %bb.3 ; TRANSLATED-NEXT: {{ $}} + ; TRANSLATED-NEXT: bb.2.cont: ; TRANSLATED-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[CONSTANT_FOLD_BARRIER]] ; TRANSLATED-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; TRANSLATED-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp ; TRANSLATED-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; TRANSLATED-NEXT: G_BR %bb.2 + ; TRANSLATED-NEXT: $w0 = COPY [[ADD]](s32) + ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0 + ; TRANSLATED-NEXT: {{ $}} + ; TRANSLATED-NEXT: bb.3.end: + ; TRANSLATED-NEXT: $w0 = COPY [[C1]](s32) + ; TRANSLATED-NEXT: RET_ReallyLR implicit $w0 + ; ; PRESELECTION-LABEL: name: test ; PRESELECTION: bb.1.entry: - ; PRESELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; PRESELECTION-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; PRESELECTION-NEXT: liveins: $w0, $w1 ; PRESELECTION-NEXT: {{ $}} ; PRESELECTION-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 ; PRESELECTION-NEXT: [[COPY1:%[0-9]+]]:gpr(s32) = COPY $w1 ; PRESELECTION-NEXT: [[TRUNC:%[0-9]+]]:gpr(s8) = G_TRUNC [[COPY1]](s32) ; PRESELECTION-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:gpr(s8) = G_ASSERT_ZEXT [[TRUNC]], 1 - ; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 - ; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000 - ; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C1]] - ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; PRESELECTION-NEXT: [[C:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 100000 + ; PRESELECTION-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:gpr(s32) = G_CONSTANT_FOLD_BARRIER [[C]] ; PRESELECTION-NEXT: [[ANYEXT:%[0-9]+]]:gpr(s32) = G_ANYEXT [[ASSERT_ZEXT]](s8) - ; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[ANYEXT]], [[C2]] + ; PRESELECTION-NEXT: [[C1:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 1 + ; PRESELECTION-NEXT: [[XOR:%[0-9]+]]:gpr(s32) = G_XOR [[ANYEXT]], [[C1]] + ; PRESELECTION-NEXT: [[AND:%[0-9]+]]:gpr(s32) = G_AND [[XOR]], [[C1]] ; PRESELECTION-NEXT: G_BRCOND [[AND]](s32), %bb.3 ; PRESELECTION-NEXT: G_BR %bb.2 ; PRESELECTION-NEXT: {{ $}} - ; PRESELECTION-NEXT: bb.2.common.ret: - ; PRESELECTION-NEXT: [[PHI:%[0-9]+]]:gpr(s32) = G_PHI %7(s32), %bb.3, [[C]](s32), %bb.1 - ; PRESELECTION-NEXT: $w0 = COPY [[PHI]](s32) - ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0 - ; PRESELECTION-NEXT: {{ $}} - ; PRESELECTION-NEXT: bb.3.cont: - ; PRESELECTION-NEXT: successors: %bb.2(0x80000000) - ; PRESELECTION-NEXT: {{ $}} + ; PRESELECTION-NEXT: bb.2.cont: ; PRESELECTION-NEXT: [[ADD:%[0-9]+]]:gpr(s32) = G_ADD [[COPY]], [[CONSTANT_FOLD_BARRIER]] ; PRESELECTION-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; PRESELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp ; PRESELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; PRESELECTION-NEXT: G_BR %bb.2 + ; PRESELECTION-NEXT: $w0 = COPY [[ADD]](s32) + ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0 + ; PRESELECTION-NEXT: {{ $}} + ; PRESELECTION-NEXT: bb.3.end: + ; PRESELECTION-NEXT: [[C2:%[0-9]+]]:gpr(s32) = G_CONSTANT i32 0 + ; PRESELECTION-NEXT: 
$w0 = COPY [[C2]](s32) + ; PRESELECTION-NEXT: RET_ReallyLR implicit $w0 + ; ; POSTSELECTION-LABEL: name: test ; POSTSELECTION: bb.1.entry: - ; POSTSELECTION-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; POSTSELECTION-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; POSTSELECTION-NEXT: liveins: $w0, $w1 ; POSTSELECTION-NEXT: {{ $}} ; POSTSELECTION-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 ; POSTSELECTION-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 - ; POSTSELECTION-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr ; POSTSELECTION-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 100000 - ; POSTSELECTION-NEXT: TBNZW [[COPY1]], 0, %bb.3 + ; POSTSELECTION-NEXT: TBZW [[COPY1]], 0, %bb.3 ; POSTSELECTION-NEXT: B %bb.2 ; POSTSELECTION-NEXT: {{ $}} - ; POSTSELECTION-NEXT: bb.2.common.ret: - ; POSTSELECTION-NEXT: [[PHI:%[0-9]+]]:gpr32 = PHI %7, %bb.3, [[COPY2]], %bb.1 - ; POSTSELECTION-NEXT: $w0 = COPY [[PHI]] - ; POSTSELECTION-NEXT: RET_ReallyLR implicit $w0 - ; POSTSELECTION-NEXT: {{ $}} - ; POSTSELECTION-NEXT: bb.3.cont: - ; POSTSELECTION-NEXT: successors: %bb.2(0x80000000) - ; POSTSELECTION-NEXT: {{ $}} + ; POSTSELECTION-NEXT: bb.2.cont: ; POSTSELECTION-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[MOVi32imm]] ; POSTSELECTION-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; POSTSELECTION-NEXT: BL @callee, csr_darwin_aarch64_aapcs, implicit-def $lr, implicit $sp ; POSTSELECTION-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp - ; POSTSELECTION-NEXT: B %bb.2 + ; POSTSELECTION-NEXT: $w0 = COPY [[ADDWrr]] + ; POSTSELECTION-NEXT: RET_ReallyLR implicit $w0 + ; POSTSELECTION-NEXT: {{ $}} + ; POSTSELECTION-NEXT: bb.3.end: + ; POSTSELECTION-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $wzr + ; POSTSELECTION-NEXT: $w0 = COPY [[COPY2]] + ; POSTSELECTION-NEXT: RET_ReallyLR implicit $w0 entry: %hc = bitcast i32 100000 to i32 br i1 %c, label %cont, label %end diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-dot-crash.ll @@ -10,13 +10,15 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fmov d1, #1.00000000 ; CHECK-NEXT: fadd d0, d0, d1 -; CHECK-NEXT: fadd d0, d0, d1 -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: fadd d0, d0, d1 -; CHECK-NEXT: fsqrt d0, d0 -; CHECK-NEXT: fmul d2, d0, d1 -; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: fcsel d0, d1, d2, gt +; CHECK-NEXT: fadd d1, d0, d1 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: fadd d1, d1, d0 +; CHECK-NEXT: fsqrt d1, d1 +; CHECK-NEXT: fcmp d1, #0.0 +; CHECK-NEXT: b.gt .LBB0_2 +; CHECK-NEXT: // %bb.1: // %bb.1 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: .LBB0_2: // %exit ; CHECK-NEXT: ret entry: %fadd = call double @llvm.vector.reduce.fadd.v3f64(double %a, <3 x double> ) diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -133,14 +133,18 @@ define i64 @multi_use_non_memory(i64 %a, i64 %b) { ; CHECK-LABEL: multi_use_non_memory: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsl x8, x0, #3 -; CHECK-NEXT: lsl x9, x1, #3 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: b.lt .LBB3_2 +; CHECK-NEXT: lsl x0, x0, #3 +; CHECK-NEXT: lsl x8, x1, #3 +; CHECK-NEXT: cmp x0, x8 +; CHECK-NEXT: b.lt .LBB3_4 ; CHECK-NEXT: // %bb.1: // %falsebb -; CHECK-NEXT: csel x0, x8, x9, gt +; CHECK-NEXT: b.le .LBB3_3 +; 
CHECK-NEXT: // %bb.2: // %exitbb +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_3: // %endbb +; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_2: // %truebb +; CHECK-NEXT: .LBB3_4: // %truebb ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -193,9 +193,9 @@ ; CHECK-NEXT: b.lt .LBB3_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: cmp w3, #15 +; CHECK-NEXT: cmp w3, #16 ; CHECK-NEXT: mov w9, w3 -; CHECK-NEXT: b.hi .LBB3_3 +; CHECK-NEXT: b.hs .LBB3_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB3_6 @@ -220,7 +220,7 @@ ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB3_8 -; CHECK-NEXT: .LBB3_6: // %for.body.preheader1 +; CHECK-NEXT: .LBB3_6: // %for.body.preheader14 ; CHECK-NEXT: add x11, x2, x10, lsl #2 ; CHECK-NEXT: add x12, x0, x10, lsl #1 ; CHECK-NEXT: sub x9, x9, x10 @@ -304,10 +304,10 @@ ; CHECK-NEXT: cmp w3, #1 ; CHECK-NEXT: b.lt .LBB4_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w3, #15 +; CHECK-NEXT: cmp w3, #16 ; CHECK-NEXT: and w8, w1, #0xffff ; CHECK-NEXT: mov w9, w3 -; CHECK-NEXT: b.hi .LBB4_3 +; CHECK-NEXT: b.hs .LBB4_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB4_6 @@ -332,7 +332,7 @@ ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB4_8 -; CHECK-NEXT: .LBB4_6: // %for.body.preheader1 +; CHECK-NEXT: .LBB4_6: // %for.body.preheader14 ; CHECK-NEXT: add x11, x2, x10, lsl #2 ; CHECK-NEXT: add x12, x0, x10, lsl #1 ; CHECK-NEXT: sub x9, x9, x10 @@ -416,9 +416,9 @@ ; CHECK-NEXT: cbz w2, .LBB5_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxtb w9, w1 -; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: cmp w2, #16 ; CHECK-NEXT: mov w10, w2 -; CHECK-NEXT: b.hi .LBB5_4 +; CHECK-NEXT: b.hs .LBB5_4 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x11, xzr ; CHECK-NEXT: mov w8, wzr @@ -450,7 +450,7 @@ ; CHECK-NEXT: addv h0, v0.8h ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: b.eq .LBB5_9 -; CHECK-NEXT: .LBB5_7: // %for.body.preheader1 +; CHECK-NEXT: .LBB5_7: // %for.body.preheader17 ; CHECK-NEXT: sub x10, x10, x11 ; CHECK-NEXT: add x11, x0, x11 ; CHECK-NEXT: .LBB5_8: // %for.body diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll --- a/llvm/test/CodeGen/AArch64/addsub.ll +++ b/llvm/test/CodeGen/AArch64/addsub.ll @@ -301,13 +301,13 @@ ; CHECK-NEXT: b.gt .LBB16_6 ; CHECK-NEXT: // %bb.4: // %test5 ; CHECK-NEXT: add w11, w9, #4 -; CHECK-NEXT: cmn w10, #443 +; CHECK-NEXT: cmn w10, #444 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: b.ge .LBB16_6 +; CHECK-NEXT: b.gt .LBB16_6 ; CHECK-NEXT: // %bb.5: // %test6 ; CHECK-NEXT: add w9, w9, #5 ; CHECK-NEXT: str w9, [x8] -; CHECK-NEXT: .LBB16_6: // %common.ret +; CHECK-NEXT: .LBB16_6: // %ret ; CHECK-NEXT: ret %val = load i32, ptr @var_i32 %val2 = load i32, ptr @var2_i32 diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll --- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll +++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll @@ -69,7 +69,12 @@ ; CHECK-NEXT: add w8, w0, #74 ; CHECK-NEXT: and w8, w8, #0xff ; CHECK-NEXT: cmp w8, #236 -; CHECK-NEXT: 
cset w0, lo +; CHECK-NEXT: b.hs LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %ret_true +; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %ret_false +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %0 = add i8 %x, 74 @@ -86,14 +91,24 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: sub w8, w0, #10 ; CHECK-SD-NEXT: cmp w8, #89 -; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: b.ls LBB2_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB2_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_1: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: sub w8, w0, #10 ; CHECK-GI-NEXT: cmp w8, #90 -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB2_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB2_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, 246 @@ -109,7 +124,12 @@ ; CHECK-SD-LABEL: test8_2: ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: cmp w0, #208 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB3_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB3_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_2: @@ -117,7 +137,12 @@ ; CHECK-GI-NEXT: sub w8, w0, #29 ; CHECK-GI-NEXT: and w8, w8, #0xff ; CHECK-GI-NEXT: cmp w8, #179 -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB3_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB3_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, 227 @@ -133,7 +158,12 @@ ; CHECK-SD-LABEL: test8_3: ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: cmp w0, #209 -; CHECK-SD-NEXT: cset w0, eq +; CHECK-SD-NEXT: b.ne LBB4_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB4_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_3: @@ -141,7 +171,12 @@ ; CHECK-GI-NEXT: sub w8, w0, #55 ; CHECK-GI-NEXT: and w8, w8, #0xff ; CHECK-GI-NEXT: cmp w8, #154 -; CHECK-GI-NEXT: cset w0, eq +; CHECK-GI-NEXT: b.ne LBB4_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB4_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, 201 @@ -157,7 +192,12 @@ ; CHECK-SD-LABEL: test8_4: ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: cmp w0, #39 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB5_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB5_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_4: @@ -165,7 +205,12 @@ ; CHECK-GI-NEXT: sub w8, w0, #79 ; CHECK-GI-NEXT: and w8, w8, #0xff ; CHECK-GI-NEXT: cmp w8, #216 -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB5_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB5_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, -79 @@ -182,14 +227,24 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: sub w8, w0, #123 ; CHECK-SD-NEXT: cmn w8, #106 -; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: b.ls LBB6_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov 
w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB6_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_5: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: sub w8, w0, #123 ; CHECK-GI-NEXT: cmn w8, #105 -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB6_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB6_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, 133 @@ -206,14 +261,24 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: sub w8, w0, #58 ; CHECK-SD-NEXT: cmp w8, #154 -; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: b.ls LBB7_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB7_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_6: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: sub w8, w0, #58 ; CHECK-GI-NEXT: cmp w8, #155 -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB7_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB7_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, -58 @@ -230,7 +295,12 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: sub w8, w0, #31 ; CHECK-NEXT: cmp w8, #124 -; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: b.hs LBB8_2 +; CHECK-NEXT: ; %bb.1: ; %ret_true +; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB8_2: ; %ret_false +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret entry: %0 = add i8 %x, 225 @@ -248,14 +318,24 @@ ; CHECK-SD-LABEL: test8_8: ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: cmp w0, #66 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB9_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB9_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test8_8: ; CHECK-GI: ; %bb.0: ; %entry ; CHECK-GI-NEXT: sub w8, w0, #66 ; CHECK-GI-NEXT: cmp w8, #1 -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB9_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB9_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i8 %x, 190 @@ -272,7 +352,12 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: mov w8, #5086 ; =0x13de ; CHECK-SD-NEXT: cmp w0, w8 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB10_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB10_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_0: @@ -281,7 +366,12 @@ ; CHECK-GI-NEXT: mov w9, #23633 ; =0x5c51 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB10_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB10_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, -46989 @@ -300,7 +390,12 @@ ; CHECK-SD-NEXT: mov w9, #40700 ; =0x9efc ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth -; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: b.ls LBB11_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB11_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_2: @@ -309,7 +404,12 @@ ; CHECK-GI-NEXT: mov w9, #40699 ; =0x9efb ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB11_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB11_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, 16882 @@ -326,7 +426,12 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: mov w8, #53200 ; =0xcfd0 ; CHECK-SD-NEXT: cmp w0, w8 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB12_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB12_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_3: @@ -335,7 +440,12 @@ ; CHECK-GI-NEXT: mov w9, #16947 ; =0x4233 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB12_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB12_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, 29283 @@ -354,7 +464,12 @@ ; CHECK-SD-NEXT: mov w9, #15676 ; =0x3d3c ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth -; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: b.hs LBB13_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB13_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_4: @@ -363,7 +478,12 @@ ; CHECK-GI-NEXT: mov w9, #15677 ; =0x3d3d ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: b.hi LBB13_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB13_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, -35551 @@ -380,7 +500,12 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: mov w8, #23282 ; =0x5af2 ; CHECK-SD-NEXT: cmp w0, w8 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB14_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB14_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_5: @@ -389,7 +514,12 @@ ; CHECK-GI-NEXT: mov w9, #63604 ; =0xf874 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB14_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB14_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, -25214 @@ -408,7 +538,12 @@ ; CHECK-SD-NEXT: mov w9, #24320 ; =0x5f00 ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w8, w9 -; CHECK-SD-NEXT: cset w0, hi +; CHECK-SD-NEXT: b.ls LBB15_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB15_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_6: @@ -417,7 +552,12 @@ ; CHECK-GI-NEXT: mov w9, #24321 ; =0x5f01 ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w8, w9 -; CHECK-GI-NEXT: cset w0, hs +; CHECK-GI-NEXT: b.lo LBB15_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; 
CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB15_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, -32194 @@ -436,7 +576,12 @@ ; CHECK-SD-NEXT: mov w9, #22619 ; =0x585b ; CHECK-SD-NEXT: add w8, w0, w8 ; CHECK-SD-NEXT: cmp w9, w8, uxth -; CHECK-SD-NEXT: cset w0, lo +; CHECK-SD-NEXT: b.hs LBB16_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB16_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_7: @@ -445,7 +590,12 @@ ; CHECK-GI-NEXT: mov w9, #22620 ; =0x585c ; CHECK-GI-NEXT: add w8, w0, w8 ; CHECK-GI-NEXT: cmp w9, w8, uxth -; CHECK-GI-NEXT: cset w0, ls +; CHECK-GI-NEXT: b.hi LBB16_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB16_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, 9272 @@ -462,7 +612,12 @@ ; CHECK-SD: ; %bb.0: ; %entry ; CHECK-SD-NEXT: mov w8, #4919 ; =0x1337 ; CHECK-SD-NEXT: cmp w0, w8 -; CHECK-SD-NEXT: cset w0, ne +; CHECK-SD-NEXT: b.eq LBB17_2 +; CHECK-SD-NEXT: ; %bb.1: ; %ret_true +; CHECK-SD-NEXT: mov w0, #1 ; =0x1 +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: LBB17_2: ; %ret_false +; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test16_8: @@ -470,7 +625,12 @@ ; CHECK-GI-NEXT: mov w8, #6706 ; =0x1a32 ; CHECK-GI-NEXT: add w9, w0, #1787 ; CHECK-GI-NEXT: cmp w8, w9, uxth -; CHECK-GI-NEXT: cset w0, ne +; CHECK-GI-NEXT: b.eq LBB17_2 +; CHECK-GI-NEXT: ; %bb.1: ; %ret_true +; CHECK-GI-NEXT: mov w0, #1 ; =0x1 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB17_2: ; %ret_false +; CHECK-GI-NEXT: mov w0, wzr ; CHECK-GI-NEXT: ret entry: %0 = add i16 %x, -63749 diff --git a/llvm/test/CodeGen/AArch64/andorbrcompare.ll b/llvm/test/CodeGen/AArch64/andorbrcompare.ll --- a/llvm/test/CodeGen/AArch64/andorbrcompare.ll +++ b/llvm/test/CodeGen/AArch64/andorbrcompare.ll @@ -13,12 +13,13 @@ ; SDISEL-NEXT: // %bb.1: // %entry ; SDISEL-NEXT: cmp w4, w5 ; SDISEL-NEXT: b.lo .LBB0_3 -; SDISEL-NEXT: // %bb.2: +; SDISEL-NEXT: // %bb.2: // %else ; SDISEL-NEXT: mov w0, wzr ; SDISEL-NEXT: ret ; SDISEL-NEXT: .LBB0_3: // %if +; SDISEL-NEXT: mov w8, #1 // =0x1 ; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] +; SDISEL-NEXT: str w8, [x6] ; SDISEL-NEXT: ret ; ; GISEL-LABEL: and_eq_ne_ult: @@ -31,13 +32,14 @@ ; GISEL-NEXT: tbnz w8, #0, .LBB0_3 ; GISEL-NEXT: // %bb.1: // %entry ; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr ; GISEL-NEXT: b.lo .LBB0_3 -; GISEL-NEXT: // %bb.2: // %common.ret +; GISEL-NEXT: // %bb.2: // %else +; GISEL-NEXT: mov w0, wzr ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB0_3: // %if +; GISEL-NEXT: mov w8, #1 // =0x1 ; GISEL-NEXT: mov w0, #1 // =0x1 -; GISEL-NEXT: str w0, [x6] +; GISEL-NEXT: str w8, [x6] ; GISEL-NEXT: ret entry: %c0 = icmp eq i32 %s0, %s1 @@ -64,12 +66,13 @@ ; SDISEL-NEXT: // %bb.1: // %entry ; SDISEL-NEXT: cmp w4, w5 ; SDISEL-NEXT: b.ls .LBB1_3 -; SDISEL-NEXT: // %bb.2: +; SDISEL-NEXT: // %bb.2: // %else ; SDISEL-NEXT: mov w0, wzr ; SDISEL-NEXT: ret ; SDISEL-NEXT: .LBB1_3: // %if +; SDISEL-NEXT: mov w8, #1 // =0x1 ; SDISEL-NEXT: mov w0, #1 // =0x1 -; SDISEL-NEXT: str w0, [x6] +; SDISEL-NEXT: str w8, [x6] ; SDISEL-NEXT: ret ; ; GISEL-LABEL: and_ne_ult_ule: @@ -82,13 +85,14 @@ ; GISEL-NEXT: tbnz w8, #0, .LBB1_3 ; GISEL-NEXT: // %bb.1: // %entry ; GISEL-NEXT: cmp w4, w5 -; GISEL-NEXT: mov w0, wzr ; GISEL-NEXT: b.ls .LBB1_3 -; GISEL-NEXT: // %bb.2: // %common.ret +; GISEL-NEXT: // %bb.2: // %else 
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB1_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp ne i32 %s0, %s1
@@ -115,12 +119,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.hi .LBB2_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB2_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ult_ule_ugt:
@@ -133,13 +138,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB2_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.hi .LBB2_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB2_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp ult i32 %s0, %s1
@@ -166,12 +172,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.hs .LBB3_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB3_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ule_ugt_uge:
@@ -184,13 +191,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB3_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.hs .LBB3_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB3_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp ule i32 %s0, %s1
@@ -217,12 +225,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.lt .LBB4_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB4_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ugt_uge_slt:
@@ -235,13 +244,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB4_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.lt .LBB4_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB4_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp ugt i32 %s0, %s1
@@ -268,12 +278,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.le .LBB5_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB5_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_uge_slt_sle:
@@ -286,13 +297,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB5_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.le .LBB5_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB5_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp uge i32 %s0, %s1
@@ -319,12 +331,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.gt .LBB6_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB6_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_slt_sle_sgt:
@@ -337,13 +350,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB6_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.gt .LBB6_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB6_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp slt i32 %s0, %s1
@@ -370,12 +384,13 @@
 ; SDISEL-NEXT: // %bb.1: // %entry
 ; SDISEL-NEXT: cmp w4, w5
 ; SDISEL-NEXT: b.ge .LBB7_3
-; SDISEL-NEXT: // %bb.2:
+; SDISEL-NEXT: // %bb.2: // %else
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: .LBB7_3: // %if
+; SDISEL-NEXT: mov w8, #1 // =0x1
 ; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: str w8, [x6]
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_sle_sgt_sge:
@@ -388,13 +403,14 @@
 ; GISEL-NEXT: tbnz w8, #0, .LBB7_3
 ; GISEL-NEXT: // %bb.1: // %entry
 ; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.ge .LBB7_3
-; GISEL-NEXT: // %bb.2: // %common.ret
+; GISEL-NEXT: // %bb.2: // %else
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB7_3: // %if
+; GISEL-NEXT: mov w8, #1 // =0x1
 ; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
+; GISEL-NEXT: str w8, [x6]
 ; GISEL-NEXT: ret
 entry:
 %c0 = icmp sle i32 %s0, %s1
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: LBB0_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 5
@@ -42,7 +42,7 @@
 ; SDISEL-NEXT: bl _foo
 ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT: LBB1_2: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: single_different:
@@ -55,7 +55,7 @@
 ; GISEL-NEXT: bl _foo
 ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT: LBB1_2: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 entry:
 %cmp = icmp sle i32 %a, 5
@@ -88,7 +88,7 @@
 ; SDISEL-NEXT: bl _foo
 ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; SDISEL-NEXT: LBB2_3: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: single_flagclobber:
@@ -106,7 +106,7 @@
 ; GISEL-NEXT: bl _foo
 ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; GISEL-NEXT: LBB2_3: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 5
@@ -144,7 +144,7 @@
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: LBB3_3: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 5
@@ -178,13 +178,13 @@
 ; SDISEL-NEXT: ccmp w8, #16, #0, ge
 ; SDISEL-NEXT: b.le LBB4_2
 ; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: LBB4_2: ; %if.then
 ; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT: bl _foo
 ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: speculate_division:
@@ -194,13 +194,13 @@
 ; GISEL-NEXT: ccmp w8, #17, #0, gt
 ; GISEL-NEXT: b.lt LBB4_2
 ; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: LBB4_2: ; %if.then
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT: bl _foo
 ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 entry:
 %cmp = icmp sgt i32 %a, 0
@@ -230,13 +230,13 @@
 ; SDISEL-NEXT: fccmp s0, s1, #8, ge
 ; SDISEL-NEXT: b.ge LBB5_2
 ; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ; SDISEL-NEXT: LBB5_2: ; %if.then
 ; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; SDISEL-NEXT: bl _foo
 ; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7
+; SDISEL-NEXT: mov w0, #7 ; =0x7
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: single_fcmp:
@@ -248,13 +248,13 @@
 ; GISEL-NEXT: fccmp s0, s1, #8, gt
 ; GISEL-NEXT: b.ge LBB5_2
 ; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: LBB5_2: ; %if.then
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; GISEL-NEXT: bl _foo
 ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7
+; GISEL-NEXT: mov w0, #7 ; =0x7
 ; GISEL-NEXT: ret
 entry:
 %cmp = icmp sgt i32 %a, 0
@@ -318,7 +318,7 @@
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: LBB7_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 0
@@ -346,13 +346,13 @@
 ; CHECK-NEXT: cmp w1, #32
 ; CHECK-NEXT: b.eq LBB8_3
 ; CHECK-NEXT: ; %bb.2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: LBB8_3: ; %if.then
 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 5
@@ -380,7 +380,7 @@
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: LBB9_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 0
@@ -408,7 +408,7 @@
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: LBB10_2: ; %if.end
-; CHECK-NEXT: mov w0, #7
+; CHECK-NEXT: mov w0, #7 ; =0x7
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp eq i32 %a, 0
@@ -431,9 +431,40 @@
 ; Test case distilled from 126.gcc.
 ; The phi in sw.bb.i.i gets multiple operands for the %entry predecessor.
 define void @build_modify_expr() nounwind ssp {
-; CHECK-LABEL: build_modify_expr:
-; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: ret
+; SDISEL-LABEL: build_modify_expr:
+; SDISEL: ; %bb.0: ; %entry
+; SDISEL-NEXT: cmp w8, #37
+; SDISEL-NEXT: mov w8, #1 ; =0x1
+; SDISEL-NEXT: lsl x8, x8, xzr
+; SDISEL-NEXT: mov x9, #31 ; =0x1f
+; SDISEL-NEXT: movk x9, #48, lsl #32
+; SDISEL-NEXT: and x8, x8, x9
+; SDISEL-NEXT: ccmp x8, #0, #4, ls
+; SDISEL-NEXT: b.eq LBB11_2
+; SDISEL-NEXT: ; %bb.1: ; %if.end85
+; SDISEL-NEXT: ret
+; SDISEL-NEXT: LBB11_2: ; %sw.bb.i.i.preheader
+; SDISEL-NEXT: ; implicit-def: $x8
+; SDISEL-NEXT: LBB11_3: ; %sw.bb.i.i
+; SDISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDISEL-NEXT: ldr x8, [x8, #32]
+; SDISEL-NEXT: b LBB11_3
+;
+; GISEL-LABEL: build_modify_expr:
+; GISEL: ; %bb.0: ; %entry
+; GISEL-NEXT: ; implicit-def: $x8
+; GISEL-NEXT: cmp w8, #37
+; GISEL-NEXT: b.hi LBB11_3
+; GISEL-NEXT: ; %bb.1: ; %entry
+; GISEL-NEXT: mov w9, #1 ; =0x1
+; GISEL-NEXT: ; implicit-def: $x8
+; GISEL-NEXT: tbz w9, #0, LBB11_3
+; GISEL-NEXT: ; %bb.2: ; %if.end85
+; GISEL-NEXT: ret
+; GISEL-NEXT: LBB11_3: ; %sw.bb.i.i
+; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GISEL-NEXT: ldr x8, [x8, #32]
+; GISEL-NEXT: b LBB11_3
 entry:
 switch i32 undef, label %sw.bb.i.i [
 i32 69, label %if.end85
@@ -466,7 +497,7 @@
 ;
 ; GISEL-LABEL: select_and:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
 ; GISEL-NEXT: cmp w8, w1
 ; GISEL-NEXT: ccmp w0, w1, #0, ne
 ; GISEL-NEXT: csel x0, x2, x3, lt
@@ -488,7 +519,7 @@
 ;
 ; GISEL-LABEL: select_or:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
 ; GISEL-NEXT: cmp w8, w1
 ; GISEL-NEXT: ccmp w0, w1, #8, eq
 ; GISEL-NEXT: csel x0, x2, x3, lt
@@ -510,7 +541,7 @@
 ;
 ; GISEL-LABEL: select_or_float:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #5
+; GISEL-NEXT: mov w8, #5 ; =0x5
 ; GISEL-NEXT: cmp w8, w1
 ; GISEL-NEXT: ccmp w0, w1, #8, eq
 ; GISEL-NEXT: fcsel s0, s0, s1, lt
@@ -528,13 +559,13 @@
 ; SDISEL-NEXT: cmp x0, #2
 ; SDISEL-NEXT: ccmp x0, #4, #4, ne
 ; SDISEL-NEXT: ccmp x1, #0, #0, eq
-; SDISEL-NEXT: mov w8, #1
+; SDISEL-NEXT: mov w8, #1 ; =0x1
 ; SDISEL-NEXT: cinc x0, x8, eq
 ; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: gccbug:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #2
+; GISEL-NEXT: mov w8, #2 ; =0x2
 ; GISEL-NEXT: cmp x0, #2
 ; GISEL-NEXT: ccmp x0, #4, #4, ne
 ; GISEL-NEXT: ccmp x1, #0, #0, eq
@@ -592,7 +623,7 @@
 ; SDISEL-LABEL: select_andor32:
 ; SDISEL: ; %bb.0:
 ; SDISEL-NEXT: cmp w1, w2
-; SDISEL-NEXT: mov w8, #32
+; SDISEL-NEXT: mov w8, #32 ; =0x20
 ; SDISEL-NEXT: ccmp w0, w8, #4, lt
 ; SDISEL-NEXT: ccmp w0, w1, #0, eq
 ; SDISEL-NEXT: csel w0, w0, w1, eq
@@ -600,7 +631,7 @@
 ;
 ; GISEL-LABEL: select_andor32:
 ; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #32
+; GISEL-NEXT: mov w8, #32 ; =0x20
 ; GISEL-NEXT: cmp w1, w2
 ; GISEL-NEXT: ccmp w0, w8, #4, lt
 ; GISEL-NEXT: ccmp w0, w1, #0, eq
@@ -701,11 +732,11 @@
 ; SDISEL-NEXT: ccmp w0, #13, #0, ge
 ; SDISEL-NEXT: cset w8, gt
 ; SDISEL-NEXT: cmp w0, #22
-; SDISEL-NEXT: mov w9, #44
+; SDISEL-NEXT: mov w9, #44 ; =0x2c
 ; SDISEL-NEXT: ccmp w0, w9, #0, ge
 ; SDISEL-NEXT: csel w8, wzr, w8, le
 ; SDISEL-NEXT: cmp w0, #99
-; SDISEL-NEXT: mov w9, #77
+; SDISEL-NEXT: mov w9, #77 ; =0x4d
 ; SDISEL-NEXT: ccmp w0, w9, #4, ne
 ; SDISEL-NEXT: cset w9, eq
 ; SDISEL-NEXT: tst w8, w9
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -78,9 +78,13 @@
 ; make sure we can handle branch instruction in optimizeCompare.
 define i32@foo6(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-LABEL: foo6:
-; CHECK: // %bb.0: // %common.ret
-; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: csinc w0, w8, wzr, le
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub w0, w0, w1
+; CHECK-NEXT: cmp w0, #1
+; CHECK-NEXT: b.lt .LBB5_2
+; CHECK-NEXT: // %bb.1: // %l.if
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: .LBB5_2: // %l.else
 ; CHECK-NEXT: ret
 %sub = sub nsw i32 %a, %b
 %cmp = icmp sgt i32 %sub, 0
@@ -97,12 +101,14 @@
 define i32 @foo7(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: foo7:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subs w8, w0, w1
-; CHECK-NEXT: cneg w9, w8, mi
-; CHECK-NEXT: cmn w8, #1
-; CHECK-NEXT: csel w10, w9, w0, lt
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w0, w10, w9, ge
+; CHECK-NEXT: subs w9, w0, w1
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: cneg w0, w9, mi
+; CHECK-NEXT: tbnz w9, #31, .LBB6_2
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: cmn w9, #1
+; CHECK-NEXT: csel w0, w0, w8, lt
+; CHECK-NEXT: .LBB6_2: // %if.else
 ; CHECK-NEXT: ret
 entry:
 %sub = sub nsw i32 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
--- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll
@@ -33,14 +33,15 @@
 ; CHECK-NEXT: add x9, x0, x2
 ; CHECK-NEXT: sub x9, x9, #244, lsl #12 ; =999424
 ; CHECK-NEXT: cmp x9, #575
-; CHECK-NEXT: b.eq LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %else
+; CHECK-NEXT: b.ne LBB0_2
+; CHECK-NEXT: ; %bb.1: ; %then
+; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB0_2: ; %else
 ; CHECK-NEXT: mul w9, w0, w1
 ; CHECK-NEXT: mul w0, w9, w1
 ; CHECK-NEXT: mov w9, #10 ; =0xa
 ; CHECK-NEXT: str w9, [x8]
-; CHECK-NEXT: LBB0_2: ; %common.ret
-; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT: ret
 entry:
 %l = load i32, ptr %ptr, !dbg !4
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
@@ -87,13 +87,18 @@
 define i32 @test_br_extr_cmp(<1 x i64> %v1, <1 x i64> %v2) {
 ; CHECK-LABEL: test_br_extr_cmp:
-; CHECK: // %bb.0: // %common.ret
+; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fmov x8, d1
 ; CHECK-NEXT: fmov x9, d0
 ; CHECK-NEXT: cmp x9, x8
-; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: b.ne .LBB6_2
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB6_2: // %if.then
+; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: ret
 %1 = icmp eq <1 x i64> %v1, %v2
 %2 = extractelement <1 x i1> %1, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
--- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll
@@ -808,24 +808,33 @@
 ; ENABLE-NEXT: .cfi_offset w29, -16
 ; ENABLE-NEXT: .cfi_offset w19, -24
 ; ENABLE-NEXT: .cfi_offset w20, -32
-; ENABLE-NEXT: cbnz wzr, LBB11_3
+; ENABLE-NEXT: cbnz wzr, LBB11_5
 ; ENABLE-NEXT: ; %bb.1: ; %if.then
 ; ENABLE-NEXT: sub x8, sp, #16
 ; ENABLE-NEXT: mov sp, x8
 ; ENABLE-NEXT: mov w9, wzr
-; ENABLE-NEXT: LBB11_2: ; %for.body
-; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
 ; ENABLE-NEXT: ; InlineAsm Start
 ; ENABLE-NEXT: mov x10, #0 ; =0x0
 ; ENABLE-NEXT: ; InlineAsm End
-; ENABLE-NEXT: add w10, w10, w9
+; ENABLE-NEXT: b LBB11_3
+; ENABLE-NEXT: LBB11_2: ; %body2
+; ENABLE-NEXT: ; in Loop: Header=BB11_3 Depth=1
+; ENABLE-NEXT: ; InlineAsm Start
+; ENABLE-NEXT: nop
+; ENABLE-NEXT: ; InlineAsm End
 ; ENABLE-NEXT: mov w9, #1 ; =0x1
-; ENABLE-NEXT: str w10, [x8]
+; ENABLE-NEXT: LBB11_3: ; %for.body
+; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
+; ENABLE-NEXT: add w9, w10, w9
+; ENABLE-NEXT: str w9, [x8]
+; ENABLE-NEXT: cbnz wzr, LBB11_2
+; ENABLE-NEXT: ; %bb.4: ; %body1
+; ENABLE-NEXT: ; in Loop: Header=BB11_3 Depth=1
 ; ENABLE-NEXT: ; InlineAsm Start
 ; ENABLE-NEXT: nop
 ; ENABLE-NEXT: ; InlineAsm End
-; ENABLE-NEXT: b LBB11_2
-; ENABLE-NEXT: LBB11_3: ; %if.end
+; ENABLE-NEXT: b LBB11_3
+; ENABLE-NEXT: LBB11_5: ; %if.end
 ; ENABLE-NEXT: sub sp, x29, #16
 ; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -841,24 +850,33 @@
 ; DISABLE-NEXT: .cfi_offset w29, -16
 ; DISABLE-NEXT: .cfi_offset w19, -24
 ; DISABLE-NEXT: .cfi_offset w20, -32
-; DISABLE-NEXT: cbnz wzr, LBB11_3
+; DISABLE-NEXT: cbnz wzr, LBB11_5
 ; DISABLE-NEXT: ; %bb.1: ; %if.then
 ; DISABLE-NEXT: sub x8, sp, #16
 ; DISABLE-NEXT: mov sp, x8
 ; DISABLE-NEXT: mov w9, wzr
-; DISABLE-NEXT: LBB11_2: ; %for.body
-; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
 ; DISABLE-NEXT: ; InlineAsm Start
 ; DISABLE-NEXT: mov x10, #0 ; =0x0
 ; DISABLE-NEXT: ; InlineAsm End
-; DISABLE-NEXT: add w10, w10, w9
+; DISABLE-NEXT: b LBB11_3
+; DISABLE-NEXT: LBB11_2: ; %body2
+; DISABLE-NEXT: ; in Loop: Header=BB11_3 Depth=1
+; DISABLE-NEXT: ; InlineAsm Start
+; DISABLE-NEXT: nop
+; DISABLE-NEXT: ; InlineAsm End
 ; DISABLE-NEXT: mov w9, #1 ; =0x1
-; DISABLE-NEXT: str w10, [x8]
+; DISABLE-NEXT: LBB11_3: ; %for.body
+; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
+; DISABLE-NEXT: add w9, w10, w9
+; DISABLE-NEXT: str w9, [x8]
+; DISABLE-NEXT: cbnz wzr, LBB11_2
+; DISABLE-NEXT: ; %bb.4: ; %body1
+; DISABLE-NEXT: ; in Loop: Header=BB11_3 Depth=1
 ; DISABLE-NEXT: ; InlineAsm Start
 ; DISABLE-NEXT: nop
 ; DISABLE-NEXT: ; InlineAsm End
-; DISABLE-NEXT: b LBB11_2
-; DISABLE-NEXT: LBB11_3: ; %if.end
+; DISABLE-NEXT: b LBB11_3
+; DISABLE-NEXT: LBB11_5: ; %if.end
 ; DISABLE-NEXT: sub sp, x29, #16
 ; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
@@ -893,50 +911,54 @@
 define void @infiniteloop3() {
 ; ENABLE-LABEL: infiniteloop3:
 ; ENABLE: ; %bb.0: ; %entry
-; ENABLE-NEXT: cbnz wzr, LBB12_5
-; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader
+; ENABLE-NEXT: cbnz wzr, LBB12_2
+; ENABLE-NEXT: ; %bb.1: ; %body
+; ENABLE-NEXT: cbnz wzr, LBB12_6
+; ENABLE-NEXT: LBB12_2: ; %loop2a.preheader
 ; ENABLE-NEXT: mov x8, xzr
 ; ENABLE-NEXT: mov x9, xzr
 ; ENABLE-NEXT: mov x11, xzr
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_2: ; %loop2b
-; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: b LBB12_4
+; ENABLE-NEXT: LBB12_3: ; %loop2b
+; ENABLE-NEXT: ; in Loop: Header=BB12_4 Depth=1
 ; ENABLE-NEXT: str x10, [x11]
 ; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: LBB12_3: ; %loop1
+; ENABLE-NEXT: LBB12_4: ; %loop1
 ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1
 ; ENABLE-NEXT: mov x10, x9
 ; ENABLE-NEXT: ldr x9, [x8]
-; ENABLE-NEXT: cbnz x8, LBB12_2
-; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; ENABLE-NEXT: cbnz x8, LBB12_3
+; ENABLE-NEXT: ; %bb.5: ; in Loop: Header=BB12_4 Depth=1
 ; ENABLE-NEXT: mov x8, x10
 ; ENABLE-NEXT: mov x11, x10
-; ENABLE-NEXT: b LBB12_3
-; ENABLE-NEXT: LBB12_5: ; %end
+; ENABLE-NEXT: b LBB12_4
+; ENABLE-NEXT: LBB12_6: ; %end
 ; ENABLE-NEXT: ret
 ;
 ; DISABLE-LABEL: infiniteloop3:
 ; DISABLE: ; %bb.0: ; %entry
-; DISABLE-NEXT: cbnz wzr, LBB12_5
-; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader
+; DISABLE-NEXT: cbnz wzr, LBB12_2
+; DISABLE-NEXT: ; %bb.1: ; %body
+; DISABLE-NEXT: cbnz wzr, LBB12_6
+; DISABLE-NEXT: LBB12_2: ; %loop2a.preheader
 ; DISABLE-NEXT: mov x8, xzr
 ; DISABLE-NEXT: mov x9, xzr
 ; DISABLE-NEXT: mov x11, xzr
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_2: ; %loop2b
-; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: b LBB12_4
+; DISABLE-NEXT: LBB12_3: ; %loop2b
+; DISABLE-NEXT: ; in Loop: Header=BB12_4 Depth=1
 ; DISABLE-NEXT: str x10, [x11]
 ; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: LBB12_3: ; %loop1
+; DISABLE-NEXT: LBB12_4: ; %loop1
 ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1
 ; DISABLE-NEXT: mov x10, x9
 ; DISABLE-NEXT: ldr x9, [x8]
-; DISABLE-NEXT: cbnz x8, LBB12_2
-; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1
+; DISABLE-NEXT: cbnz x8, LBB12_3
+; DISABLE-NEXT: ; %bb.5: ; in Loop: Header=BB12_4 Depth=1
 ; DISABLE-NEXT: mov x8, x10
 ; DISABLE-NEXT: mov x11, x10
-; DISABLE-NEXT: b LBB12_3
-; DISABLE-NEXT: LBB12_5: ; %end
+; DISABLE-NEXT: b LBB12_4
+; DISABLE-NEXT: LBB12_6: ; %end
 ; DISABLE-NEXT: ret
 entry:
 br i1 undef, label %loop2a, label %body
diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
--- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll
@@ -2126,23 +2126,35 @@
 ; SDAG-LABEL: saddo.br.i32:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn w0, w1
-; SDAG-NEXT: cset w0, vc
+; SDAG-NEXT: b.vc .LBB69_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB69_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: saddo.br.i32:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn w0, w1
+; FAST-NEXT: b.vc .LBB69_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB69_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, vs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: saddo.br.i32:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn w0, w1
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.vc .LBB69_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB69_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2161,23 +2173,35 @@
 ; SDAG-LABEL: saddo.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn x0, x1
-; SDAG-NEXT: cset w0, vc
+; SDAG-NEXT: b.vc .LBB70_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB70_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: saddo.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn x0, x1
+; FAST-NEXT: b.vc .LBB70_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB70_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, vs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: saddo.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn x0, x1
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.vc .LBB70_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB70_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2196,23 +2220,35 @@
 ; SDAG-LABEL: uaddo.br.i32:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn w0, w1
-; SDAG-NEXT: cset w0, lo
+; SDAG-NEXT: b.lo .LBB71_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB71_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: uaddo.br.i32:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn w0, w1
+; FAST-NEXT: b.lo .LBB71_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB71_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, hs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: uaddo.br.i32:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.lo .LBB71_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB71_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2231,23 +2267,35 @@
 ; SDAG-LABEL: uaddo.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn x0, x1
-; SDAG-NEXT: cset w0, lo
+; SDAG-NEXT: b.lo .LBB72_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB72_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: uaddo.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn x0, x1
+; FAST-NEXT: b.lo .LBB72_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB72_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, hs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: uaddo.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn x0, x1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.lo .LBB72_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB72_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2266,23 +2314,35 @@
 ; SDAG-LABEL: ssubo.br.i32:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmp w0, w1
-; SDAG-NEXT: cset w0, vc
+; SDAG-NEXT: b.vc .LBB73_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB73_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: ssubo.br.i32:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmp w0, w1
+; FAST-NEXT: b.vc .LBB73_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB73_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, vs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: ssubo.br.i32:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.vc .LBB73_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB73_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2301,23 +2361,35 @@
 ; SDAG-LABEL: ssubo.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmp x0, x1
-; SDAG-NEXT: cset w0, vc
+; SDAG-NEXT: b.vc .LBB74_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB74_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: ssubo.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmp x0, x1
+; FAST-NEXT: b.vc .LBB74_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB74_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, vs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: ssubo.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmp x0, x1
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.vc .LBB74_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB74_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2336,23 +2408,35 @@
 ; SDAG-LABEL: usubo.br.i32:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmp w0, w1
-; SDAG-NEXT: cset w0, hs
+; SDAG-NEXT: b.hs .LBB75_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB75_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: usubo.br.i32:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmp w0, w1
+; FAST-NEXT: b.hs .LBB75_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB75_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, lo
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: usubo.br.i32:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.hs .LBB75_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB75_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2371,23 +2455,35 @@
 ; SDAG-LABEL: usubo.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmp x0, x1
-; SDAG-NEXT: cset w0, hs
+; SDAG-NEXT: b.hs .LBB76_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB76_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: usubo.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmp x0, x1
+; FAST-NEXT: b.hs .LBB76_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB76_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, lo
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: usubo.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmp x0, x1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.hs .LBB76_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB76_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2407,16 +2503,24 @@
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: smull x8, w0, w1
 ; SDAG-NEXT: cmp x8, w8, sxtw
-; SDAG-NEXT: cset w0, eq
+; SDAG-NEXT: b.eq .LBB77_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB77_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: smulo.br.i32:
 ; FAST: // %bb.0: // %entry
-; FAST-NEXT: smull x9, w0, w1
+; FAST-NEXT: smull x8, w0, w1
+; FAST-NEXT: cmp x8, w8, sxtw
+; FAST-NEXT: b.eq .LBB77_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB77_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cmp x9, w9, sxtw
-; FAST-NEXT: cset w9, ne
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
@@ -2426,8 +2530,12 @@
 ; GISEL-NEXT: mul w9, w0, w1
 ; GISEL-NEXT: asr x8, x8, #32
 ; GISEL-NEXT: cmp w8, w9, asr #31
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.eq .LBB77_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB77_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2448,17 +2556,25 @@
 ; SDAG-NEXT: mul x8, x0, x1
 ; SDAG-NEXT: smulh x9, x0, x1
 ; SDAG-NEXT: cmp x9, x8, asr #63
-; SDAG-NEXT: cset w0, eq
+; SDAG-NEXT: b.eq .LBB78_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB78_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: smulo.br.i64:
 ; FAST: // %bb.0: // %entry
-; FAST-NEXT: mul x9, x0, x1
+; FAST-NEXT: mul x8, x0, x1
+; FAST-NEXT: smulh x9, x0, x1
+; FAST-NEXT: cmp x9, x8, asr #63
+; FAST-NEXT: b.eq .LBB78_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB78_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: smulh x10, x0, x1
-; FAST-NEXT: cmp x10, x9, asr #63
-; FAST-NEXT: cset w9, ne
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
@@ -2467,8 +2583,12 @@
 ; GISEL-NEXT: smulh x8, x0, x1
 ; GISEL-NEXT: mul x9, x0, x1
 ; GISEL-NEXT: cmp x8, x9, asr #63
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.eq .LBB78_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB78_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2487,23 +2607,35 @@
 ; SDAG-LABEL: smulo2.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn x0, x0
-; SDAG-NEXT: cset w0, vc
+; SDAG-NEXT: b.vc .LBB79_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB79_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: smulo2.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn x0, x0
+; FAST-NEXT: b.vc .LBB79_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB79_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, vs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: smulo2.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn x0, x0
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.vc .LBB79_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB79_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 2)
@@ -2523,16 +2655,24 @@
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: umull x8, w0, w1
 ; SDAG-NEXT: tst x8, #0xffffffff00000000
-; SDAG-NEXT: cset w0, eq
+; SDAG-NEXT: b.eq .LBB80_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB80_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: umulo.br.i32:
 ; FAST: // %bb.0: // %entry
-; FAST-NEXT: umull x9, w0, w1
+; FAST-NEXT: umull x8, w0, w1
+; FAST-NEXT: tst x8, #0xffffffff00000000
+; FAST-NEXT: b.eq .LBB80_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB80_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: tst x9, #0xffffffff00000000
-; FAST-NEXT: cset w9, ne
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
@@ -2541,8 +2681,12 @@
 ; GISEL-NEXT: umull x8, w0, w1
 ; GISEL-NEXT: lsr x8, x8, #32
 ; GISEL-NEXT: cmp w8, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.eq .LBB80_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB80_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2)
@@ -2561,17 +2705,24 @@
 ; SDAG-LABEL: umulo.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: umulh x8, x0, x1
-; SDAG-NEXT: cmp xzr, x8
-; SDAG-NEXT: cset w0, eq
+; SDAG-NEXT: cbz x8, .LBB81_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB81_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: umulo.br.i64:
 ; FAST: // %bb.0: // %entry
-; FAST-NEXT: umulh x9, x0, x1
+; FAST-NEXT: umulh x8, x0, x1
+; FAST-NEXT: cmp xzr, x8
+; FAST-NEXT: b.eq .LBB81_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB81_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cmp xzr, x9
-; FAST-NEXT: cset w9, ne
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
@@ -2579,8 +2730,12 @@
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: umulh x8, x0, x1
 ; GISEL-NEXT: cmp x8, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.eq .LBB81_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB81_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 %v2)
@@ -2599,23 +2754,35 @@
 ; SDAG-LABEL: umulo2.br.i64:
 ; SDAG: // %bb.0: // %entry
 ; SDAG-NEXT: cmn x0, x0
-; SDAG-NEXT: cset w0, lo
+; SDAG-NEXT: b.lo .LBB82_2
+; SDAG-NEXT: // %bb.1: // %overflow
+; SDAG-NEXT: mov w0, wzr
+; SDAG-NEXT: ret
+; SDAG-NEXT: .LBB82_2: // %continue
+; SDAG-NEXT: mov w0, #1 // =0x1
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: umulo2.br.i64:
 ; FAST: // %bb.0: // %entry
 ; FAST-NEXT: cmn x0, x0
+; FAST-NEXT: b.lo .LBB82_2
+; FAST-NEXT: // %bb.1: // %overflow
+; FAST-NEXT: and w0, wzr, #0x1
+; FAST-NEXT: ret
+; FAST-NEXT: .LBB82_2: // %continue
 ; FAST-NEXT: mov w8, #1 // =0x1
-; FAST-NEXT: cset w9, hs
-; FAST-NEXT: bic w8, w8, w9
 ; FAST-NEXT: and w0, w8, #0x1
 ; FAST-NEXT: ret
 ;
 ; GISEL-LABEL: umulo2.br.i64:
 ; GISEL: // %bb.0: // %entry
 ; GISEL-NEXT: cmn x0, x0
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: eor w0, w8, #0x1
+; GISEL-NEXT: b.lo .LBB82_2
+; GISEL-NEXT: // %bb.1: // %overflow
+; GISEL-NEXT: mov w0, wzr
+; GISEL-NEXT: ret
+; GISEL-NEXT: .LBB82_2: // %continue
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: ret
 entry:
 %t = call {i64, i1} @llvm.umul.with.overflow.i64(i64 %v1, i64 2)
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -772,7 +772,7 @@
 ; CHECK-NEXT: ldaxrb w0, [x9]
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: b.ne .LBB40_4
-; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore
+; CHECK-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; CHECK-NEXT: // in Loop: Header=BB40_1 Depth=1
 ; CHECK-NEXT: stxrb w10, w1, [x9]
 ; CHECK-NEXT: cbnz w10, .LBB40_1
@@ -800,7 +800,7 @@
 ; CHECK-NEXT: ldaxrh w0, [x9]
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: b.ne .LBB41_4
-; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore
+; CHECK-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; CHECK-NEXT: // in Loop: Header=BB41_1 Depth=1
 ; CHECK-NEXT: stlxrh w10, w1, [x9]
 ; CHECK-NEXT: cbnz w10, .LBB41_1
@@ -828,7 +828,7 @@
 ; CHECK-NEXT: ldxr w0, [x9]
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: b.ne .LBB42_4
-; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore
+; CHECK-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; CHECK-NEXT: // in Loop: Header=BB42_1 Depth=1
 ; CHECK-NEXT: stlxr w10, w1, [x9]
 ; CHECK-NEXT: cbnz w10, .LBB42_1
@@ -854,7 +854,7 @@
 ; CHECK-NEXT: ldxr x8, [x9]
 ; CHECK-NEXT: cmp x8, x0
 ; CHECK-NEXT: b.ne .LBB43_3
-; CHECK-NEXT: // %bb.2: // %cmpxchg.trystore
+; CHECK-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; CHECK-NEXT: // in Loop: Header=BB43_1 Depth=1
 ; CHECK-NEXT: stxr w10, x1, [x9]
 ; CHECK-NEXT: cbnz w10, .LBB43_1
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -987,7 +987,7 @@
 ; INLINE_ATOMICS-NEXT: ldaxrb w0, [x9]
 ; INLINE_ATOMICS-NEXT: cmp w0, w8
 ; INLINE_ATOMICS-NEXT: b.ne .LBB40_4
-; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore
+; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; INLINE_ATOMICS-NEXT: // in Loop: Header=BB40_1 Depth=1
 ; INLINE_ATOMICS-NEXT: stxrb w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT: cbnz w10, .LBB40_1
@@ -1024,7 +1024,7 @@
 ; INLINE_ATOMICS-NEXT: ldaxrh w0, [x9]
 ; INLINE_ATOMICS-NEXT: cmp w0, w8
 ; INLINE_ATOMICS-NEXT: b.ne .LBB41_4
-; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore
+; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; INLINE_ATOMICS-NEXT: // in Loop: Header=BB41_1 Depth=1
 ; INLINE_ATOMICS-NEXT: stlxrh w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT: cbnz w10, .LBB41_1
@@ -1060,7 +1060,7 @@
 ; INLINE_ATOMICS-NEXT: ldxr w0, [x9]
 ; INLINE_ATOMICS-NEXT: cmp w0, w8
 ; INLINE_ATOMICS-NEXT: b.ne .LBB42_4
-; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore
+; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; INLINE_ATOMICS-NEXT: // in Loop: Header=BB42_1 Depth=1
 ; INLINE_ATOMICS-NEXT: stlxr w10, w1, [x9]
 ; INLINE_ATOMICS-NEXT: cbnz w10, .LBB42_1
@@ -1095,7 +1095,7 @@
 ; INLINE_ATOMICS-NEXT: ldxr x8, [x9]
 ; INLINE_ATOMICS-NEXT: cmp x8, x0
 ; INLINE_ATOMICS-NEXT: b.ne .LBB43_3
-; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.trystore
+; INLINE_ATOMICS-NEXT: // %bb.2: // %cmpxchg.fencedstore
 ; INLINE_ATOMICS-NEXT: // in Loop: Header=BB43_1 Depth=1
 ; INLINE_ATOMICS-NEXT: stxr w10, x1, [x9]
 ; INLINE_ATOMICS-NEXT: cbnz w10, .LBB43_1
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll
--- a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll
@@ -6,12 +6,19 @@
 define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 {
 ; CHECK-LABEL: invert_bcc_block_align_higher_func:
-; CHECK: ; %bb.0: ; %common.ret
-; CHECK-NEXT: mov w8, #9 ; =0x9
+; CHECK: ; %bb.0:
 ; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: mov w9, #42 ; =0x2a
-; CHECK-NEXT: cset w0, ne
-; CHECK-NEXT: csel w8, w9, w8, eq
+; CHECK-NEXT: b.eq LBB0_1
+; CHECK-NEXT: b LBB0_2
+; CHECK-NEXT: LBB0_1: ; %bb1
+; CHECK-NEXT: mov w8, #42 ; =0x2a
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: str w8, [x8]
+; CHECK-NEXT: ret
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: LBB0_2: ; %bb2
+; CHECK-NEXT: mov w8, #9 ; =0x9
+; CHECK-NEXT: mov w0, #1 ; =0x1
 ; CHECK-NEXT: str w8, [x8]
 ; CHECK-NEXT: ret
 %1 = icmp eq i32 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
--- a/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-asm.ll
@@ -7,12 +7,10 @@
 ; condition.
 ; CHECK-LABEL: test_asm_length:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: tbz w0, #0, LBB0_2
-; CHECK-NEXT: ; %bb.1:
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: ret
-; CHECK-NEXT: LBB0_2: ; %true
-; CHECK-NEXT: mov w0, #4
+; CHECK-NEXT: tbz w0, #0, LBB0_1
+; CHECK-NEXT: b LBB0_2
+; CHECK-NEXT: LBB0_1: ; %true
+; CHECK-NEXT: mov w0, #4 ; =0x4
 ; CHECK-NEXT: ; InlineAsm Start
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
@@ -21,6 +19,9 @@
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB0_2: ; %false
+; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: ret
 %val = and i32 %in, 1
 %tst = icmp eq i32 %val, 0
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll
--- a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll
@@ -5,23 +5,24 @@
 ; CHECK-LABEL: invert_bcc:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: fcmp s0, s1
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: mov w8, #42 ; =0x2a
-; CHECK-NEXT: b.pl LBB0_3
+; CHECK-NEXT: b.ne LBB0_3
 ; CHECK-NEXT: b LBB0_2
 ; CHECK-NEXT: LBB0_3:
-; CHECK-NEXT: b.gt LBB0_2
-; CHECK-NEXT: ; %bb.1: ; %common.ret
-; CHECK-NEXT: str w8, [x8]
-; CHECK-NEXT: ret
-; CHECK-NEXT: LBB0_2: ; %bb2
-; CHECK-NEXT: mov w0, #1 ; =0x1
+; CHECK-NEXT: b.vc LBB0_1
+; CHECK-NEXT: b LBB0_2
+; CHECK-NEXT: LBB0_1: ; %bb2
 ; CHECK-NEXT: mov w8, #9 ; =0x9
+; CHECK-NEXT: mov w0, #1 ; =0x1
 ; CHECK-NEXT: ; InlineAsm Start
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: nop
 ; CHECK-NEXT: ; InlineAsm End
 ; CHECK-NEXT: str w8, [x8]
+; CHECK-NEXT: ret
+; CHECK-NEXT: LBB0_2: ; %bb1
+; CHECK-NEXT: mov w8, #42 ; =0x2a
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: str w8, [x8]
 ; CHECK-NEXT: ret
 %1 = fcmp ueq float %x, %y
 br i1 %1, label %bb1, label %bb2
diff --git a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
--- a/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
+++ b/llvm/test/CodeGen/AArch64/branch-relax-cbz.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT: ldr w8, [x8]
 ; CHECK-NEXT: cbnz w8, LBB0_2
 ; CHECK-NEXT: b LBB0_4
-; CHECK-NEXT: LBB0_2: ; %common.ret
+; CHECK-NEXT: LBB0_2: ; %b8
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: LBB0_3: ; %b2
 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -127,12 +127,15 @@
 define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) nounwind {
 ; CHECK-LABEL: usubo_ult_sub_dominates_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: tbz w3, #0, .LBB7_2
+; CHECK-NEXT: tbz w3, #0, .LBB7_3
 ; CHECK-NEXT: // %bb.1: // %t
 ; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cset w3, lo
 ; CHECK-NEXT: str x8, [x2]
-; CHECK-NEXT: .LBB7_2: // %common.ret
+; CHECK-NEXT: tbz w3, #0, .LBB7_3
+; CHECK-NEXT: // %bb.2: // %end
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB7_3: // %f
 ; CHECK-NEXT: and w0, w3, #0x1
 ; CHECK-NEXT: ret
 entry:
@@ -154,29 +157,30 @@
 define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) nounwind {
 ; CHECK-LABEL: usubo_ult_cmp_dominates_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill
 ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov w19, w3
+; CHECK-NEXT: mov w20, w3
 ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
 ; CHECK-NEXT: tbz w3, #0, .LBB8_3
 ; CHECK-NEXT: // %bb.1: // %t
 ; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: mov x23, x0
-; CHECK-NEXT: mov x20, x2
-; CHECK-NEXT: cset w21, lo
-; CHECK-NEXT: mov x22, x1
-; CHECK-NEXT: mov w0, w21
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: mov x19, x2
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: mov x21, x1
 ; CHECK-NEXT: bl call
-; CHECK-NEXT: subs x8, x23, x22
+; CHECK-NEXT: subs x8, x22, x21
 ; CHECK-NEXT: b.hs .LBB8_3
 ; CHECK-NEXT: // %bb.2: // %end
-; CHECK-NEXT: mov w19, w21
-; CHECK-NEXT: str x8, [x20]
-; CHECK-NEXT: .LBB8_3: // %common.ret
-; CHECK-NEXT: and w0, w19, #0x1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str x8, [x19]
+; CHECK-NEXT: b .LBB8_4
+; CHECK-NEXT: .LBB8_3: // %f
+; CHECK-NEXT: and w0, w20, #0x1
+; CHECK-NEXT: .LBB8_4: // %f
 ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
 ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 entry:
 br i1 %cond, label %t, label %f
diff --git a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
--- a/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
+++ b/llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll
@@ -13,10 +13,13 @@
 define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: f_i8_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, ge
-; CHECK-NEXT: add w0, w8, w0, uxtb
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: tbnz w0, #7, .LBB0_2
+; CHECK-NEXT: // %bb.1: // %A
+; CHECK-NEXT: add w0, w8, w1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %B
+; CHECK-NEXT: add w0, w8, w2
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp sgt i8 %in, -1
@@ -35,10 +38,13 @@
 define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: f_i16_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, ge
-; CHECK-NEXT: add w0, w8, w0, uxth
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: tbnz w0, #15, .LBB1_2
+; CHECK-NEXT: // %bb.1: // %A
+; CHECK-NEXT: add w0, w8, w1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB1_2: // %B
+; CHECK-NEXT: add w0, w8, w2
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp sgt i16 %in, -1
@@ -57,9 +63,13 @@
 define i64 @f_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: f_i32_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: csel x8, x1, x2, ge
-; CHECK-NEXT: add x0, x8, w0, uxtw
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: tbnz w0, #31, .LBB2_2
+; CHECK-NEXT: // %bb.1: // %A
+; CHECK-NEXT: add x0, x8, x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB2_2: // %B
+; CHECK-NEXT: add x0, x8, x2
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp sgt i32 %in, -1
@@ -78,10 +88,13 @@
 define i32 @g_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: g_i8_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, lt
-; CHECK-NEXT: add w0, w8, w0, uxtb
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: tbnz w0, #7, .LBB3_2
+; CHECK-NEXT: // %bb.1: // %B
+; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB3_2: // %A
+; CHECK-NEXT: add w0, w8, w1
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp slt i8 %in, 0
@@ -100,10 +113,13 @@
 define i32 @g_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: g_i16_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel w8, w1, w2, lt
-; CHECK-NEXT: add w0, w8, w0, uxth
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: tbnz w0, #15, .LBB4_2
+; CHECK-NEXT: // %bb.1: // %B
+; CHECK-NEXT: add w0, w8, w2
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB4_2: // %A
+; CHECK-NEXT: add w0, w8, w1
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp slt i16 %in, 0
@@ -122,9 +138,13 @@
 define i64 @g_i32_sign_extend_inreg(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: g_i32_sign_extend_inreg:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: csel x8, x1, x2, lt
-; CHECK-NEXT: add x0, x8, w0, uxtw
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: tbnz w0, #31, .LBB5_2
+; CHECK-NEXT: // %bb.1: // %B
+; CHECK-NEXT: add x0, x8, x2
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB5_2: // %A
+; CHECK-NEXT: add x0, x8, x1
 ; CHECK-NEXT: ret
 entry:
 %cmp = icmp slt i32 %in, 0
@@ -143,11 +163,13 @@
 define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: f_i32_sign_extend_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel x8, x1, x2, ge
-; CHECK-NEXT: add x0, x8, w0, uxtw
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: tbnz w0, #31, .LBB6_2
+; CHECK-NEXT: // %bb.1: // %A
+; CHECK-NEXT: add x0, x8, x1
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB6_2: // %B
+; CHECK-NEXT: add x0, x8, x2
 ; CHECK-NEXT: ret
 entry:
 %inext = sext i32 %in to i64
@@ -167,11 +189,13 @@
 define i64 @g_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: g_i32_sign_extend_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x8, w0
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csel x8, x1, x2, lt
-; CHECK-NEXT: add x0, x8, w0, uxtw
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: tbnz w0, #31, .LBB7_2
+; CHECK-NEXT: // %bb.1: // %B
+; CHECK-NEXT: add x0, x8, x2
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB7_2: // %A
+; CHECK-NEXT: add x0, x8, x1
 ; CHECK-NEXT: ret
 entry:
 %inext = sext i32 %in to i64
diff --git a/llvm/test/CodeGen/AArch64/cmp-frameindex.ll b/llvm/test/CodeGen/AArch64/cmp-frameindex.ll
--- a/llvm/test/CodeGen/AArch64/cmp-frameindex.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-frameindex.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT: b.eq .LBB0_2
 ; CHECK-NEXT: // %bb.1: // %bb1
 ; CHECK-NEXT: bl bar
-; CHECK-NEXT: .LBB0_2: // %common.ret
+; CHECK-NEXT: .LBB0_2: // %bb2
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 %stack = alloca i8
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: ldaxr w8, [x0]
 ; CHECK-NEXT: cmp w8, w1
 ; CHECK-NEXT: b.ne LBB0_4
-; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NEXT: ; %bb.2: ; %cmpxchg.fencedstore
 ; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
 ; CHECK-NEXT: stlxr w8, w2, [x0]
 ; CHECK-NEXT: cbnz w8, LBB0_1
@@ -59,7 +59,7 @@
 ; CHECK-NEXT: ldaxrb w9, [x0]
 ; CHECK-NEXT: cmp w9, w8
 ; CHECK-NEXT: b.ne LBB1_4
-; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NEXT: ; %bb.2: ; %cmpxchg.fencedstore
 ; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
 ; CHECK-NEXT: stlxrb w9, w2, [x0]
 ; CHECK-NEXT: cbnz w9, LBB1_1
@@ -106,16 +106,22 @@
 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldaxr w8, [x0]
 ; CHECK-NEXT: cmp w8, w1
-; CHECK-NEXT: b.ne LBB2_4
-; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NEXT: b.ne LBB2_5
+; CHECK-NEXT: ; %bb.2: ; %cmpxchg.fencedstore
 ; CHECK-NEXT: ; in Loop: Header=BB2_1 Depth=1
 ; CHECK-NEXT: stlxr w8, w2, [x0]
 ; CHECK-NEXT: cbnz w8, LBB2_1
-; CHECK-NEXT: ; %bb.3: ; %true
-; CHECK-NEXT: b _bar
-; CHECK-NEXT: LBB2_4: ; %cmpxchg.nostore
-; CHECK-NEXT: clrex
+; CHECK-NEXT: ; %bb.3:
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: cbnz w8, LBB2_6
+; CHECK-NEXT: LBB2_4: ; %false
 ; CHECK-NEXT: b _baz
+; CHECK-NEXT: LBB2_5: ; %cmpxchg.nostore
+; CHECK-NEXT: mov w8, wzr
+; CHECK-NEXT: clrex
+; CHECK-NEXT: cbz w8, LBB2_4
+; CHECK-NEXT: LBB2_6: ; %true
+; CHECK-NEXT: b _bar
 ;
 ; OUTLINE-ATOMICS-LABEL: test_conditional:
 ; OUTLINE-ATOMICS: ; %bb.0:
@@ -183,7 +189,7 @@
 ; CHECK-NEXT: ldaxr w8, [x19]
 ; CHECK-NEXT: cmp w8, w21
 ; CHECK-NEXT: b.ne LBB3_4
-; CHECK-NEXT: ; %bb.2: ; %cmpxchg.trystore
+; CHECK-NEXT: ; %bb.2: ; %cmpxchg.fencedstore
 ; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
 ; CHECK-NEXT: stlxr w8, w20, [x19]
 ; CHECK-NEXT: cbnz w8, LBB3_1
@@ -193,26 +199,29 @@
 ; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore
 ; CHECK-NEXT: mov w8, wzr
 ; CHECK-NEXT: clrex
-; CHECK-NEXT: LBB3_5: ; %for.cond.preheader
-; CHECK-NEXT: mov w22, #2 ; =0x2
-; CHECK-NEXT: LBB3_6: ; %for.cond
+; CHECK-NEXT: LBB3_5: ; %cmpxchg.end
+; CHECK-NEXT: mov w22, #4 ; =0x4
+; CHECK-NEXT: b LBB3_7
+; CHECK-NEXT: LBB3_6: ; %if.end
+; CHECK-NEXT: ; in Loop: Header=BB3_7 Depth=1
+; CHECK-NEXT: sub x22, x22, #4
+; CHECK-NEXT: LBB3_7: ; %for.cond
 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cbz w22, LBB3_9
-; CHECK-NEXT: ; %bb.7: ; %for.body
-; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1
-; CHECK-NEXT: sub w22, w22, #1
+; CHECK-NEXT: cmn w22, #4
+; CHECK-NEXT: b.eq LBB3_10
+; CHECK-NEXT: ; %bb.8: ; %for.body
+; CHECK-NEXT: ; in Loop: Header=BB3_7 Depth=1
+; CHECK-NEXT: ldr w10, [x19, x22]
 ; CHECK-NEXT: orr w9, w21, w20
-; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
 ; CHECK-NEXT: cmp w9, w10
 ; CHECK-NEXT: b.eq LBB3_6
-; CHECK-NEXT: ; %bb.8: ; %if.then
-; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1
-; CHECK-NEXT: sxtw x8, w22
-; CHECK-NEXT: str w9, [x19, x8, lsl #2]
+; CHECK-NEXT: ; %bb.9: ; %if.then
+; CHECK-NEXT: ; in Loop: Header=BB3_7 Depth=1
+; CHECK-NEXT: str w9, [x19, x22]
 ; CHECK-NEXT: bl _foo
 ; CHECK-NEXT: mov w8, wzr
 ; CHECK-NEXT: b LBB3_6
-; CHECK-NEXT: LBB3_9: ; %for.cond.cleanup
+; CHECK-NEXT: LBB3_10: ; %for.cond.cleanup
 ; CHECK-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
 ; CHECK-NEXT: and w0, w8, #0x1
 ; CHECK-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
@@ -236,26 +245,29 @@
 ; OUTLINE-ATOMICS-NEXT: mov w21, w0
 ; OUTLINE-ATOMICS-NEXT: bl ___aarch64_cas4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: cmp w0, w21
-; OUTLINE-ATOMICS-NEXT: mov w22, #2 ; =0x2
+; OUTLINE-ATOMICS-NEXT: mov w22, #4 ; =0x4
 ; OUTLINE-ATOMICS-NEXT: cset w8, eq
-; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond
+; OUTLINE-ATOMICS-NEXT: b LBB3_2
+; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %if.end
+; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; OUTLINE-ATOMICS-NEXT: sub x22, x22, #4
+; OUTLINE-ATOMICS-NEXT: LBB3_2: ; %for.cond
 ; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4
-; OUTLINE-ATOMICS-NEXT: ; %bb.2: ; %for.body
-; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1
+; OUTLINE-ATOMICS-NEXT: cmn w22, #4
+; OUTLINE-ATOMICS-NEXT: b.eq LBB3_5
+; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %for.body
+; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, x22]
 ; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
-; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, w22, sxtw #2]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w10
 ; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1
-; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then
-; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT: sxtw x8, w22
-; OUTLINE-ATOMICS-NEXT: str w9, [x19, x8, lsl #2]
+; OUTLINE-ATOMICS-NEXT: ; %bb.4: ; %if.then
+; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_2 Depth=1
+; OUTLINE-ATOMICS-NEXT: str w9, [x19, x22]
 ; OUTLINE-ATOMICS-NEXT: bl _foo
 ; OUTLINE-ATOMICS-NEXT: mov w8, wzr
 ; OUTLINE-ATOMICS-NEXT: b LBB3_1
-; OUTLINE-ATOMICS-NEXT: LBB3_4: ; %for.cond.cleanup
+; OUTLINE-ATOMICS-NEXT: LBB3_5: ; %for.cond.cleanup
 ; OUTLINE-ATOMICS-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: and w0, w8, #0x1
 ; OUTLINE-ATOMICS-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
--- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
+++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
@@ -683,11 +683,14 @@
 ; CHECK-NEXT: fmov d8, d0
 ; CHECK-NEXT: cinc w0, w19, gt
 ; CHECK-NEXT: bl xoo
-; CHECK-NEXT: fmov d0, #-1.00000000
 ; CHECK-NEXT: fcmp d8, #0.0
+; CHECK-NEXT: b.gt .LBB9_5
+; CHECK-NEXT: // %bb.4: // %cond.false12
+; CHECK-NEXT: fmov d0, #-1.00000000
+; CHECK-NEXT: fadd d8, d8, d0
+; CHECK-NEXT: .LBB9_5: // %cond.end14
+; CHECK-NEXT: fmov d0, d8
 ; CHECK-NEXT: fmov d1, #-2.00000000
-; CHECK-NEXT: fadd d0, d8, d0
-; CHECK-NEXT: fcsel d0, d8, d0, gt
 ; CHECK-NEXT: bl woo
 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT: mov w0, #4 // =0x4
mov w0, #4 // =0x4 @@ -736,16 +739,24 @@ define void @cmp_shifted(i32 %in, i32 %lhs, i32 %rhs) #0 { ; CHECK-LABEL: cmp_shifted: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w8, #42 // =0x2a -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w9, #128 // =0x80 -; CHECK-NEXT: csinc w8, w8, wzr, gt ; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 -; CHECK-NEXT: csel w0, w9, w8, ge +; CHECK-NEXT: b.lt .LBB10_2 +; CHECK-NEXT: // %bb.1: // %true +; CHECK-NEXT: mov w0, #128 // =0x80 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_2: // %false +; CHECK-NEXT: cmp w0, #1 +; CHECK-NEXT: b.lt .LBB10_4 +; CHECK-NEXT: // %bb.3: // %truer +; CHECK-NEXT: mov w0, #42 // =0x2a +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_4: // %falser +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: .LBB10_5: // %true ; CHECK-NEXT: bl zoo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -8,9 +8,14 @@ define void @test_add_cbz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-LABEL: test_add_cbz: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: cmn w0, w1 -; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %L2 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = add nsw i32 %a, %b @@ -26,10 +31,12 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, ptr %ptr) { ; CHECK-LABEL: test_add_cbz_multiple_use: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: adds w8, w0, w1 -; CHECK-NEXT: mov w9, #10 // =0xa -; CHECK-NEXT: csel w8, w9, w8, ne +; CHECK-NEXT: b.eq .LBB1_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: .LBB1_2: // %L2 ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = add nsw i32 %a, %b @@ -45,9 +52,14 @@ define void @test_add_cbz_64(i64 %a, i64 %b, ptr %ptr) { ; CHECK-LABEL: test_add_cbz_64: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: cmn x0, x1 -; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: b.eq .LBB2_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str xzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %L2 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str x8, [x2] ; CHECK-NEXT: ret %c = add nsw i64 %a, %b @@ -63,9 +75,14 @@ define void @test_and_cbz(i32 %a, ptr %ptr) { ; CHECK-LABEL: test_and_cbz: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: tst w0, #0x6 -; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: b.eq .LBB3_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x1] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %L2 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %c = and i32 %a, 6 @@ -81,9 +98,14 @@ define void @test_bic_cbnz(i32 %a, i32 %b, ptr %ptr) { ; CHECK-LABEL: test_bic_cbnz: -; CHECK: // %bb.0: // %common.ret +; CHECK: // %bb.0: ; CHECK-NEXT: bics wzr, w1, w0 -; CHECK-NEXT: cset w8, ne +; CHECK-NEXT: b.ne .LBB4_2 +; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str wzr, [x2] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %L2 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = and i32 %a, %b diff --git 
a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll --- a/llvm/test/CodeGen/AArch64/csr-split.ll +++ b/llvm/test/CodeGen/AArch64/csr-split.ll @@ -88,29 +88,29 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: .cfi_remember_state -; CHECK-NEXT: cbz x0, .LBB1_3 -; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cbz x0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.end ; CHECK-NEXT: adrp x8, a ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] ; CHECK-NEXT: cmp x8, x0 -; CHECK-NEXT: b.ne .LBB1_3 -; CHECK-NEXT: // %bb.2: // %if.then2 -; CHECK-NEXT: bl callVoid -; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: b.eq .LBB1_3 +; CHECK-NEXT: .LBB1_2: // %return +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 ; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: b callNonVoid -; CHECK-NEXT: .LBB1_3: // %return +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_3: // %if.then2 ; CHECK-NEXT: .cfi_restore_state -; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: bl callVoid +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 ; CHECK-NEXT: .cfi_restore w30 -; CHECK-NEXT: ret +; CHECK-NEXT: b callNonVoid ; ; CHECK-APPLE-LABEL: test2: ; CHECK-APPLE: ; %bb.0: ; %entry @@ -122,37 +122,37 @@ ; CHECK-APPLE-NEXT: .cfi_offset w19, -24 ; CHECK-APPLE-NEXT: .cfi_offset w20, -32 ; CHECK-APPLE-NEXT: .cfi_remember_state -; CHECK-APPLE-NEXT: cbz x0, LBB1_3 -; CHECK-APPLE-NEXT: ; %bb.1: ; %entry +; CHECK-APPLE-NEXT: cbz x0, LBB1_2 +; CHECK-APPLE-NEXT: ; %bb.1: ; %if.end ; CHECK-APPLE-NEXT: Lloh2: ; CHECK-APPLE-NEXT: adrp x8, _a@PAGE ; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: Lloh3: ; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF] ; CHECK-APPLE-NEXT: cmp x8, x0 -; CHECK-APPLE-NEXT: b.ne LBB1_3 -; CHECK-APPLE-NEXT: ; %bb.2: ; %if.then2 -; CHECK-APPLE-NEXT: bl _callVoid +; CHECK-APPLE-NEXT: b.eq LBB1_3 +; CHECK-APPLE-NEXT: LBB1_2: ; %return ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: mov x0, x19 +; CHECK-APPLE-NEXT: mov w0, wzr ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 0 ; CHECK-APPLE-NEXT: .cfi_restore w30 ; CHECK-APPLE-NEXT: .cfi_restore w29 ; CHECK-APPLE-NEXT: .cfi_restore w19 ; CHECK-APPLE-NEXT: .cfi_restore w20 -; CHECK-APPLE-NEXT: b _callNonVoid -; CHECK-APPLE-NEXT: LBB1_3: ; %return +; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: LBB1_3: ; %if.then2 ; CHECK-APPLE-NEXT: .cfi_restore_state +; CHECK-APPLE-NEXT: bl _callVoid ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: mov w0, wzr +; CHECK-APPLE-NEXT: mov x0, x19 ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 0 ; CHECK-APPLE-NEXT: .cfi_restore w30 ; CHECK-APPLE-NEXT: .cfi_restore w29 ; CHECK-APPLE-NEXT: .cfi_restore w19 ; CHECK-APPLE-NEXT: .cfi_restore w20 -; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: b _callNonVoid ; CHECK-APPLE-NEXT: .loh AdrpLdr Lloh2, Lloh3 entry: %tobool = icmp eq ptr %p1, null diff --git a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll --- a/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -4,13 +4,11 @@ define i64 @test_or(i32 %a, i32 %b) { ; 
CHECK-LABEL: test_or: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB0_2 +; CHECK-NEXT: LBB0_1: ; %bb3 ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB0_2 -; CHECK-NEXT: LBB0_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB0_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -37,13 +35,11 @@ define i64 @test_or_select(i32 %a, i32 %b) { ; CHECK-LABEL: test_or_select: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB1_2 +; CHECK-NEXT: LBB1_1: ; %bb3 ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB1_2 -; CHECK-NEXT: LBB1_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB1_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -70,13 +66,11 @@ define i64 @test_and(i32 %a, i32 %b) { ; CHECK-LABEL: test_and: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB2_2 +; CHECK-NEXT: LBB2_1: ; %bb3 ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB2_2 -; CHECK-NEXT: LBB2_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB2_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB2_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill @@ -103,13 +97,11 @@ define i64 @test_and_select(i32 %a, i32 %b) { ; CHECK-LABEL: test_and_select: ; CHECK: ; %bb.0: ; %bb1 -; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: cbnz w0, LBB3_2 +; CHECK-NEXT: LBB3_1: ; %bb3 ; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: cbnz w8, LBB3_2 -; CHECK-NEXT: LBB3_1: ; %common.ret ; CHECK-NEXT: ret ; CHECK-NEXT: LBB3_2: ; %bb1.cond.split -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cbz w1, LBB3_1 ; CHECK-NEXT: ; %bb.3: ; %bb4 ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill @@ -139,7 +131,6 @@ ; CHECK-LABEL: test_or_unpredictable: ; CHECK: ; %bb.0: ; %bb1 ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cset w8, eq ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, eq @@ -152,7 +143,9 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: bl _bar ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: LBB4_2: ; %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: LBB4_2: ; %bb3 +; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret bb1: %0 = icmp eq i32 %a, 0 @@ -172,7 +165,6 @@ ; CHECK-LABEL: test_and_unpredictable: ; CHECK: ; %bb.0: ; %bb1 ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, ne @@ -185,7 +177,9 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: bl _bar ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: LBB5_2: ; %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: LBB5_2: ; %bb3 +; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret bb1: %0 = icmp ne i32 %a, 0 diff --git a/llvm/test/CodeGen/AArch64/implicit-null-check.ll b/llvm/test/CodeGen/AArch64/implicit-null-check.ll --- a/llvm/test/CodeGen/AArch64/implicit-null-check.ll +++ b/llvm/test/CodeGen/AArch64/implicit-null-check.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: ldr w0, [x0] // on-fault: .LBB0_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB0_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -36,8 +36,8 @@ ; CHECK-NEXT: ldr w0, [x0] // on-fault: .LBB1_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB1_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -58,8 +58,8 @@ ; CHECK-NEXT: ldr w0, [x0] // on-fault: .LBB2_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB2_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -83,8 +83,8 @@ ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ldar w0, [x0] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB3_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -106,8 +106,8 @@ ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ldr w0, [x0] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB4_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB4_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -129,8 +129,8 @@ ; CHECK-NEXT: ldrb w0, [x0] // on-fault: .LBB5_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB5_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -149,14 +149,15 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cbz x0, .LBB6_2 ; CHECK-NEXT: // %bb.1: // %not_null +; CHECK-NEXT: ldp x8, x1, [x0] ; CHECK-NEXT: ldp x2, x3, [x0, #16] -; CHECK-NEXT: ldp x0, x1, [x0] +; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB6_2: +; CHECK-NEXT: .LBB6_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: mov x1, xzr ; CHECK-NEXT: mov x2, xzr ; CHECK-NEXT: mov x3, xzr -; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -179,8 +180,8 @@ ; CHECK-NEXT: ldr 
w0, [x0, #128] // on-fault: .LBB7_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB7_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB7_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -203,8 +204,8 @@ ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: add w0, w8, w1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB8_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -230,8 +231,8 @@ ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: add w0, w9, w8 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB9_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB9_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -253,14 +254,14 @@ define i32 @imp_null_check_hoist_over_unrelated_load(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: imp_null_check_hoist_over_unrelated_load: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cbz x0, .LBB10_2 +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: ldr w0, [x0] // on-fault: .LBB10_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ldr w8, [x1] -; CHECK-NEXT: ldr w0, [x0] ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB10_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB10_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -279,15 +280,15 @@ define i32 @imp_null_check_gep_load_with_use_dep(ptr %x, i32 %a) { ; CHECK-LABEL: imp_null_check_gep_load_with_use_dep: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: .Ltmp7: ; CHECK-NEXT: ldr w8, [x0] // on-fault: .LBB11_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: add w9, w0, w1 ; CHECK-NEXT: add w8, w9, w8 ; CHECK-NEXT: add w0, w8, #4 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB11_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB11_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -315,8 +316,8 @@ ; CHECK-NEXT: dmb ishld ; CHECK-NEXT: ldr w0, [x0] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB12_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB12_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -341,8 +342,8 @@ ; CHECK-NEXT: dmb ish ; CHECK-NEXT: ldr w0, [x0] ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB13_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB13_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -363,9 +364,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cbz x0, .LBB14_2 ; CHECK-NEXT: // %bb.1: // %not_null -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x0] -; CHECK-NEXT: .LBB14_2: // %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB14_2: // %is_null ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -385,9 +387,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cbz x0, .LBB15_2 ; CHECK-NEXT: // %bb.1: // %not_null -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x0] -; CHECK-NEXT: .LBB15_2: // %common.ret +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB15_2: // %is_null ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null @@ -404,12 +407,12 @@ define i32 @imp_null_check_neg_gep_load(ptr %x) { ; CHECK-LABEL: imp_null_check_neg_gep_load: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: .Ltmp8: ; CHECK-NEXT: ldur w0, [x0, #-128] // on-fault: .LBB16_2 ; CHECK-NEXT: // %bb.1: // %not_null ; CHECK-NEXT: ret -; CHECK-NEXT: 
.LBB16_2: -; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: .LBB16_2: // %is_null +; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: ret entry: %c = icmp eq ptr %x, null diff --git a/llvm/test/CodeGen/AArch64/large-stack-cmp.ll b/llvm/test/CodeGen/AArch64/large-stack-cmp.ll --- a/llvm/test/CodeGen/AArch64/large-stack-cmp.ll +++ b/llvm/test/CodeGen/AArch64/large-stack-cmp.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: b LBB0_3 ; CHECK-NEXT: LBB0_2: ; %true ; CHECK-NEXT: bl _bar -; CHECK-NEXT: LBB0_3: ; %common.ret +; CHECK-NEXT: LBB0_3: ; %true ; CHECK-NEXT: add sp, sp, #1, lsl #12 ; =4096 ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll b/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll --- a/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll +++ b/llvm/test/CodeGen/AArch64/ldst-opt-after-block-placement.ll @@ -20,7 +20,7 @@ ; CHECK-NEXT: b.ge .LBB0_4 ; CHECK-NEXT: .LBB0_3: // %exit1 ; CHECK-NEXT: str xzr, [x1, #8] -; CHECK-NEXT: .LBB0_4: // %common.ret +; CHECK-NEXT: .LBB0_4: // %exit2 ; CHECK-NEXT: ret entry: br i1 %cond, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll --- a/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll +++ b/llvm/test/CodeGen/AArch64/ldst-paired-aliasing.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl _Z5setupv ; CHECK-NEXT: movi v0.4s, #1 -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: add x0, sp, #48 ; CHECK-NEXT: mov x1, sp ; CHECK-NEXT: str xzr, [sp, #80] @@ -32,7 +32,7 @@ ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_2: // %if.then ; CHECK-NEXT: bl f2 -; CHECK-NEXT: .LBB0_3: // %common.ret +; CHECK-NEXT: .LBB0_3: // %for.inc ; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: add sp, sp, #112 diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll --- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -257,12 +257,11 @@ ; CHECK-NEXT: tst x9, x10, lsl #63 ; CHECK-NEXT: b.lt .LBB2_4 ; CHECK-NEXT: // %bb.2: // %test3 -; CHECK-NEXT: and x10, x9, x10, asr #12 -; CHECK-NEXT: cmp x10, #1 -; CHECK-NEXT: b.ge .LBB2_4 +; CHECK-NEXT: tst x9, x10, asr #12 +; CHECK-NEXT: b.gt .LBB2_4 ; CHECK-NEXT: // %bb.3: // %other_exit ; CHECK-NEXT: str x9, [x8] -; CHECK-NEXT: .LBB2_4: // %common.ret +; CHECK-NEXT: .LBB2_4: // %ret ; CHECK-NEXT: ret %val1 = load i64, ptr @var1_64 %val2 = load i64, ptr @var2_64 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll --- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-NEXT: cbz w2, .LBB0_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: cmp w2, #16 ; CHECK-NEXT: mov w8, w2 -; CHECK-NEXT: b.hi .LBB0_3 +; CHECK-NEXT: b.hs .LBB0_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: b .LBB0_6 @@ -32,7 +32,7 @@ ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: b.eq .LBB0_8 -; CHECK-NEXT: .LBB0_6: // %for.body.preheader1 +; CHECK-NEXT: .LBB0_6: // %for.body.preheader14 ; CHECK-NEXT: lsl x10, x9, #1 ; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: add x9, x1, x10 
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -19,30 +19,32 @@ ; A53-NEXT: mov x8, x0 ; A53-NEXT: mov x19, x8 ; A53-NEXT: mov w0, w1 -; A53-NEXT: mov w9, #256 +; A53-NEXT: mov w9, #256 // =0x100 ; A53-NEXT: stp x2, x3, [x8, #32] ; A53-NEXT: mov x2, x8 ; A53-NEXT: str q0, [x19, #16]! ; A53-NEXT: str w1, [x19] -; A53-NEXT: mov w1, #4 +; A53-NEXT: mov w1, #4 // =0x4 ; A53-NEXT: str q0, [x8] ; A53-NEXT: strh w9, [x8, #24] ; A53-NEXT: str wzr, [x8, #20] ; A53-NEXT: bl fcntl +; A53-NEXT: ldr w8, [x19] +; A53-NEXT: tbnz w8, #31, .LBB0_6 +; A53-NEXT: // %bb.1: // %while.body.i.preheader ; A53-NEXT: adrp x9, gv0 ; A53-NEXT: add x9, x9, :lo12:gv0 ; A53-NEXT: cmp x19, x9 -; A53-NEXT: b.eq .LBB0_4 -; A53-NEXT: // %bb.1: -; A53-NEXT: ldr w8, [x19] +; A53-NEXT: b.eq .LBB0_5 +; A53-NEXT: // %bb.2: // %while.body.i.split.ver.us.preheader ; A53-NEXT: ldr w9, [x9] ; A53-NEXT: .p2align 4, , 8 -; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us +; A53-NEXT: .LBB0_3: // %while.body.i.split.ver.us ; A53-NEXT: // =>This Inner Loop Header: Depth=1 ; A53-NEXT: lsl w9, w9, #1 ; A53-NEXT: cmp w9, w8 -; A53-NEXT: b.le .LBB0_2 -; A53-NEXT: // %bb.3: // %while.end.i +; A53-NEXT: b.le .LBB0_3 +; A53-NEXT: // %bb.4: // %while.end.i ; A53-NEXT: bl foo ; A53-NEXT: adrp x8, gv1 ; A53-NEXT: str x0, [x8, :lo12:gv1] @@ -52,10 +54,11 @@ ; A53-NEXT: .cfi_restore w30 ; A53-NEXT: ret ; A53-NEXT: .p2align 4, , 8 -; A53-NEXT: .LBB0_4: // %while.body.i.split +; A53-NEXT: .LBB0_5: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 ; A53-NEXT: .cfi_restore_state -; A53-NEXT: b .LBB0_4 +; A53-NEXT: b .LBB0_5 +; A53-NEXT: .LBB0_6: // %if.then.i entry: tail call void @llvm.memset.p0.i64(ptr align 8 %fde, i8 0, i64 40, i1 false) %state = getelementptr inbounds %struct1, ptr %fde, i64 0, i32 4 diff --git a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll --- a/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll +++ b/llvm/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -19,9 +19,8 @@ ; CHECK-NEXT: cbz wzr, .LBB0_4 ; CHECK-NEXT: // %bb.2: // %b3 ; CHECK-NEXT: ldr w8, [x8] -; CHECK-NEXT: and w0, w8, #0x100 -; CHECK-NEXT: cbz w0, .LBB0_5 -; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split +; CHECK-NEXT: tbz w8, #8, .LBB0_5 +; CHECK-NEXT: .LBB0_3: // %b7 ; CHECK-NEXT: b extfunc ; CHECK-NEXT: .LBB0_4: // %b2 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -32,7 +31,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: cbnz w0, .LBB0_3 -; CHECK-NEXT: .LBB0_5: // %common.ret +; CHECK-NEXT: .LBB0_5: // %b8 ; CHECK-NEXT: ret %c0 = icmp sgt i64 0, 0 br i1 %c0, label %b1, label %b6 diff --git a/llvm/test/CodeGen/AArch64/pr51476.ll b/llvm/test/CodeGen/AArch64/pr51476.ll --- a/llvm/test/CodeGen/AArch64/pr51476.ll +++ b/llvm/test/CodeGen/AArch64/pr51476.ll @@ -5,15 +5,14 @@ ; CHECK-LABEL: test: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: and w9, w0, #0xff -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: cmp w9, #1 +; CHECK-NEXT: and w8, w0, #0xff +; CHECK-NEXT: cmp w8, #1 ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: strb w0, [sp, #12] -; CHECK-NEXT: cbz w8, .LBB0_2 +; CHECK-NEXT: cbnz wzr, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %do_call ; CHECK-NEXT: bl unknown -; CHECK-NEXT: .LBB0_2: // %common.ret +; CHECK-NEXT: .LBB0_2: // %exit ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp = alloca i8 diff --git a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll --- a/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll +++ b/llvm/test/CodeGen/AArch64/rm_redundant_cmp.ll @@ -17,10 +17,12 @@ ; CHECK-NEXT: ldrsh w9, [x8, #2] ; CHECK-NEXT: ldrsh w10, [x8, #4] ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.lt .LBB0_2 -; CHECK-NEXT: // %bb.1: // %if.end8.sink.split +; CHECK-NEXT: b.gt .LBB0_2 +; CHECK-NEXT: // %bb.1: // %if.else +; CHECK-NEXT: b.ne .LBB0_3 +; CHECK-NEXT: .LBB0_2: // %if.then7 ; CHECK-NEXT: strh w9, [x8] -; CHECK-NEXT: .LBB0_2: // %if.end8 +; CHECK-NEXT: .LBB0_3: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2 @@ -49,16 +51,18 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:cost_s_i8_i16 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s_i8_i16] -; CHECK-NEXT: ldrsh w9, [x8, #2] -; CHECK-NEXT: ldrsh w10, [x8, #4] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.gt .LBB1_2 -; CHECK-NEXT: // %bb.1: // %if.else -; CHECK-NEXT: mov w9, w10 -; CHECK-NEXT: b.ge .LBB1_3 -; CHECK-NEXT: .LBB1_2: // %if.end8.sink.split +; CHECK-NEXT: ldrsh w10, [x8, #2] +; CHECK-NEXT: ldrsh w9, [x8, #4] +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: b.le .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: strh w10, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %if.else +; CHECK-NEXT: b.ge .LBB1_4 +; CHECK-NEXT: // %bb.3: // %if.then7 ; CHECK-NEXT: strh w9, [x8] -; CHECK-NEXT: .LBB1_3: // %if.end8 +; CHECK-NEXT: .LBB1_4: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i16, ptr getelementptr inbounds (%struct.s_signed_i16, ptr @cost_s_i8_i16, i64 0, i32 1), align 2 @@ -90,10 +94,12 @@ ; CHECK-NEXT: ldrh w9, [x8, #2] ; CHECK-NEXT: ldrh w10, [x8, #4] ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.lo .LBB2_2 -; CHECK-NEXT: // %bb.1: // %if.end8.sink.split +; CHECK-NEXT: b.hi .LBB2_2 +; CHECK-NEXT: // %bb.1: // %if.else +; CHECK-NEXT: b.ne .LBB2_3 +; CHECK-NEXT: .LBB2_2: // %if.then7 ; CHECK-NEXT: strh w9, [x8] -; CHECK-NEXT: .LBB2_2: // %if.end8 +; CHECK-NEXT: .LBB2_3: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2 @@ -122,16 +128,18 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:cost_u_i16 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_u_i16] -; CHECK-NEXT: ldrh w9, [x8, #2] -; CHECK-NEXT: ldrh w10, [x8, #4] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.hi .LBB3_2 -; CHECK-NEXT: // %bb.1: // %if.else -; CHECK-NEXT: mov w9, w10 -; CHECK-NEXT: b.hs .LBB3_3 -; CHECK-NEXT: .LBB3_2: // %if.end8.sink.split +; CHECK-NEXT: ldrh w10, [x8, #2] +; CHECK-NEXT: ldrh w9, [x8, #4] +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: b.ls .LBB3_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: strh w10, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %if.else +; CHECK-NEXT: b.hs .LBB3_4 +; CHECK-NEXT: // %bb.3: // %if.then7 ; CHECK-NEXT: strh w9, [x8] -; CHECK-NEXT: .LBB3_3: // %if.end8 +; 
CHECK-NEXT: .LBB3_4: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i16, ptr getelementptr inbounds (%struct.s_unsigned_i16, ptr @cost_u_i16, i64 0, i32 1), align 2 @@ -172,10 +180,12 @@ ; CHECK-NEXT: ldrsb w9, [x8, #1] ; CHECK-NEXT: ldrsb w10, [x8, #2] ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.lt .LBB4_2 -; CHECK-NEXT: // %bb.1: // %if.end8.sink.split +; CHECK-NEXT: b.gt .LBB4_2 +; CHECK-NEXT: // %bb.1: // %if.else +; CHECK-NEXT: b.ne .LBB4_3 +; CHECK-NEXT: .LBB4_2: // %if.then7 ; CHECK-NEXT: strb w9, [x8] -; CHECK-NEXT: .LBB4_2: // %if.end8 +; CHECK-NEXT: .LBB4_3: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2 @@ -204,16 +214,18 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:cost_s ; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_s] -; CHECK-NEXT: ldrsb w9, [x8, #1] -; CHECK-NEXT: ldrsb w10, [x8, #2] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.gt .LBB5_2 -; CHECK-NEXT: // %bb.1: // %if.else -; CHECK-NEXT: mov w9, w10 -; CHECK-NEXT: b.ge .LBB5_3 -; CHECK-NEXT: .LBB5_2: // %if.end8.sink.split +; CHECK-NEXT: ldrsb w10, [x8, #1] +; CHECK-NEXT: ldrsb w9, [x8, #2] +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: b.le .LBB5_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: strb w10, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %if.else +; CHECK-NEXT: b.ge .LBB5_4 +; CHECK-NEXT: // %bb.3: // %if.then7 ; CHECK-NEXT: strb w9, [x8] -; CHECK-NEXT: .LBB5_3: // %if.end8 +; CHECK-NEXT: .LBB5_4: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i8, ptr getelementptr inbounds (%struct.s_signed_i8, ptr @cost_s, i64 0, i32 1), align 2 @@ -245,10 +257,12 @@ ; CHECK-NEXT: ldrb w9, [x8, #1] ; CHECK-NEXT: ldrb w10, [x8, #2] ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.lo .LBB6_2 -; CHECK-NEXT: // %bb.1: // %if.end8.sink.split +; CHECK-NEXT: b.hi .LBB6_2 +; CHECK-NEXT: // %bb.1: // %if.else +; CHECK-NEXT: b.ne .LBB6_3 +; CHECK-NEXT: .LBB6_2: // %if.then7 ; CHECK-NEXT: strb w9, [x8] -; CHECK-NEXT: .LBB6_2: // %if.end8 +; CHECK-NEXT: .LBB6_3: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2 @@ -277,16 +291,18 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:cost_u_i8 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:cost_u_i8] -; CHECK-NEXT: ldrb w9, [x8, #1] -; CHECK-NEXT: ldrb w10, [x8, #2] -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: b.hi .LBB7_2 -; CHECK-NEXT: // %bb.1: // %if.else -; CHECK-NEXT: mov w9, w10 -; CHECK-NEXT: b.hs .LBB7_3 -; CHECK-NEXT: .LBB7_2: // %if.end8.sink.split +; CHECK-NEXT: ldrb w10, [x8, #1] +; CHECK-NEXT: ldrb w9, [x8, #2] +; CHECK-NEXT: cmp w10, w9 +; CHECK-NEXT: b.ls .LBB7_2 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: strb w10, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %if.else +; CHECK-NEXT: b.hs .LBB7_4 +; CHECK-NEXT: // %bb.3: // %if.then7 ; CHECK-NEXT: strb w9, [x8] -; CHECK-NEXT: .LBB7_3: // %if.end8 +; CHECK-NEXT: .LBB7_4: // %if.end8 ; CHECK-NEXT: ret entry: %0 = load i8, ptr getelementptr inbounds (%struct.s_unsigned_i8, ptr @cost_u_i8, i64 0, i32 1), align 2 diff --git a/llvm/test/CodeGen/AArch64/signbit-test.ll b/llvm/test/CodeGen/AArch64/signbit-test.ll --- a/llvm/test/CodeGen/AArch64/signbit-test.ll +++ b/llvm/test/CodeGen/AArch64/signbit-test.ll @@ -4,9 +4,9 @@ define i64 @test_clear_mask_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: test_clear_mask_i64_i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csel x0, x8, x0, ge +; 
CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x80000000 +; CHECK-NEXT: csel x0, x0, x8, ne ; CHECK-NEXT: ret entry: %a = and i64 %x, 2147483648 @@ -22,9 +22,9 @@ define i64 @test_set_mask_i64_i32(i64 %x) nounwind { ; CHECK-LABEL: test_set_mask_i64_i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: tst x0, #0x80000000 -; CHECK-NEXT: csel x0, x8, x0, ne +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x80000000 +; CHECK-NEXT: csel x0, x0, x8, eq ; CHECK-NEXT: ret entry: %a = and i64 %x, 2147483648 @@ -40,9 +40,9 @@ define i64 @test_clear_mask_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: test_clear_mask_i64_i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: tst x0, #0x8000 -; CHECK-NEXT: csel x0, x8, x0, eq +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x8000 +; CHECK-NEXT: csel x0, x0, x8, ne ; CHECK-NEXT: ret entry: %a = and i64 %x, 32768 @@ -58,9 +58,9 @@ define i64 @test_set_mask_i64_i16(i64 %x) nounwind { ; CHECK-LABEL: test_set_mask_i64_i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: tst x0, #0x8000 -; CHECK-NEXT: csel x0, x8, x0, ne +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x8000 +; CHECK-NEXT: csel x0, x0, x8, eq ; CHECK-NEXT: ret entry: %a = and i64 %x, 32768 @@ -76,9 +76,9 @@ define i64 @test_clear_mask_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: test_clear_mask_i64_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: tst x0, #0x80 -; CHECK-NEXT: csel x0, x8, x0, eq +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x80 +; CHECK-NEXT: csel x0, x0, x8, ne ; CHECK-NEXT: ret entry: %a = and i64 %x, 128 @@ -94,9 +94,9 @@ define i64 @test_set_mask_i64_i8(i64 %x) nounwind { ; CHECK-LABEL: test_set_mask_i64_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: tst x0, #0x80 -; CHECK-NEXT: csel x0, x8, x0, ne +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: tst w0, #0x80 +; CHECK-NEXT: csel x0, x0, x8, eq ; CHECK-NEXT: ret entry: %a = and i64 %x, 128 @@ -112,9 +112,9 @@ define i32 @test_clear_mask_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: test_clear_mask_i32_i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: tst w0, #0x8000 -; CHECK-NEXT: csel w0, w8, w0, eq +; CHECK-NEXT: csel w0, w0, w8, ne ; CHECK-NEXT: ret entry: %a = and i32 %x, 32768 @@ -130,9 +130,9 @@ define i32 @test_set_mask_i32_i16(i32 %x) nounwind { ; CHECK-LABEL: test_set_mask_i32_i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: tst w0, #0x8000 -; CHECK-NEXT: csel w0, w8, w0, ne +; CHECK-NEXT: csel w0, w0, w8, eq ; CHECK-NEXT: ret entry: %a = and i32 %x, 32768 @@ -148,9 +148,9 @@ define i32 @test_clear_mask_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: test_clear_mask_i32_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: tst w0, #0x80 -; CHECK-NEXT: csel w0, w8, w0, eq +; CHECK-NEXT: csel w0, w0, w8, ne ; CHECK-NEXT: ret entry: %a = and i32 %x, 128 @@ -166,9 +166,9 @@ define i32 @test_set_mask_i32_i8(i32 %x) nounwind { ; CHECK-LABEL: test_set_mask_i32_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: tst w0, #0x80 -; CHECK-NEXT: csel w0, w8, w0, ne +; CHECK-NEXT: csel w0, w0, w8, eq ; CHECK-NEXT: ret entry: %a = and i32 %x, 128 @@ -184,9 +184,10 @@ define i16 @test_clear_mask_i16_i8(i16 %x) nounwind { ; 
CHECK-LABEL: test_clear_mask_i16_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: and w9, w0, #0xffff ; CHECK-NEXT: tst w0, #0x80 -; CHECK-NEXT: csel w0, w8, w0, eq +; CHECK-NEXT: csel w0, w9, w8, ne ; CHECK-NEXT: ret entry: %a = and i16 %x, 128 @@ -202,9 +203,10 @@ define i16 @test_set_mask_i16_i8(i16 %x) nounwind { ; CHECK-LABEL: test_set_mask_i16_i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: and w9, w0, #0xffff ; CHECK-NEXT: tst w0, #0x80 -; CHECK-NEXT: csel w0, w8, w0, ne +; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret entry: %a = and i16 %x, 128 @@ -220,9 +222,10 @@ define i16 @test_set_mask_i16_i7(i16 %x) nounwind { ; CHECK-LABEL: test_set_mask_i16_i7: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: and w9, w0, #0xffff ; CHECK-NEXT: tst w0, #0x40 -; CHECK-NEXT: csel w0, w8, w0, ne +; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret entry: %a = and i16 %x, 64 diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -175,11 +175,11 @@ ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: ldr x0, [sp, #8] ; CHECK-NEXT: bl consume -; CHECK-NEXT: b .LBB8_3 -; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: mov w19, #1 // =0x1 -; CHECK-NEXT: .LBB8_3: // %common.ret ; CHECK-NEXT: and w0, w19, #0x1 +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: // %right +; CHECK-NEXT: mov w0, #1 // =0x1 +; CHECK-NEXT: .LBB8_3: // %right ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll --- a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll +++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll @@ -13,22 +13,15 @@ define @wide_32i8(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB0_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -41,22 +34,15 @@ define @wide_16i16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB1_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -69,22 +55,15 @@ define @wide_8i32(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB2_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -97,22 +76,15 @@ define @wide_4i64(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB3_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB3_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -125,22 +97,15 @@ define @wide_16f16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB4_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB4_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -153,22 +118,15 @@ define @wide_8f32(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB5_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB5_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -181,22 +139,15 @@ define @wide_4f64(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z1.d -; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z2.d ; CHECK-NEXT: tbz w0, #0, .LBB6_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB6_2: // %common.ret -; CHECK-NEXT: mov z0.d, z9.d -; CHECK-NEXT: mov z1.d, z8.d -; CHECK-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #2 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -213,26 +164,16 @@ define @wide_48i8(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_48i8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB7_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB7_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -245,26 +186,16 @@ define @wide_24i16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_24i16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB8_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -277,26 +208,16 @@ define @wide_12i32(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_12i32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB9_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -309,26 +230,16 @@ define @wide_6i64(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_6i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB10_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB10_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB10_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -341,26 +252,16 @@ define @wide_24f16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_24f16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB11_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB11_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -373,26 +274,16 @@ define @wide_12f32(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_12f32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB12_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB12_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB12_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -405,26 +296,16 @@ define @wide_6f64(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_6f64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-3 -; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z1.d -; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z3.d ; CHECK-NEXT: tbz w0, #0, .LBB13_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB13_2: // %common.ret -; CHECK-NEXT: mov z0.d, z10.d -; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z8.d -; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB13_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -441,30 +322,17 @@ define @wide_64i8(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB14_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB14_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB14_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -477,30 +345,17 @@ define @wide_32i16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB15_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB15_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB15_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -513,30 +368,17 @@ define @wide_16i32(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB16_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB16_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB16_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -549,30 +391,17 @@ define @wide_8i64(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB17_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB17_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB17_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -585,30 +414,17 @@ define @wide_32f16(i1 %b, %legal, %illegal) nounwind { ; CHECK-LABEL: wide_32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB18_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB18_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB18_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -621,30 +437,17 @@ define <vscale x 16 x float> @wide_16f32(i1 %b, <vscale x 16 x i8> %legal, <vscale x 16 x float> %illegal) nounwind { ; CHECK-LABEL: wide_16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB19_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB19_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB19_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: @@ -657,30 +460,17 @@ define <vscale x 8 x double> @wide_8f64(i1 %b, <vscale x 16 x i8> %legal, <vscale x 8 x double> %illegal) nounwind { ; CHECK-LABEL: wide_8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov z11.d, z1.d -; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK-NEXT: mov z8.d, z4.d ; CHECK-NEXT: tbz w0, #0, .LBB20_2 ; CHECK-NEXT: // %bb.1: // %L1 +; CHECK-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: bl bar -; CHECK-NEXT: .LBB20_2: // %common.ret -; CHECK-NEXT: mov z0.d, z11.d -; CHECK-NEXT: mov z1.d, z10.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov z2.d, z9.d -; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: addvl sp, sp, #4 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB20_2: // %L2 +; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: mov z3.d, z4.d ; CHECK-NEXT: ret br i1 %b, label %L1, label %L2 L1: diff --git a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll @@ -50,7 +50,7 @@ ; ASM-NEXT: add x8, x8, x9 ; ASM-NEXT: cmp x8, #1024 ; ASM-NEXT: b.ne .LBB0_1 -; ASM-NEXT: // %bb.2: // %exit +; ASM-NEXT: // %bb.2: // %loop.exit ; ASM-NEXT: ret entry: br label %loop.ph @@ -123,7 +123,7 @@ ; ASM-NEXT: add x8, x8, x9 ; ASM-NEXT: cmp x2, x8 ; ASM-NEXT: b.ne .LBB1_1 -; ASM-NEXT: // %bb.2: // %exit +; ASM-NEXT: // %bb.2: // %loop.exit ; ASM-NEXT: ret entry: br label %loop.ph diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll @@ -17,7 +17,7 @@ ; i8 define void @subvector_v4i8(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4i8: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: st1b { z0.h }, p0, [x1] @@ -32,7 +32,7 @@ define void @subvector_v8i8(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v8i8: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -46,7 +46,7 @@ define void @subvector_v16i8(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v16i8: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -60,9 +60,9 @@ define void @subvector_v32i8(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v32i8: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in br label %bb1 @@ -75,7 +75,7 @@ ; i16 define void @subvector_v2i16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2i16: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldrh w8, [x0, #2] @@ -97,7 +97,7 @@ define void @subvector_v4i16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4i16: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -111,7 +111,7 @@ define void @subvector_v8i16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v8i16: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, 
[x1] ; CHECK-NEXT: ret @@ -125,9 +125,9 @@ define void @subvector_v16i16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v16i16: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <16 x i16>, ptr %in br label %bb1 @@ -140,7 +140,7 @@ ; i32 define void @subvector_v2i32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2i32: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -154,7 +154,7 @@ define void @subvector_v4i32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4i32: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -168,9 +168,9 @@ define void @subvector_v8i32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v8i32: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <8 x i32>, ptr %in br label %bb1 @@ -183,7 +183,7 @@ ; i64 define void @subvector_v2i64(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2i64: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -197,9 +197,9 @@ define void @subvector_v4i64(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4i64: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <4 x i64>, ptr %in br label %bb1 @@ -212,8 +212,9 @@ ; f16 define void @subvector_v2f16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2f16: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldr w8, [x0] +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x1] ; CHECK-NEXT: ret %a = load <2 x half>, ptr %in @@ -226,7 +227,7 @@ define void @subvector_v4f16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4f16: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -240,7 +241,7 @@ define void @subvector_v8f16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v8f16: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -254,9 +255,9 @@ define void @subvector_v16f16(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v16f16: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <16 x half>, ptr %in br label %bb1 @@ -269,7 +270,7 @@ ; f32 define void @subvector_v2f32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2f32: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -283,7 +284,7 @@ define void @subvector_v4f32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4f32: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -297,9 +298,9 @@ define void @subvector_v8f32(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v8f32: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; 
CHECK-NEXT: ret %a = load <8 x float>, ptr %in br label %bb1 @@ -312,7 +313,7 @@ ; f64 define void @subvector_v2f64(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v2f64: -; CHECK: // %bb.0: // %bb1 +; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -326,9 +327,9 @@ define void @subvector_v4f64(ptr %in, ptr %out) { ; CHECK-LABEL: subvector_v4f64: -; CHECK: // %bb.0: // %bb1 -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK: // %bb.0: +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %a = load <4 x double>, ptr %in br label %bb1 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -416,13 +416,13 @@ ; CHECK-APPLE-NEXT: ; %bb.1: ; %gen_error ; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-NEXT: bl _malloc -; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 ; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1 +; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret -; CHECK-APPLE-NEXT: LBB3_2: +; CHECK-APPLE-NEXT: LBB3_2: ; %normal ; CHECK-APPLE-NEXT: movi d0, #0000000000000000 ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -7,8 +7,8 @@ ; CHECK-NEXT: subs w8, w2, #1 ; CHECK-NEXT: b.lt .LBB0_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w8, #6 -; CHECK-NEXT: b.hi .LBB0_3 +; CHECK-NEXT: cmp w8, #7 +; CHECK-NEXT: b.hs .LBB0_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 @@ -50,7 +50,7 @@ ; CHECK-NEXT: // %bb.5: // %middle.block ; CHECK-NEXT: cmp x11, x10 ; CHECK-NEXT: b.eq .LBB0_8 -; CHECK-NEXT: .LBB0_6: // %for.body.preheader1 +; CHECK-NEXT: .LBB0_6: // %for.body.preheader21 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 @@ -147,8 +147,8 @@ ; CHECK-NEXT: subs w8, w2, #1 ; CHECK-NEXT: b.lt .LBB1_7 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB1_4 +; CHECK-NEXT: cmp w8, #3 +; CHECK-NEXT: b.lo .LBB1_4 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: ubfiz x9, x8, #1, #32 ; CHECK-NEXT: add x9, x9, #2 @@ -163,7 +163,7 @@ ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB1_5: // %for.body.preheader1 +; CHECK-NEXT: .LBB1_5: // %for.body.preheader35 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 @@ -322,8 +322,8 @@ ; CHECK-NEXT: subs w8, w2, #1 ; CHECK-NEXT: b.lt .LBB2_9 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB2_6 +; CHECK-NEXT: cmp w8, #3 +; CHECK-NEXT: b.lo .LBB2_6 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: add x9, x8, w8, uxtw #1 ; CHECK-NEXT: add x9, x9, #3 @@ -378,7 +378,7 @@ ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB2_7: // %for.body.preheader1 +; CHECK-NEXT: .LBB2_7: // %for.body.preheader46 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: mov w11, #1132396544
// =0x437f0000 @@ -531,8 +531,8 @@ ; CHECK-NEXT: subs w8, w2, #1 ; CHECK-NEXT: b.lt .LBB3_7 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB3_4 +; CHECK-NEXT: cmp w8, #3 +; CHECK-NEXT: b.lo .LBB3_4 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: ubfiz x9, x8, #2, #32 ; CHECK-NEXT: add x9, x9, #4 @@ -547,7 +547,7 @@ ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB3_5: // %for.body.preheader1 +; CHECK-NEXT: .LBB3_5: // %for.body.preheader57 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 diff --git a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll @@ -12,8 +12,8 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl w9, w8, #1 ; CHECK-NEXT: sub w10, w8, #2 -; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w10, w9, lo +; CHECK-NEXT: cmp w8, #253 +; CHECK-NEXT: csel w8, w9, w10, hi ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -60,8 +60,8 @@ ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl w9, w8, #1 ; CHECK-NEXT: sub w10, w8, #2 -; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w10, w9, lo +; CHECK-NEXT: cmp w8, #253 +; CHECK-NEXT: csel w8, w9, w10, hi ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -103,12 +103,11 @@ ; CHECK-LABEL: phi_i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: .LBB2_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmp w8, #128 -; CHECK-NEXT: cinc w10, w9, lo -; CHECK-NEXT: add w8, w8, w10 +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: add w9, w8, #2 +; CHECK-NEXT: csinc w8, w9, w8, ls ; CHECK-NEXT: cmp w8, #253 ; CHECK-NEXT: b.lo .LBB2_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -142,12 +141,11 @@ ; CHECK-LABEL: ret_i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: .LBB3_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmp w0, #128 -; CHECK-NEXT: cinc w9, w8, lo -; CHECK-NEXT: add w0, w0, w9 +; CHECK-NEXT: cmp w0, #127 +; CHECK-NEXT: add w8, w0, #2 +; CHECK-NEXT: csinc w0, w8, w0, ls ; CHECK-NEXT: cmp w0, #252 ; CHECK-NEXT: b.hi .LBB3_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -181,12 +179,11 @@ ; CHECK-LABEL: phi_multiple_undefs: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: .LBB4_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cmp w8, #128 -; CHECK-NEXT: cinc w10, w9, lo -; CHECK-NEXT: add w8, w8, w10 +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: add w9, w8, #2 +; CHECK-NEXT: csinc w8, w9, w8, ls ; CHECK-NEXT: cmp w8, #253 ; CHECK-NEXT: b.lo .LBB4_1 ; CHECK-NEXT: // %bb.2: // %exit diff --git a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/AArch64/use-cr-result-of-dom-icmp-st.ll @@ -19,9 +19,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 ; CHECK-NEXT: cmn x8, #2 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le 
.LBB0_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -42,11 +46,14 @@ ; CHECK-LABEL: ll_a_op_b__1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tbnz x8, #63, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_2: // %if.end ; CHECK-NEXT: cmn x8, #1 -; CHECK-NEXT: csinc x9, x1, xzr, eq -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: mul x9, x9, x0 -; CHECK-NEXT: csel x0, x1, x9, ge +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -68,9 +75,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 ; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB2_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -92,9 +103,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 ; CHECK-NEXT: cmp x8, #1 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB3_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -116,9 +131,13 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: lsl x8, x0, x1 ; CHECK-NEXT: cmp x8, #2 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB4_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, %b @@ -139,9 +158,13 @@ ; CHECK-LABEL: ll_a__2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmn x0, #2 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB5_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i64 %a, -2 @@ -160,11 +183,14 @@ define i64 @ll_a__1(i64 %a, i64 %b) { ; CHECK-LABEL: ll_a__1: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbnz x0, #63, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_2: // %if.end ; CHECK-NEXT: cmn x0, #1 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, ge +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i64 %a, -1 @@ -184,9 +210,13 @@ ; CHECK-LABEL: ll_a_0: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x0, #0 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB7_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB7_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: 
mul x0, x8, x0 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i64 %a, 0 @@ -206,9 +236,13 @@ ; CHECK-LABEL: ll_a_1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x0, #1 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB8_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i64 %a, 1 @@ -228,9 +262,13 @@ ; CHECK-LABEL: ll_a_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x0, #2 -; CHECK-NEXT: csinc x8, x1, xzr, eq -; CHECK-NEXT: mul x8, x8, x0 -; CHECK-NEXT: csel x0, x1, x8, gt +; CHECK-NEXT: b.le .LBB9_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_2: // %if.end +; CHECK-NEXT: csinc x8, x1, xzr, ge +; CHECK-NEXT: mul x0, x8, x0 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i64 %a, 2 @@ -249,12 +287,15 @@ define i64 @i_a_op_b__2(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b__2: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: cmn w8, #2 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: b.gt .LBB10_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB10_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %shl = shl i32 %a, %b @@ -276,13 +317,17 @@ define i64 @i_a_op_b__1(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b__1: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tbnz w8, #31, .LBB11_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: sxtw x0, w1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_2: // %if.end ; CHECK-NEXT: cmn w8, #1 -; CHECK-NEXT: csinc w9, w1, wzr, eq -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: mul w9, w9, w0 -; CHECK-NEXT: csel w8, w1, w9, ge -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %shl = shl i32 %a, %b @@ -304,12 +349,17 @@ define i64 @i_a_op_b_0(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b_0: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: b.le .LBB12_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: sxtw x0, w1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB12_2: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %shl = shl i32 %a, %b @@ -331,12 +381,15 @@ define i64 @i_a_op_b_1(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b_1: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: cmp w8, #1 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: b.gt .LBB13_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB13_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %shl = shl 
i32 %a, %b @@ -358,12 +411,15 @@ define i64 @i_a_op_b_2(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a_op_b_2: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsl w8, w0, w1 ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: b.gt .LBB14_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB14_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %shl = shl i32 %a, %b @@ -386,10 +442,13 @@ ; CHECK-LABEL: i_a__2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmn w0, #2 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: b.gt .LBB15_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB15_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %a, -2 @@ -410,12 +469,16 @@ define i64 @i_a__1(i32 signext %a, i32 signext %b) { ; CHECK-LABEL: i_a__1: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: tbnz w0, #31, .LBB16_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: sxtw x0, w1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB16_2: // %if.end ; CHECK-NEXT: cmn w0, #1 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, ge -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %a, -1 @@ -437,10 +500,15 @@ ; CHECK-LABEL: i_a_0: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: b.le .LBB17_2 +; CHECK-NEXT: // %bb.1: // %return +; CHECK-NEXT: sxtw x0, w1 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB17_2: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %a, 0 @@ -462,10 +530,13 @@ ; CHECK-LABEL: i_a_1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: b.gt .LBB18_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB18_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %a, 1 @@ -487,10 +558,13 @@ ; CHECK-LABEL: i_a_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp w0, #2 -; CHECK-NEXT: csinc w8, w1, wzr, eq -; CHECK-NEXT: mul w8, w8, w0 -; CHECK-NEXT: csel w8, w1, w8, gt -; CHECK-NEXT: sxtw x0, w8 +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: b.gt .LBB19_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: csinc w8, w1, wzr, ge +; CHECK-NEXT: mul w1, w8, w0 +; CHECK-NEXT: .LBB19_2: // %return +; CHECK-NEXT: sxtw x0, w1 ; CHECK-NEXT: ret entry: %cmp = icmp sgt i32 %a, 2 diff --git a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll 
b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll --- a/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll +++ b/llvm/test/Transforms/AtomicExpand/AMDGPU/expand-atomic-simplify-cfg-CAS-block.ll @@ -21,9 +21,7 @@ ; GFX90A-NEXT: [[SUCCESS:%.*]] = extractvalue { i32, i1 } [[TMP3]], 1 ; GFX90A-NEXT: [[NEWLOADED:%.*]] = extractvalue { i32, i1 } [[TMP3]], 0 ; GFX90A-NEXT: [[TMP4]] = bitcast i32 [[NEWLOADED]] to float -; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ATOMICRMW_END:%.*]], label [[ATOMICRMW_START]] -; GFX90A: atomicrmw.end: -; GFX90A-NEXT: br label [[ENDIF:%.*]] +; GFX90A-NEXT: br i1 [[SUCCESS]], label [[ENDIF:%.*]], label [[ATOMICRMW_START]] ; GFX90A: else: ; GFX90A-NEXT: [[TMP5:%.*]] = load float, ptr addrspace(1) [[OUT]], align 4 ; GFX90A-NEXT: br label [[ATOMICRMW_START2:%.*]] @@ -36,11 +34,9 @@ ; GFX90A-NEXT: [[SUCCESS5:%.*]] = extractvalue { i32, i1 } [[TMP8]], 1 ; GFX90A-NEXT: [[NEWLOADED6:%.*]] = extractvalue { i32, i1 } [[TMP8]], 0 ; GFX90A-NEXT: [[TMP9]] = bitcast i32 [[NEWLOADED6]] to float -; GFX90A-NEXT: br i1 [[SUCCESS5]], label [[ATOMICRMW_END1:%.*]], label [[ATOMICRMW_START2]] -; GFX90A: atomicrmw.end1: -; GFX90A-NEXT: br label [[ENDIF]] +; GFX90A-NEXT: br i1 [[SUCCESS5]], label [[ENDIF]], label [[ATOMICRMW_START2]] ; GFX90A: endif: -; GFX90A-NEXT: [[COMBINE:%.*]] = phi float [ [[TMP4]], [[ATOMICRMW_END]] ], [ [[TMP9]], [[ATOMICRMW_END1]] ] +; GFX90A-NEXT: [[COMBINE:%.*]] = phi float [ [[TMP4]], [[ATOMICRMW_START]] ], [ [[TMP9]], [[ATOMICRMW_START2]] ] ; GFX90A-NEXT: store float [[COMBINE]], ptr addrspace(1) [[OUT]], align 4 ; GFX90A-NEXT: ret void ; diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll --- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -86,7 +86,7 @@ ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tst w2, #0x1 -; CHECK-NEXT: csel x9, x1, x0, ne +; CHECK-NEXT: csel x9, x0, x1, eq ; CHECK-NEXT: cbz x9, .LBB2_3 ; CHECK-NEXT: // %bb.1: // %while_cond.preheader ; CHECK-NEXT: mov w10, #40000 // =0x9c40 diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-pre-inc-offset-check.ll @@ -19,10 +19,10 @@ define void @test_lsr_pre_inc_offset_check(ptr %p) { ; CHECK-LABEL: test_lsr_pre_inc_offset_check: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #165 +; CHECK-NEXT: mov w8, #165 // =0xa5 ; CHECK-NEXT: add x9, x0, #339 -; CHECK-NEXT: mov w10, #2 -; CHECK-NEXT: .LBB0_1: // %main +; CHECK-NEXT: mov w10, #2 // =0x2 +; CHECK-NEXT: .LBB0_1: // %if.then ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: str wzr, [x9] ; CHECK-NEXT: subs x8, x8, #1