diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -361,361 +361,6 @@ return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic; } -bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo, - SmallPtrSetImpl &Visited) { - const PPCTargetMachine &TM = ST->getTargetMachine(); - - // Loop through the inline asm constraints and look for something that - // clobbers ctr. - auto asmClobbersCTR = [](InlineAsm *IA) { - InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints(); - for (const InlineAsm::ConstraintInfo &C : CIV) { - if (C.Type != InlineAsm::isInput) - for (const auto &Code : C.Codes) - if (StringRef(Code).equals_insensitive("{ctr}")) - return true; - } - return false; - }; - - auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) { - if (IntegerType *ITy = dyn_cast(Ty)) - return ITy->getBitWidth() > (Is32Bit ? 32U : 64U); - - return false; - }; - - auto supportedHalfPrecisionOp = [](Instruction *Inst) { - switch (Inst->getOpcode()) { - default: - return false; - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::Load: - case Instruction::Store: - case Instruction::FPToUI: - case Instruction::UIToFP: - case Instruction::FPToSI: - case Instruction::SIToFP: - return true; - } - }; - - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); - J != JE; ++J) { - // There are no direct operations on half precision so assume that - // anything with that type requires a call except for a few select - // operations with Power9. - if (Instruction *CurrInst = dyn_cast(J)) { - for (const auto &Op : CurrInst->operands()) { - if (Op->getType()->getScalarType()->isHalfTy() || - CurrInst->getType()->getScalarType()->isHalfTy()) - return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst)); - } - } - if (CallInst *CI = dyn_cast(J)) { - // Inline ASM is okay, unless it clobbers the ctr register. - if (InlineAsm *IA = dyn_cast(CI->getCalledOperand())) { - if (asmClobbersCTR(IA)) - return true; - continue; - } - - if (Function *F = CI->getCalledFunction()) { - // Most intrinsics don't become function calls, but some might. - // sin, cos, exp and log are always calls. - unsigned Opcode = 0; - if (F->getIntrinsicID() != Intrinsic::not_intrinsic) { - switch (F->getIntrinsicID()) { - default: continue; - // If we have a call to loop_decrement or set_loop_iterations, - // we're definitely using CTR. - case Intrinsic::set_loop_iterations: - case Intrinsic::loop_decrement: - return true; - - // Binary operations on 128-bit value will use CTR. - case Intrinsic::experimental_constrained_fadd: - case Intrinsic::experimental_constrained_fsub: - case Intrinsic::experimental_constrained_fmul: - case Intrinsic::experimental_constrained_fdiv: - case Intrinsic::experimental_constrained_frem: - if (F->getType()->getScalarType()->isFP128Ty() || - F->getType()->getScalarType()->isPPC_FP128Ty()) - return true; - break; - - case Intrinsic::experimental_constrained_fptosi: - case Intrinsic::experimental_constrained_fptoui: - case Intrinsic::experimental_constrained_sitofp: - case Intrinsic::experimental_constrained_uitofp: { - Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType(); - Type *DstType = CI->getType()->getScalarType(); - if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() || - isLargeIntegerTy(!TM.isPPC64(), SrcType) || - isLargeIntegerTy(!TM.isPPC64(), DstType)) - return true; - break; - } - - // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp - // because, although it does clobber the counter register, the - // control can't then return to inside the loop unless there is also - // an eh_sjlj_setjmp. - case Intrinsic::eh_sjlj_setjmp: - - case Intrinsic::memcpy: - case Intrinsic::memmove: - case Intrinsic::memset: - case Intrinsic::powi: - case Intrinsic::log: - case Intrinsic::log2: - case Intrinsic::log10: - case Intrinsic::exp: - case Intrinsic::exp2: - case Intrinsic::pow: - case Intrinsic::sin: - case Intrinsic::cos: - case Intrinsic::experimental_constrained_powi: - case Intrinsic::experimental_constrained_log: - case Intrinsic::experimental_constrained_log2: - case Intrinsic::experimental_constrained_log10: - case Intrinsic::experimental_constrained_exp: - case Intrinsic::experimental_constrained_exp2: - case Intrinsic::experimental_constrained_pow: - case Intrinsic::experimental_constrained_sin: - case Intrinsic::experimental_constrained_cos: - return true; - case Intrinsic::copysign: - if (CI->getArgOperand(0)->getType()->getScalarType()-> - isPPC_FP128Ty()) - return true; - else - continue; // ISD::FCOPYSIGN is never a library call. - case Intrinsic::fmuladd: - case Intrinsic::fma: Opcode = ISD::FMA; break; - case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; - case Intrinsic::floor: Opcode = ISD::FFLOOR; break; - case Intrinsic::ceil: Opcode = ISD::FCEIL; break; - case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; - case Intrinsic::rint: Opcode = ISD::FRINT; break; - case Intrinsic::lrint: Opcode = ISD::LRINT; break; - case Intrinsic::llrint: Opcode = ISD::LLRINT; break; - case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; - case Intrinsic::round: Opcode = ISD::FROUND; break; - case Intrinsic::lround: Opcode = ISD::LROUND; break; - case Intrinsic::llround: Opcode = ISD::LLROUND; break; - case Intrinsic::minnum: Opcode = ISD::FMINNUM; break; - case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break; - case Intrinsic::experimental_constrained_fcmp: - Opcode = ISD::STRICT_FSETCC; - break; - case Intrinsic::experimental_constrained_fcmps: - Opcode = ISD::STRICT_FSETCCS; - break; - case Intrinsic::experimental_constrained_fma: - Opcode = ISD::STRICT_FMA; - break; - case Intrinsic::experimental_constrained_sqrt: - Opcode = ISD::STRICT_FSQRT; - break; - case Intrinsic::experimental_constrained_floor: - Opcode = ISD::STRICT_FFLOOR; - break; - case Intrinsic::experimental_constrained_ceil: - Opcode = ISD::STRICT_FCEIL; - break; - case Intrinsic::experimental_constrained_trunc: - Opcode = ISD::STRICT_FTRUNC; - break; - case Intrinsic::experimental_constrained_rint: - Opcode = ISD::STRICT_FRINT; - break; - case Intrinsic::experimental_constrained_lrint: - Opcode = ISD::STRICT_LRINT; - break; - case Intrinsic::experimental_constrained_llrint: - Opcode = ISD::STRICT_LLRINT; - break; - case Intrinsic::experimental_constrained_nearbyint: - Opcode = ISD::STRICT_FNEARBYINT; - break; - case Intrinsic::experimental_constrained_round: - Opcode = ISD::STRICT_FROUND; - break; - case Intrinsic::experimental_constrained_lround: - Opcode = ISD::STRICT_LROUND; - break; - case Intrinsic::experimental_constrained_llround: - Opcode = ISD::STRICT_LLROUND; - break; - case Intrinsic::experimental_constrained_minnum: - Opcode = ISD::STRICT_FMINNUM; - break; - case Intrinsic::experimental_constrained_maxnum: - Opcode = ISD::STRICT_FMAXNUM; - break; - case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break; - case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break; - } - } - - // PowerPC does not use [US]DIVREM or other library calls for - // operations on regular types which are not otherwise library calls - // (i.e. soft float or atomics). If adapting for targets that do, - // additional care is required here. - - LibFunc Func; - if (!F->hasLocalLinkage() && F->hasName() && LibInfo && - LibInfo->getLibFunc(F->getName(), Func) && - LibInfo->hasOptimizedCodeGen(Func)) { - // Non-read-only functions are never treated as intrinsics. - if (!CI->onlyReadsMemory()) - return true; - - // Conversion happens only for FP calls. - if (!CI->getArgOperand(0)->getType()->isFloatingPointTy()) - return true; - - switch (Func) { - default: return true; - case LibFunc_copysign: - case LibFunc_copysignf: - continue; // ISD::FCOPYSIGN is never a library call. - case LibFunc_copysignl: - return true; - case LibFunc_fabs: - case LibFunc_fabsf: - case LibFunc_fabsl: - continue; // ISD::FABS is never a library call. - case LibFunc_sqrt: - case LibFunc_sqrtf: - case LibFunc_sqrtl: - Opcode = ISD::FSQRT; break; - case LibFunc_floor: - case LibFunc_floorf: - case LibFunc_floorl: - Opcode = ISD::FFLOOR; break; - case LibFunc_nearbyint: - case LibFunc_nearbyintf: - case LibFunc_nearbyintl: - Opcode = ISD::FNEARBYINT; break; - case LibFunc_ceil: - case LibFunc_ceilf: - case LibFunc_ceill: - Opcode = ISD::FCEIL; break; - case LibFunc_rint: - case LibFunc_rintf: - case LibFunc_rintl: - Opcode = ISD::FRINT; break; - case LibFunc_round: - case LibFunc_roundf: - case LibFunc_roundl: - Opcode = ISD::FROUND; break; - case LibFunc_trunc: - case LibFunc_truncf: - case LibFunc_truncl: - Opcode = ISD::FTRUNC; break; - case LibFunc_fmin: - case LibFunc_fminf: - case LibFunc_fminl: - Opcode = ISD::FMINNUM; break; - case LibFunc_fmax: - case LibFunc_fmaxf: - case LibFunc_fmaxl: - Opcode = ISD::FMAXNUM; break; - } - } - - if (Opcode) { - EVT EVTy = - TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true); - - if (EVTy == MVT::Other) - return true; - - if (TLI->isOperationLegalOrCustom(Opcode, EVTy)) - continue; - else if (EVTy.isVector() && - TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType())) - continue; - - return true; - } - } - - return true; - } else if ((J->getType()->getScalarType()->isFP128Ty() || - J->getType()->getScalarType()->isPPC_FP128Ty())) { - // Most operations on f128 or ppc_f128 values become calls. - return true; - } else if (isa(J) && - J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) { - return true; - } else if ((isa(J) || isa(J)) && - (cast(J)->getSrcTy()->getScalarType()->isFP128Ty() || - cast(J)->getDestTy()->getScalarType()->isFP128Ty())) { - return true; - } else if (isa(J) || isa(J) || - isa(J) || isa(J)) { - CastInst *CI = cast(J); - if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() || - CI->getDestTy()->getScalarType()->isPPC_FP128Ty() || - isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) || - isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType())) - return true; - } else if (isLargeIntegerTy(!TM.isPPC64(), - J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::UDiv || - J->getOpcode() == Instruction::SDiv || - J->getOpcode() == Instruction::URem || - J->getOpcode() == Instruction::SRem)) { - return true; - } else if (!TM.isPPC64() && - isLargeIntegerTy(false, J->getType()->getScalarType()) && - (J->getOpcode() == Instruction::Shl || - J->getOpcode() == Instruction::AShr || - J->getOpcode() == Instruction::LShr)) { - // Only on PPC32, for 128-bit integers (specifically not 64-bit - // integers), these might be runtime calls. - return true; - } else if (isa(J) || isa(J)) { - // On PowerPC, indirect jumps use the counter register. - return true; - } else if (SwitchInst *SI = dyn_cast(J)) { - if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries()) - return true; - } - - // FREM is always a call. - if (J->getOpcode() == Instruction::FRem) - return true; - - if (ST->useSoftFloat()) { - switch(J->getOpcode()) { - case Instruction::FAdd: - case Instruction::FSub: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::FCmp: - return true; - } - } - - for (Value *Operand : J->operands()) - if (memAddrUsesCTR(Operand, TM, Visited)) - return true; - } - - return false; -} - bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, @@ -737,14 +382,6 @@ return false; } - // We don't want to spill/restore the counter register, and so we don't - // want to use the counter register if the loop contains calls. - SmallPtrSet Visited; - for (Loop::block_iterator I = L->block_begin(), IE = L->block_end(); - I != IE; ++I) - if (mightUseCTR(*I, LibInfo, Visited)) - return false; - SmallVector ExitingBlocks; L->getExitingBlocks(ExitingBlocks); @@ -774,6 +411,7 @@ // address for that value will be computed in the loop. SmallVector ExitBlocks; L->getExitBlocks(ExitBlocks); + SmallPtrSet Visited; for (auto &BB : ExitBlocks) { for (auto &PHI : BB->phis()) { for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx; diff --git a/llvm/test/CodeGen/PowerPC/branch-opt.ll b/llvm/test/CodeGen/PowerPC/branch-opt.ll --- a/llvm/test/CodeGen/PowerPC/branch-opt.ll +++ b/llvm/test/CodeGen/PowerPC/branch-opt.ll @@ -8,11 +8,13 @@ ; The last (whichever it is) should have a fallthrough exit, and the other three ; need an unconditional branch. No other block should have an unconditional ; branch to cond_next48 +; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy +; of %cond_next48, so there should only be two unconditional branches. -;CHECK: .LBB0_7: # %cond_next48 -;CHECK: b .LBB0_7 -;CHECK: b .LBB0_7 -;CHECK: b .LBB0_7 +;CHECK: b .LBB0_13 +;CHECK: b .LBB0_13 +;CHECK-NOT: b .LBB0_13 +;CHECK: .LBB0_13: # %cond_next48 define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) { entry: diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll --- a/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-constrained-fp.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: stdu 1, -64(1) -; CHECK-NEXT: li 30, 0 +; CHECK-NEXT: li 30, 255 ; CHECK-NEXT: addi 29, 3, -8 ; CHECK-NEXT: std 0, 80(1) ; CHECK-NEXT: .p2align 5 @@ -22,10 +22,10 @@ ; CHECK-NEXT: lfdu 1, 8(29) ; CHECK-NEXT: bl cos ; CHECK-NEXT: nop -; CHECK-NEXT: addi 30, 30, 8 +; CHECK-NEXT: addi 30, 30, -1 ; CHECK-NEXT: stfd 1, 0(29) -; CHECK-NEXT: cmpldi 30, 2040 -; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: cmpldi 30, 0 +; CHECK-NEXT: bc 12, 1, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %exit ; CHECK-NEXT: addi 1, 1, 64 ; CHECK-NEXT: ld 0, 16(1) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll --- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll @@ -9,27 +9,26 @@ define void @fmul_ctrloop_fp128() nounwind { ; PWR9-LABEL: fmul_ctrloop_fp128: ; PWR9: # %bb.0: # %entry -; PWR9-NEXT: addis 5, 2, a@toc@ha +; PWR9-NEXT: addis 3, 2, a@toc@ha +; PWR9-NEXT: li 4, 4 +; PWR9-NEXT: addi 3, 3, a@toc@l +; PWR9-NEXT: lxv 34, 0(3) ; PWR9-NEXT: addis 3, 2, y@toc@ha +; PWR9-NEXT: mtctr 4 ; PWR9-NEXT: addis 4, 2, x@toc@ha -; PWR9-NEXT: addi 5, 5, a@toc@l ; PWR9-NEXT: addi 3, 3, y@toc@l ; PWR9-NEXT: addi 4, 4, x@toc@l -; PWR9-NEXT: lxv 34, 0(5) ; PWR9-NEXT: addi 3, 3, -16 ; PWR9-NEXT: addi 4, 4, -16 -; PWR9-NEXT: li 5, 0 ; PWR9-NEXT: .p2align 5 ; PWR9-NEXT: .LBB0_1: # %for.body ; PWR9-NEXT: # ; PWR9-NEXT: lxv 35, 16(4) -; PWR9-NEXT: addi 5, 5, 16 ; PWR9-NEXT: addi 4, 4, 16 -; PWR9-NEXT: cmpldi 5, 64 ; PWR9-NEXT: xsmulqp 3, 2, 3 ; PWR9-NEXT: stxv 35, 16(3) ; PWR9-NEXT: addi 3, 3, 16 -; PWR9-NEXT: bne 0, .LBB0_1 +; PWR9-NEXT: bdnz .LBB0_1 ; PWR9-NEXT: # %bb.2: # %for.end ; PWR9-NEXT: blr ; @@ -43,7 +42,7 @@ ; PWR8-NEXT: std 28, 80(1) # 8-byte Folded Spill ; PWR8-NEXT: std 29, 88(1) # 8-byte Folded Spill ; PWR8-NEXT: std 30, 96(1) # 8-byte Folded Spill -; PWR8-NEXT: li 30, 0 +; PWR8-NEXT: li 30, 4 ; PWR8-NEXT: addi 4, 4, x@toc@l ; PWR8-NEXT: li 29, 16 ; PWR8-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill @@ -68,12 +67,12 @@ ; PWR8-NEXT: bl __mulkf3 ; PWR8-NEXT: nop ; PWR8-NEXT: xxswapd 0, 34 -; PWR8-NEXT: addi 30, 30, 16 +; PWR8-NEXT: addi 30, 30, -1 ; PWR8-NEXT: mr 3, 26 -; PWR8-NEXT: cmpldi 30, 64 +; PWR8-NEXT: cmpldi 30, 0 ; PWR8-NEXT: stxvd2x 0, 28, 29 ; PWR8-NEXT: mr 28, 27 -; PWR8-NEXT: bne 0, .LBB0_1 +; PWR8-NEXT: bc 12, 1, .LBB0_1 ; PWR8-NEXT: # %bb.2: # %for.end ; PWR8-NEXT: li 3, 48 ; PWR8-NEXT: ld 30, 96(1) # 8-byte Folded Reload @@ -108,22 +107,21 @@ define void @fpext_ctrloop_fp128(ptr %a) nounwind { ; PWR9-LABEL: fpext_ctrloop_fp128: ; PWR9: # %bb.0: # %entry -; PWR9-NEXT: addis 4, 2, y@toc@ha +; PWR9-NEXT: li 4, 4 ; PWR9-NEXT: addi 3, 3, -8 +; PWR9-NEXT: mtctr 4 +; PWR9-NEXT: addis 4, 2, y@toc@ha ; PWR9-NEXT: addi 4, 4, y@toc@l -; PWR9-NEXT: addi 5, 4, -16 -; PWR9-NEXT: li 4, 0 +; PWR9-NEXT: addi 4, 4, -16 ; PWR9-NEXT: .p2align 5 ; PWR9-NEXT: .LBB1_1: # %for.body ; PWR9-NEXT: # ; PWR9-NEXT: lfdu 0, 8(3) -; PWR9-NEXT: addi 4, 4, 8 -; PWR9-NEXT: cmpldi 4, 32 ; PWR9-NEXT: xscpsgndp 34, 0, 0 ; PWR9-NEXT: xscvdpqp 2, 2 -; PWR9-NEXT: stxv 34, 16(5) -; PWR9-NEXT: addi 5, 5, 16 -; PWR9-NEXT: bne 0, .LBB1_1 +; PWR9-NEXT: stxv 34, 16(4) +; PWR9-NEXT: addi 4, 4, 16 +; PWR9-NEXT: bdnz .LBB1_1 ; PWR9-NEXT: # %bb.2: # %for.end ; PWR9-NEXT: blr ; @@ -136,22 +134,22 @@ ; PWR8-NEXT: stdu 1, -64(1) ; PWR8-NEXT: addis 4, 2, y@toc@ha ; PWR8-NEXT: addi 30, 3, -8 -; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: li 29, 4 ; PWR8-NEXT: std 0, 80(1) ; PWR8-NEXT: addi 4, 4, y@toc@l -; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: addi 28, 4, -16 ; PWR8-NEXT: .p2align 4 ; PWR8-NEXT: .LBB1_1: # %for.body ; PWR8-NEXT: # ; PWR8-NEXT: lfdu 1, 8(30) -; PWR8-NEXT: addi 29, 29, 16 +; PWR8-NEXT: addi 28, 28, 16 ; PWR8-NEXT: bl __extenddfkf2 ; PWR8-NEXT: nop ; PWR8-NEXT: xxswapd 0, 34 -; PWR8-NEXT: addi 28, 28, 8 -; PWR8-NEXT: cmpldi 28, 32 -; PWR8-NEXT: stxvd2x 0, 0, 29 -; PWR8-NEXT: bne 0, .LBB1_1 +; PWR8-NEXT: addi 29, 29, -1 +; PWR8-NEXT: cmpldi 29, 0 +; PWR8-NEXT: stxvd2x 0, 0, 28 +; PWR8-NEXT: bc 12, 1, .LBB1_1 ; PWR8-NEXT: # %bb.2: # %for.end ; PWR8-NEXT: addi 1, 1, 64 ; PWR8-NEXT: ld 0, 16(1) @@ -181,22 +179,21 @@ define void @fptrunc_ctrloop_fp128(ptr %a) nounwind { ; PWR9-LABEL: fptrunc_ctrloop_fp128: ; PWR9: # %bb.0: # %entry -; PWR9-NEXT: addis 4, 2, x@toc@ha +; PWR9-NEXT: li 4, 4 ; PWR9-NEXT: addi 3, 3, -8 -; PWR9-NEXT: li 5, 0 +; PWR9-NEXT: mtctr 4 +; PWR9-NEXT: addis 4, 2, x@toc@ha ; PWR9-NEXT: addi 4, 4, x@toc@l ; PWR9-NEXT: addi 4, 4, -16 ; PWR9-NEXT: .p2align 5 ; PWR9-NEXT: .LBB2_1: # %for.body ; PWR9-NEXT: # ; PWR9-NEXT: lxv 34, 16(4) -; PWR9-NEXT: addi 5, 5, 8 ; PWR9-NEXT: addi 4, 4, 16 -; PWR9-NEXT: cmpldi 5, 32 ; PWR9-NEXT: xscvqpdp 2, 2 ; PWR9-NEXT: xscpsgndp 0, 34, 34 ; PWR9-NEXT: stfdu 0, 8(3) -; PWR9-NEXT: bne 0, .LBB2_1 +; PWR9-NEXT: bdnz .LBB2_1 ; PWR9-NEXT: # %bb.2: # %for.end ; PWR9-NEXT: blr ; @@ -209,22 +206,22 @@ ; PWR8-NEXT: stdu 1, -64(1) ; PWR8-NEXT: addis 4, 2, x@toc@ha ; PWR8-NEXT: addi 30, 3, -8 -; PWR8-NEXT: li 28, 0 +; PWR8-NEXT: li 29, 4 ; PWR8-NEXT: std 0, 80(1) ; PWR8-NEXT: addi 4, 4, x@toc@l -; PWR8-NEXT: addi 29, 4, -16 +; PWR8-NEXT: addi 28, 4, -16 ; PWR8-NEXT: .p2align 4 ; PWR8-NEXT: .LBB2_1: # %for.body ; PWR8-NEXT: # -; PWR8-NEXT: addi 29, 29, 16 -; PWR8-NEXT: lxvd2x 0, 0, 29 +; PWR8-NEXT: addi 28, 28, 16 +; PWR8-NEXT: lxvd2x 0, 0, 28 ; PWR8-NEXT: xxswapd 34, 0 ; PWR8-NEXT: bl __trunckfdf2 ; PWR8-NEXT: nop -; PWR8-NEXT: addi 28, 28, 8 +; PWR8-NEXT: addi 29, 29, -1 ; PWR8-NEXT: stfdu 1, 8(30) -; PWR8-NEXT: cmpldi 28, 32 -; PWR8-NEXT: bne 0, .LBB2_1 +; PWR8-NEXT: cmpldi 29, 0 +; PWR8-NEXT: bc 12, 1, .LBB2_1 ; PWR8-NEXT: # %bb.2: # %for.end ; PWR8-NEXT: addi 1, 1, 64 ; PWR8-NEXT: ld 0, 16(1) diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll --- a/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll +++ b/llvm/test/CodeGen/PowerPC/ctrloop-sh.ll @@ -8,88 +8,87 @@ ; CHECK-LABEL: foo1: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: li 7, 0 ; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill +; CHECK-NEXT: li 6, 2048 ; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: mtctr 6 +; CHECK-NEXT: li 6, 0 ; CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 10, 12(5) -; CHECK-NEXT: addi 6, 6, -1 -; CHECK-NEXT: lwz 12, 12(4) -; CHECK-NEXT: lwz 11, 8(4) -; CHECK-NEXT: subfic 0, 10, 96 -; CHECK-NEXT: lwz 8, 4(4) -; CHECK-NEXT: addi 30, 10, -64 -; CHECK-NEXT: lwz 9, 0(4) -; CHECK-NEXT: cmplwi 10, 64 -; CHECK-NEXT: slw 25, 12, 10 -; CHECK-NEXT: addi 29, 10, -96 -; CHECK-NEXT: subfic 27, 10, 32 -; CHECK-NEXT: srw 0, 12, 0 -; CHECK-NEXT: slw 24, 11, 30 +; CHECK-NEXT: lwz 9, 12(5) +; CHECK-NEXT: lwz 10, 8(4) +; CHECK-NEXT: lwz 11, 12(4) +; CHECK-NEXT: subfic 12, 9, 96 +; CHECK-NEXT: lwz 7, 4(4) +; CHECK-NEXT: addi 0, 9, -64 +; CHECK-NEXT: lwz 8, 0(4) +; CHECK-NEXT: subfic 28, 9, 32 +; CHECK-NEXT: cmplwi 9, 64 +; CHECK-NEXT: slw 26, 11, 9 +; CHECK-NEXT: srw 12, 11, 12 +; CHECK-NEXT: slw 25, 10, 0 +; CHECK-NEXT: addi 30, 9, -96 +; CHECK-NEXT: slw 29, 8, 9 +; CHECK-NEXT: or 12, 25, 12 +; CHECK-NEXT: srw 25, 7, 28 ; CHECK-NEXT: bc 12, 0, .LBB0_3 ; CHECK-NEXT: # %bb.2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 25, 7, 0 +; CHECK-NEXT: ori 26, 6, 0 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: slw 28, 9, 10 -; CHECK-NEXT: or 0, 24, 0 -; CHECK-NEXT: srw 24, 8, 27 -; CHECK-NEXT: stw 25, 12(3) -; CHECK-NEXT: subfic 25, 10, 64 -; CHECK-NEXT: slw 29, 12, 29 -; CHECK-NEXT: slw 26, 11, 10 -; CHECK-NEXT: or 28, 28, 24 -; CHECK-NEXT: srw 24, 12, 27 -; CHECK-NEXT: or 29, 0, 29 -; CHECK-NEXT: subfic 0, 25, 32 -; CHECK-NEXT: or 26, 26, 24 -; CHECK-NEXT: addi 24, 10, -32 -; CHECK-NEXT: srw 27, 11, 27 -; CHECK-NEXT: slw 0, 11, 0 -; CHECK-NEXT: srw 11, 11, 25 -; CHECK-NEXT: srw 25, 12, 25 -; CHECK-NEXT: slw 30, 12, 30 -; CHECK-NEXT: slw 12, 12, 24 -; CHECK-NEXT: slw 24, 8, 24 -; CHECK-NEXT: or 0, 25, 0 -; CHECK-NEXT: or 28, 28, 24 -; CHECK-NEXT: cmplwi 1, 10, 0 -; CHECK-NEXT: slw 10, 8, 10 -; CHECK-NEXT: or 0, 0, 27 -; CHECK-NEXT: or 11, 28, 11 -; CHECK-NEXT: or 10, 10, 0 -; CHECK-NEXT: or 12, 26, 12 +; CHECK-NEXT: slw 27, 10, 9 +; CHECK-NEXT: or 29, 29, 25 +; CHECK-NEXT: srw 25, 11, 28 +; CHECK-NEXT: stw 26, 12(3) +; CHECK-NEXT: subfic 26, 9, 64 +; CHECK-NEXT: slw 30, 11, 30 +; CHECK-NEXT: or 27, 27, 25 +; CHECK-NEXT: addi 25, 9, -32 +; CHECK-NEXT: or 12, 12, 30 +; CHECK-NEXT: subfic 30, 26, 32 +; CHECK-NEXT: srw 28, 10, 28 +; CHECK-NEXT: slw 30, 10, 30 +; CHECK-NEXT: srw 10, 10, 26 +; CHECK-NEXT: srw 26, 11, 26 +; CHECK-NEXT: slw 24, 11, 0 +; CHECK-NEXT: slw 0, 7, 25 +; CHECK-NEXT: or 0, 29, 0 +; CHECK-NEXT: or 30, 26, 30 +; CHECK-NEXT: cmplwi 1, 9, 0 +; CHECK-NEXT: slw 9, 7, 9 +; CHECK-NEXT: or 10, 0, 10 +; CHECK-NEXT: or 0, 30, 28 +; CHECK-NEXT: slw 11, 11, 25 +; CHECK-NEXT: or 9, 9, 0 +; CHECK-NEXT: or 11, 27, 11 ; CHECK-NEXT: bc 12, 0, .LBB0_5 ; CHECK-NEXT: # %bb.4: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 11, 29, 0 -; CHECK-NEXT: ori 10, 30, 0 -; CHECK-NEXT: ori 12, 7, 0 +; CHECK-NEXT: ori 10, 12, 0 +; CHECK-NEXT: ori 9, 24, 0 +; CHECK-NEXT: ori 11, 6, 0 ; CHECK-NEXT: b .LBB0_5 ; CHECK-NEXT: .LBB0_5: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: cmplwi 6, 0 ; CHECK-NEXT: bc 12, 6, .LBB0_7 ; CHECK-NEXT: # %bb.6: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 9, 11, 0 ; CHECK-NEXT: ori 8, 10, 0 +; CHECK-NEXT: ori 7, 9, 0 ; CHECK-NEXT: b .LBB0_7 ; CHECK-NEXT: .LBB0_7: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: stw 12, 8(3) -; CHECK-NEXT: stw 9, 0(3) -; CHECK-NEXT: stw 8, 4(3) -; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: stw 11, 8(3) +; CHECK-NEXT: stw 8, 0(3) +; CHECK-NEXT: stw 7, 4(3) +; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.8: # %for.end ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload @@ -122,104 +121,105 @@ ; CHECK-LABEL: foo2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: stw 23, 12(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill +; CHECK-NEXT: li 6, 2048 ; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: mtctr 6 ; CHECK-NEXT: .LBB1_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 9, 12(5) -; CHECK-NEXT: addi 6, 6, -1 -; CHECK-NEXT: lwz 10, 4(4) -; CHECK-NEXT: lwz 11, 0(4) -; CHECK-NEXT: subfic 12, 9, 96 -; CHECK-NEXT: lwz 7, 8(4) -; CHECK-NEXT: addi 0, 9, -64 -; CHECK-NEXT: lwz 8, 12(4) -; CHECK-NEXT: subfic 28, 9, 32 -; CHECK-NEXT: slw 12, 11, 12 -; CHECK-NEXT: srw 24, 10, 0 -; CHECK-NEXT: srw 29, 8, 9 -; CHECK-NEXT: or 12, 24, 12 -; CHECK-NEXT: slw 24, 7, 28 -; CHECK-NEXT: srw 26, 10, 9 -; CHECK-NEXT: or 29, 29, 24 -; CHECK-NEXT: slw 24, 11, 28 -; CHECK-NEXT: cmplwi 9, 64 -; CHECK-NEXT: srawi 25, 11, 31 -; CHECK-NEXT: or 26, 26, 24 -; CHECK-NEXT: sraw 24, 11, 9 -; CHECK-NEXT: addi 30, 9, -96 +; CHECK-NEXT: lwz 8, 12(5) +; CHECK-NEXT: lwz 9, 4(4) +; CHECK-NEXT: lwz 10, 0(4) +; CHECK-NEXT: subfic 11, 8, 96 +; CHECK-NEXT: lwz 6, 8(4) +; CHECK-NEXT: addi 12, 8, -64 +; CHECK-NEXT: lwz 7, 12(4) +; CHECK-NEXT: subfic 29, 8, 32 +; CHECK-NEXT: slw 11, 10, 11 +; CHECK-NEXT: srw 25, 9, 12 +; CHECK-NEXT: srw 30, 7, 8 +; CHECK-NEXT: or 11, 25, 11 +; CHECK-NEXT: slw 25, 6, 29 +; CHECK-NEXT: srw 27, 9, 8 +; CHECK-NEXT: or 30, 30, 25 +; CHECK-NEXT: slw 25, 10, 29 +; CHECK-NEXT: addi 0, 8, -96 +; CHECK-NEXT: cmplwi 8, 64 +; CHECK-NEXT: srawi 26, 10, 31 +; CHECK-NEXT: or 27, 27, 25 +; CHECK-NEXT: sraw 25, 10, 8 +; CHECK-NEXT: cmpwi 1, 0, 1 +; CHECK-NEXT: sraw 24, 10, 0 ; CHECK-NEXT: bc 12, 0, .LBB1_3 ; CHECK-NEXT: # %bb.2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 24, 25, 0 -; CHECK-NEXT: b .LBB1_3 +; CHECK-NEXT: ori 0, 26, 0 +; CHECK-NEXT: b .LBB1_4 ; CHECK-NEXT: .LBB1_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: cmpwi 1, 30, 1 -; CHECK-NEXT: sraw 30, 11, 30 -; CHECK-NEXT: stw 24, 0(3) -; CHECK-NEXT: subfic 24, 9, 64 -; CHECK-NEXT: addi 27, 9, -32 -; CHECK-NEXT: bc 12, 4, .LBB1_5 -; CHECK-NEXT: # %bb.4: # %for.body +; CHECK-NEXT: addi 0, 25, 0 +; CHECK-NEXT: .LBB1_4: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 12, 30, 0 -; CHECK-NEXT: b .LBB1_5 -; CHECK-NEXT: .LBB1_5: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: subfic 30, 24, 32 -; CHECK-NEXT: slw 28, 10, 28 -; CHECK-NEXT: srw 30, 10, 30 -; CHECK-NEXT: slw 10, 10, 24 -; CHECK-NEXT: slw 24, 11, 24 -; CHECK-NEXT: sraw 23, 11, 0 -; CHECK-NEXT: srw 0, 7, 27 -; CHECK-NEXT: sraw 11, 11, 27 -; CHECK-NEXT: cmpwi 1, 27, 1 -; CHECK-NEXT: or 0, 29, 0 -; CHECK-NEXT: or 30, 24, 30 +; CHECK-NEXT: addi 28, 8, -32 +; CHECK-NEXT: stw 0, 0(3) +; CHECK-NEXT: subfic 0, 8, 64 +; CHECK-NEXT: subfic 25, 0, 32 +; CHECK-NEXT: slw 29, 9, 29 +; CHECK-NEXT: srw 25, 9, 25 +; CHECK-NEXT: slw 9, 9, 0 +; CHECK-NEXT: slw 0, 10, 0 ; CHECK-NEXT: bc 12, 4, .LBB1_6 -; CHECK-NEXT: b .LBB1_7 +; CHECK-NEXT: # %bb.5: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: ori 11, 24, 0 +; CHECK-NEXT: b .LBB1_6 ; CHECK-NEXT: .LBB1_6: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: addi 11, 26, 0 +; CHECK-NEXT: sraw 12, 10, 12 +; CHECK-NEXT: sraw 10, 10, 28 +; CHECK-NEXT: cmpwi 1, 28, 1 +; CHECK-NEXT: srw 28, 6, 28 +; CHECK-NEXT: or 0, 0, 25 +; CHECK-NEXT: or 30, 30, 28 +; CHECK-NEXT: bc 12, 4, .LBB1_7 +; CHECK-NEXT: b .LBB1_8 ; CHECK-NEXT: .LBB1_7: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: cmplwi 1, 9, 0 -; CHECK-NEXT: srw 9, 7, 9 -; CHECK-NEXT: or 10, 0, 10 -; CHECK-NEXT: or 0, 30, 28 -; CHECK-NEXT: or 9, 9, 0 -; CHECK-NEXT: bc 12, 0, .LBB1_9 -; CHECK-NEXT: # %bb.8: # %for.body +; CHECK-NEXT: addi 10, 27, 0 +; CHECK-NEXT: .LBB1_8: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 10, 12, 0 -; CHECK-NEXT: ori 9, 23, 0 -; CHECK-NEXT: ori 11, 25, 0 -; CHECK-NEXT: b .LBB1_9 -; CHECK-NEXT: .LBB1_9: # %for.body +; CHECK-NEXT: cmplwi 1, 8, 0 +; CHECK-NEXT: srw 8, 6, 8 +; CHECK-NEXT: or 0, 0, 29 +; CHECK-NEXT: or 9, 30, 9 +; CHECK-NEXT: or 8, 8, 0 +; CHECK-NEXT: bc 12, 0, .LBB1_10 +; CHECK-NEXT: # %bb.9: # %for.body +; CHECK-NEXT: # +; CHECK-NEXT: ori 9, 11, 0 +; CHECK-NEXT: ori 8, 12, 0 +; CHECK-NEXT: ori 10, 26, 0 +; CHECK-NEXT: b .LBB1_10 +; CHECK-NEXT: .LBB1_10: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: cmplwi 6, 0 -; CHECK-NEXT: bc 12, 6, .LBB1_11 -; CHECK-NEXT: # %bb.10: # %for.body +; CHECK-NEXT: bc 12, 6, .LBB1_12 +; CHECK-NEXT: # %bb.11: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 8, 10, 0 ; CHECK-NEXT: ori 7, 9, 0 -; CHECK-NEXT: b .LBB1_11 -; CHECK-NEXT: .LBB1_11: # %for.body +; CHECK-NEXT: ori 6, 8, 0 +; CHECK-NEXT: b .LBB1_12 +; CHECK-NEXT: .LBB1_12: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: stw 11, 4(3) -; CHECK-NEXT: stw 8, 12(3) -; CHECK-NEXT: stw 7, 8(3) -; CHECK-NEXT: bne 0, .LBB1_1 -; CHECK-NEXT: # %bb.12: # %for.end +; CHECK-NEXT: stw 10, 4(3) +; CHECK-NEXT: stw 7, 12(3) +; CHECK-NEXT: stw 6, 8(3) +; CHECK-NEXT: bdnz .LBB1_1 +; CHECK-NEXT: # %bb.13: # %for.end ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload @@ -227,7 +227,6 @@ ; CHECK-NEXT: lwz 26, 24(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 25, 20(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 24, 16(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 23, 12(1) # 4-byte Folded Reload ; CHECK-NEXT: addi 1, 1, 48 ; CHECK-NEXT: blr entry: @@ -252,88 +251,87 @@ ; CHECK-LABEL: foo3: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: li 6, 2048 -; CHECK-NEXT: li 7, 0 ; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill +; CHECK-NEXT: li 6, 2048 ; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill ; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: mtctr 6 +; CHECK-NEXT: li 6, 0 ; CHECK-NEXT: .LBB2_1: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lwz 10, 12(5) -; CHECK-NEXT: addi 6, 6, -1 -; CHECK-NEXT: lwz 12, 0(4) -; CHECK-NEXT: lwz 11, 4(4) -; CHECK-NEXT: subfic 0, 10, 96 -; CHECK-NEXT: lwz 8, 8(4) -; CHECK-NEXT: addi 30, 10, -64 -; CHECK-NEXT: lwz 9, 12(4) -; CHECK-NEXT: cmplwi 10, 64 -; CHECK-NEXT: srw 25, 12, 10 -; CHECK-NEXT: addi 29, 10, -96 -; CHECK-NEXT: subfic 27, 10, 32 -; CHECK-NEXT: slw 0, 12, 0 -; CHECK-NEXT: srw 24, 11, 30 +; CHECK-NEXT: lwz 9, 12(5) +; CHECK-NEXT: lwz 10, 4(4) +; CHECK-NEXT: lwz 11, 0(4) +; CHECK-NEXT: subfic 12, 9, 96 +; CHECK-NEXT: lwz 7, 8(4) +; CHECK-NEXT: addi 0, 9, -64 +; CHECK-NEXT: lwz 8, 12(4) +; CHECK-NEXT: subfic 28, 9, 32 +; CHECK-NEXT: cmplwi 9, 64 +; CHECK-NEXT: srw 26, 11, 9 +; CHECK-NEXT: slw 12, 11, 12 +; CHECK-NEXT: srw 25, 10, 0 +; CHECK-NEXT: addi 30, 9, -96 +; CHECK-NEXT: srw 29, 8, 9 +; CHECK-NEXT: or 12, 25, 12 +; CHECK-NEXT: slw 25, 7, 28 ; CHECK-NEXT: bc 12, 0, .LBB2_3 ; CHECK-NEXT: # %bb.2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 25, 7, 0 +; CHECK-NEXT: ori 26, 6, 0 ; CHECK-NEXT: b .LBB2_3 ; CHECK-NEXT: .LBB2_3: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: srw 28, 9, 10 -; CHECK-NEXT: or 0, 24, 0 -; CHECK-NEXT: slw 24, 8, 27 -; CHECK-NEXT: stw 25, 0(3) -; CHECK-NEXT: subfic 25, 10, 64 -; CHECK-NEXT: srw 29, 12, 29 -; CHECK-NEXT: srw 26, 11, 10 -; CHECK-NEXT: or 28, 28, 24 -; CHECK-NEXT: slw 24, 12, 27 -; CHECK-NEXT: or 29, 0, 29 -; CHECK-NEXT: subfic 0, 25, 32 -; CHECK-NEXT: or 26, 26, 24 -; CHECK-NEXT: addi 24, 10, -32 -; CHECK-NEXT: slw 27, 11, 27 -; CHECK-NEXT: srw 0, 11, 0 -; CHECK-NEXT: slw 11, 11, 25 -; CHECK-NEXT: slw 25, 12, 25 -; CHECK-NEXT: srw 30, 12, 30 -; CHECK-NEXT: srw 12, 12, 24 -; CHECK-NEXT: srw 24, 8, 24 -; CHECK-NEXT: or 0, 25, 0 -; CHECK-NEXT: or 28, 28, 24 -; CHECK-NEXT: cmplwi 1, 10, 0 -; CHECK-NEXT: srw 10, 8, 10 -; CHECK-NEXT: or 0, 0, 27 -; CHECK-NEXT: or 11, 28, 11 -; CHECK-NEXT: or 10, 10, 0 -; CHECK-NEXT: or 12, 26, 12 +; CHECK-NEXT: srw 27, 10, 9 +; CHECK-NEXT: or 29, 29, 25 +; CHECK-NEXT: slw 25, 11, 28 +; CHECK-NEXT: stw 26, 0(3) +; CHECK-NEXT: subfic 26, 9, 64 +; CHECK-NEXT: srw 30, 11, 30 +; CHECK-NEXT: or 27, 27, 25 +; CHECK-NEXT: addi 25, 9, -32 +; CHECK-NEXT: or 12, 12, 30 +; CHECK-NEXT: subfic 30, 26, 32 +; CHECK-NEXT: slw 28, 10, 28 +; CHECK-NEXT: srw 30, 10, 30 +; CHECK-NEXT: slw 10, 10, 26 +; CHECK-NEXT: slw 26, 11, 26 +; CHECK-NEXT: srw 24, 11, 0 +; CHECK-NEXT: srw 0, 7, 25 +; CHECK-NEXT: or 0, 29, 0 +; CHECK-NEXT: or 30, 26, 30 +; CHECK-NEXT: cmplwi 1, 9, 0 +; CHECK-NEXT: srw 9, 7, 9 +; CHECK-NEXT: or 10, 0, 10 +; CHECK-NEXT: or 0, 30, 28 +; CHECK-NEXT: srw 11, 11, 25 +; CHECK-NEXT: or 9, 9, 0 +; CHECK-NEXT: or 11, 27, 11 ; CHECK-NEXT: bc 12, 0, .LBB2_5 ; CHECK-NEXT: # %bb.4: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 11, 29, 0 -; CHECK-NEXT: ori 10, 30, 0 -; CHECK-NEXT: ori 12, 7, 0 +; CHECK-NEXT: ori 10, 12, 0 +; CHECK-NEXT: ori 9, 24, 0 +; CHECK-NEXT: ori 11, 6, 0 ; CHECK-NEXT: b .LBB2_5 ; CHECK-NEXT: .LBB2_5: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: cmplwi 6, 0 ; CHECK-NEXT: bc 12, 6, .LBB2_7 ; CHECK-NEXT: # %bb.6: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: ori 9, 11, 0 ; CHECK-NEXT: ori 8, 10, 0 +; CHECK-NEXT: ori 7, 9, 0 ; CHECK-NEXT: b .LBB2_7 ; CHECK-NEXT: .LBB2_7: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: stw 12, 4(3) -; CHECK-NEXT: stw 9, 12(3) -; CHECK-NEXT: stw 8, 8(3) -; CHECK-NEXT: bne 0, .LBB2_1 +; CHECK-NEXT: stw 11, 4(3) +; CHECK-NEXT: stw 8, 12(3) +; CHECK-NEXT: stw 7, 8(3) +; CHECK-NEXT: bdnz .LBB2_1 ; CHECK-NEXT: # %bb.8: # %for.end ; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload ; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll --- a/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll +++ b/llvm/test/CodeGen/PowerPC/disable-ctr-ppcf128.ll @@ -1,16 +1,46 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE ; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \ -; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE ; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \ ; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2 define ppc_fp128 @test_ctr0() { +; P9LE-LABEL: test_ctr0: +; P9LE: # %bb.0: # %bb +; P9LE-NEXT: mflr r0 +; P9LE-NEXT: .cfi_def_cfa_offset 48 +; P9LE-NEXT: .cfi_offset lr, 16 +; P9LE-NEXT: .cfi_offset r30, -16 +; P9LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P9LE-NEXT: stdu r1, -48(r1) +; P9LE-NEXT: li r3, 1 +; P9LE-NEXT: xxlxor f1, f1, f1 +; P9LE-NEXT: xxlxor f2, f2, f2 +; P9LE-NEXT: std r0, 64(r1) +; P9LE-NEXT: rldic r30, r3, 62, 1 +; P9LE-NEXT: .p2align 5 +; P9LE-NEXT: .LBB0_1: # %bb6 +; P9LE-NEXT: # +; P9LE-NEXT: xxlxor f3, f3, f3 +; P9LE-NEXT: xxlxor f4, f4, f4 +; P9LE-NEXT: bl __gcc_qadd +; P9LE-NEXT: nop +; P9LE-NEXT: addi r30, r30, -1 +; P9LE-NEXT: cmpldi r30, 0 +; P9LE-NEXT: bc 12, gt, .LBB0_1 +; P9LE-NEXT: # %bb.2: # %bb14 +; P9LE-NEXT: addi r1, r1, 48 +; P9LE-NEXT: ld r0, 16(r1) +; P9LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P9LE-NEXT: mtlr r0 +; P9LE-NEXT: blr +; ; P9BE-LABEL: test_ctr0: ; P9BE: # %bb.0: # %bb ; P9BE-NEXT: mflr r0 @@ -19,9 +49,10 @@ ; P9BE-NEXT: .cfi_def_cfa_offset 128 ; P9BE-NEXT: .cfi_offset lr, 16 ; P9BE-NEXT: .cfi_offset r30, -16 +; P9BE-NEXT: li r3, 1 ; P9BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill ; P9BE-NEXT: xxlxor f1, f1, f1 -; P9BE-NEXT: li r30, 0 +; P9BE-NEXT: rldic r30, r3, 62, 1 ; P9BE-NEXT: xxlxor f2, f2, f2 ; P9BE-NEXT: .p2align 5 ; P9BE-NEXT: .LBB0_1: # %bb6 @@ -30,9 +61,9 @@ ; P9BE-NEXT: xxlxor f4, f4, f4 ; P9BE-NEXT: bl __gcc_qadd ; P9BE-NEXT: nop -; P9BE-NEXT: addi r30, r30, 4 +; P9BE-NEXT: addi r30, r30, -1 ; P9BE-NEXT: cmpldi r30, 0 -; P9BE-NEXT: bne cr0, .LBB0_1 +; P9BE-NEXT: bc 12, gt, .LBB0_1 ; P9BE-NEXT: # %bb.2: # %bb14 ; P9BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload ; P9BE-NEXT: addi r1, r1, 128 @@ -40,6 +71,36 @@ ; P9BE-NEXT: mtlr r0 ; P9BE-NEXT: blr ; +; P8LE-LABEL: test_ctr0: +; P8LE: # %bb.0: # %bb +; P8LE-NEXT: mflr r0 +; P8LE-NEXT: .cfi_def_cfa_offset 48 +; P8LE-NEXT: .cfi_offset lr, 16 +; P8LE-NEXT: .cfi_offset r30, -16 +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; P8LE-NEXT: stdu r1, -48(r1) +; P8LE-NEXT: xxlxor f1, f1, f1 +; P8LE-NEXT: li r3, 1 +; P8LE-NEXT: std r0, 64(r1) +; P8LE-NEXT: xxlxor f2, f2, f2 +; P8LE-NEXT: rldic r30, r3, 62, 1 +; P8LE-NEXT: .p2align 5 +; P8LE-NEXT: .LBB0_1: # %bb6 +; P8LE-NEXT: # +; P8LE-NEXT: xxlxor f3, f3, f3 +; P8LE-NEXT: xxlxor f4, f4, f4 +; P8LE-NEXT: bl __gcc_qadd +; P8LE-NEXT: nop +; P8LE-NEXT: addi r30, r30, -1 +; P8LE-NEXT: cmpldi r30, 0 +; P8LE-NEXT: bc 12, gt, .LBB0_1 +; P8LE-NEXT: # %bb.2: # %bb14 +; P8LE-NEXT: addi r1, r1, 48 +; P8LE-NEXT: ld r0, 16(r1) +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: mtlr r0 +; P8LE-NEXT: blr +; ; P8BE-LABEL: test_ctr0: ; P8BE: # %bb.0: # %bb ; P8BE-NEXT: mflr r0 @@ -49,9 +110,10 @@ ; P8BE-NEXT: .cfi_offset lr, 16 ; P8BE-NEXT: .cfi_offset r30, -16 ; P8BE-NEXT: xxlxor f1, f1, f1 +; P8BE-NEXT: li r3, 1 ; P8BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; P8BE-NEXT: li r30, 0 ; P8BE-NEXT: xxlxor f2, f2, f2 +; P8BE-NEXT: rldic r30, r3, 62, 1 ; P8BE-NEXT: .p2align 5 ; P8BE-NEXT: .LBB0_1: # %bb6 ; P8BE-NEXT: # @@ -59,9 +121,9 @@ ; P8BE-NEXT: xxlxor f4, f4, f4 ; P8BE-NEXT: bl __gcc_qadd ; P8BE-NEXT: nop -; P8BE-NEXT: addi r30, r30, 4 +; P8BE-NEXT: addi r30, r30, -1 ; P8BE-NEXT: cmpldi r30, 0 -; P8BE-NEXT: bne cr0, .LBB0_1 +; P8BE-NEXT: bc 12, gt, .LBB0_1 ; P8BE-NEXT: # %bb.2: # %bb14 ; P8BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload ; P8BE-NEXT: addi r1, r1, 128 @@ -82,5 +144,3 @@ bb14: ; preds = %bb6 ret ppc_fp128 %i8 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; LE: {{.*}} diff --git a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll --- a/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll +++ b/llvm/test/CodeGen/PowerPC/hardware-loops-crash.ll @@ -18,12 +18,15 @@ ; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: ; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond() -; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]] +; CHECK: while.cond25.preheader: +; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 51) +; CHECK-NEXT: br label [[WHILE_COND25:%.*]] ; CHECK: while.cond25: -; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ], [ 0, [[FOR_INC]] ] -; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[FOR_INC]] ] -; CHECK-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[INDVARS_IV349]], 0 -; CHECK-NEXT: br i1 [[CMP26_NOT]], label [[WHILE_END187:%.*]], label [[LAND_RHS]] +; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ] +; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1) +; CHECK-NEXT: br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]] ; CHECK: land.rhs: ; CHECK-NEXT: [[INDVARS_IV_NEXT350]] = add nsw i64 [[INDVARS_IV349]], -1 ; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond() @@ -34,13 +37,13 @@ ; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond() ; CHECK-NEXT: br i1 [[C_2]], label [[WHILE_END187]], label [[WHILE_COND35_PREHEADER:%.*]] ; CHECK: while.cond35.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR_LCSSA1]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 51 -; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 [[TMP1]]) +; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[INDVAR_LCSSA1]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 51 +; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 [[TMP2]]) ; CHECK-NEXT: br label [[WHILE_COND35:%.*]] ; CHECK: while.cond35: -; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1) -; CHECK-NEXT: br i1 [[TMP2]], label [[LAND_RHS37:%.*]], label [[IF_END51:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1) +; CHECK-NEXT: br i1 [[TMP3]], label [[LAND_RHS37:%.*]], label [[IF_END51:%.*]] ; CHECK: land.rhs37: ; CHECK-NEXT: br label [[WHILE_COND35]] ; CHECK: if.end51: diff --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll --- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -99,10 +99,13 @@ ; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body ; CHECK: bl {{.*}}something ; -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-DAG: cmplwi [[IV]], 0 -; CHECK-NEXT: bne 0, {{.*}}[[LOOP]] +; CHECK-64-DAG: addi [[IV]], [[IV]], -1 +; CHECK-64-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-64-DAG: cmpldi [[IV]], 0 +; CHECK-32-DAG: addi [[IV]], [[IV]], -1 +; CHECK-32-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-32-DAG: cmplwi [[IV]], 0 +; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]] ; ; Next BB. ; CHECK: slwi 3, [[SUM]], 3 @@ -171,11 +174,14 @@ ; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body ; CHECK: bl {{.*}}something ; -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-DAG: cmplwi [[IV]], 0 +; CHECK-64-DAG: addi [[IV]], [[IV]], -1 +; CHECK-64-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-64-DAG: cmpldi [[IV]], 0 +; CHECK-32-DAG: addi [[IV]], [[IV]], -1 +; CHECK-32-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-32-DAG: cmplwi [[IV]], 0 ; -; CHECK-NEXT: bne 0, {{.*}}[[LOOP]] +; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]] ; ; Next BB ; CHECK: %for.exit @@ -240,11 +246,14 @@ ; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body ; CHECK: bl {{.*}}something ; -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-DAG: cmplwi [[IV]], 0 +; CHECK-64-DAG: addi [[IV]], [[IV]], -1 +; CHECK-64-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-64-DAG: cmpldi [[IV]], 0 +; CHECK-32-DAG: addi [[IV]], [[IV]], -1 +; CHECK-32-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-32-DAG: cmplwi [[IV]], 0 ; -; CHECK-NEXT: bne 0, {{.*}}[[LOOP]] +; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]] ; ; Next BB ; CHECK: bl {{.*}}somethingElse @@ -336,11 +345,14 @@ ; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body ; CHECK: bl {{.*}}something ; -; CHECK-DAG: addi [[IV]], [[IV]], -1 -; CHECK-DAG: add [[SUM]], 3, [[SUM]] -; CHECK-DAG: cmplwi [[IV]], 0 +; CHECK-64-DAG: addi [[IV]], [[IV]], -1 +; CHECK-64-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-64-DAG: cmpldi [[IV]], 0 +; CHECK-32-DAG: addi [[IV]], [[IV]], -1 +; CHECK-32-DAG: add [[SUM]], 3, [[SUM]] +; CHECK-32-DAG: cmplwi [[IV]], 0 ; -; CHECK-NEXT: bne 0, {{.*}}[[LOOP]] +; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]] ; ; Next BB. ; CHECK: slwi 3, [[SUM]], 3 diff --git a/llvm/test/CodeGen/PowerPC/pr36292.ll b/llvm/test/CodeGen/PowerPC/pr36292.ll --- a/llvm/test/CodeGen/PowerPC/pr36292.ll +++ b/llvm/test/CodeGen/PowerPC/pr36292.ll @@ -12,12 +12,19 @@ ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: stdu 1, -64(1) ; CHECK-NEXT: std 0, 80(1) -; CHECK-NEXT: ld 29, 0(3) +; CHECK-NEXT: ld 3, 0(3) ; CHECK-NEXT: ld 30, 32(1) -; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: bge- 0, .LBB0_2 -; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_1: # %bounds.ok +; CHECK-NEXT: sub 4, 3, 30 +; CHECK-NEXT: cmpld 4, 3 +; CHECK-NEXT: iselgt 3, 0, 4 +; CHECK-NEXT: addi 29, 3, 1 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: # %forcond +; CHECK-NEXT: # +; CHECK-NEXT: addi 29, 29, -1 +; CHECK-NEXT: cmpldi 29, 0 +; CHECK-NEXT: bc 4, 1, .LBB0_3 +; CHECK-NEXT: # %bb.2: # %bounds.ok ; CHECK-NEXT: # ; CHECK-NEXT: lfs 2, 0(3) ; CHECK-NEXT: xxlxor 1, 1, 1 @@ -25,9 +32,8 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: addi 30, 30, 1 ; CHECK-NEXT: stfs 1, 0(3) -; CHECK-NEXT: cmpld 30, 29 -; CHECK-NEXT: blt+ 0, .LBB0_1 -; CHECK-NEXT: .LBB0_2: # %bounds.fail +; CHECK-NEXT: b .LBB0_1 +; CHECK-NEXT: .LBB0_3: # %bounds.fail ; CHECK-NEXT: std 30, 32(1) %pos = alloca i64, align 8 br label %forcond diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -28,7 +28,7 @@ ; CHECK-NEXT: nop ; CHECK-NEXT: addi r30, r30, -1 ; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: bne cr0, .LBB0_3 +; CHECK-NEXT: bc 12, gt, .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb15 ; CHECK-NEXT: stb r3, 0(r3) ; CHECK-NEXT: addi r1, r1, 64 diff --git a/llvm/test/CodeGen/PowerPC/pr48519.ll b/llvm/test/CodeGen/PowerPC/pr48519.ll --- a/llvm/test/CodeGen/PowerPC/pr48519.ll +++ b/llvm/test/CodeGen/PowerPC/pr48519.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -48(r1) -; CHECK-NEXT: li r3, 1 ; CHECK-NEXT: li r30, 0 +; CHECK-NEXT: li r3, 1 ; CHECK-NEXT: std r0, 64(r1) ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb3 @@ -28,7 +28,7 @@ ; CHECK-NEXT: addi r30, r30, -1 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: bne+ cr0, .LBB0_1 +; CHECK-NEXT: bc 12, gt, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %bb11 ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop @@ -91,7 +91,7 @@ ; CHECK-NEXT: # ; CHECK-NEXT: addi r30, r30, -1 ; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: beq cr0, .LBB1_3 +; CHECK-NEXT: bc 4, gt, .LBB1_3 ; CHECK-NEXT: # %bb.2: # %bb3 ; CHECK-NEXT: # ; CHECK-NEXT: lhz r3, 0(0) @@ -156,26 +156,28 @@ ; CHECK-NEXT: ld r3, 0(r3) ; CHECK-NEXT: cmpdi r3, 0 ; CHECK-NEXT: crnot 4*cr2+lt, eq -; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_3 +; CHECK-NEXT: b .LBB2_2 ; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: .LBB2_1: # %bb4 +; CHECK-NEXT: .LBB2_1: # %bb10 +; CHECK-NEXT: # +; CHECK-NEXT: addi r30, r30, -1 +; CHECK-NEXT: cmpldi r30, 0 +; CHECK-NEXT: bc 4, gt, .LBB2_5 +; CHECK-NEXT: .LBB2_2: # %bb2 +; CHECK-NEXT: # +; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_1 +; CHECK-NEXT: # %bb.3: # %bb4 +; CHECK-NEXT: # ; CHECK-NEXT: lhz r3, 0(r3) ; CHECK-NEXT: bl __gnu_h2f_ieee ; CHECK-NEXT: nop ; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB2_6 -; CHECK-NEXT: # %bb.2: # %bb8 +; CHECK-NEXT: # %bb.4: # %bb8 +; CHECK-NEXT: # ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop ; CHECK-NEXT: sth r3, 0(0) -; CHECK-NEXT: .LBB2_3: # %bb10 -; CHECK-NEXT: # -; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: beq cr0, .LBB2_5 -; CHECK-NEXT: # %bb.4: # %bb12 -; CHECK-NEXT: # -; CHECK-NEXT: addi r30, r30, 1 -; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB2_1 -; CHECK-NEXT: b .LBB2_3 +; CHECK-NEXT: b .LBB2_1 ; CHECK-NEXT: .LBB2_5: # %bb14 ; CHECK-NEXT: ld r30, 32(r1) # 8-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 48 @@ -263,20 +265,21 @@ ; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -64(r1) ; CHECK-NEXT: fmr f31, f1 -; CHECK-NEXT: li r30, 0 +; CHECK-NEXT: li r3, 1 ; CHECK-NEXT: li r29, 0 ; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: rldic r30, r3, 62, 1 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB3_1: # %bb1 ; CHECK-NEXT: # ; CHECK-NEXT: fmr f1, f31 ; CHECK-NEXT: bl __gnu_f2h_ieee ; CHECK-NEXT: nop -; CHECK-NEXT: addi r29, r29, -12 -; CHECK-NEXT: sth r3, 0(r30) -; CHECK-NEXT: addi r30, r30, 24 -; CHECK-NEXT: cmpldi r29, 0 -; CHECK-NEXT: bne+ cr0, .LBB3_1 +; CHECK-NEXT: addi r30, r30, -1 +; CHECK-NEXT: sth r3, 0(r29) +; CHECK-NEXT: addi r29, r29, 24 +; CHECK-NEXT: cmpldi r30, 0 +; CHECK-NEXT: bc 12, gt, .LBB3_1 ; CHECK-NEXT: # %bb.2: # %bb5 ; ; CHECK-P9-LABEL: func_48785: diff --git a/llvm/test/CodeGen/PowerPC/pr55463.ll b/llvm/test/CodeGen/PowerPC/pr55463.ll --- a/llvm/test/CodeGen/PowerPC/pr55463.ll +++ b/llvm/test/CodeGen/PowerPC/pr55463.ll @@ -6,14 +6,12 @@ ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: mflr 0 ; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: # implicit-def: $r3 ; CHECK-NEXT: stw 0, 20(1) +; CHECK-NEXT: mtctr 3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: # %bb1 ; CHECK-NEXT: # -; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: cmplwi 3, 0 -; CHECK-NEXT: bne 0, .LBB0_1 +; CHECK-NEXT: bdnz .LBB0_1 ; CHECK-NEXT: # %bb.2: # %bb8 ; CHECK-NEXT: bl wibble ; CHECK-NEXT: lwz 0, 20(1) @@ -70,10 +68,10 @@ ; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7 ; CHECK-NEXT: bl fma ; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: addi 28, 28, 1 +; CHECK-NEXT: addi 28, 28, -1 ; CHECK-NEXT: cmplwi 28, 0 ; CHECK-NEXT: efdctsiz 3, 3 -; CHECK-NEXT: bne 0, .LBB1_1 +; CHECK-NEXT: bc 12, 1, .LBB1_1 ; CHECK-NEXT: # %bb.2: # %bb8 ; CHECK-NEXT: bl wibble ; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/PowerPC/spe.ll b/llvm/test/CodeGen/PowerPC/spe.ll --- a/llvm/test/CodeGen/PowerPC/spe.ll +++ b/llvm/test/CodeGen/PowerPC/spe.ll @@ -1793,10 +1793,11 @@ ; CHECK-NEXT: efscfsi 3, 29 ; CHECK-NEXT: mr 4, 3 ; CHECK-NEXT: bl fmaf -; CHECK-NEXT: addi 29, 29, 1 -; CHECK-NEXT: cmplw 30, 29 +; CHECK-NEXT: addi 30, 30, -1 ; CHECK-NEXT: mr 5, 3 -; CHECK-NEXT: bne 0, .LBB56_2 +; CHECK-NEXT: cmplwi 30, 0 +; CHECK-NEXT: addi 29, 29, 1 +; CHECK-NEXT: bc 12, 1, .LBB56_2 ; CHECK-NEXT: b .LBB56_4 ; CHECK-NEXT: .LBB56_3: ; CHECK-NEXT: # implicit-def: $r5 diff --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll --- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll +++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll @@ -14,31 +14,34 @@ ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: mr r29, r5 ; CHECK-NEXT: mr r30, r4 -; CHECK-NEXT: mr r28, r3 +; CHECK-NEXT: cmpwi r5, 1 +; CHECK-NEXT: mr r29, r3 ; CHECK-NEXT: std r2, 24(r1) ; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: cmpwi r29, 1 -; CHECK-NEXT: bc 12, lt, .LBB0_3 +; CHECK-NEXT: bc 12, lt, .LBB0_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: cmpwi r30, 11 -; CHECK-NEXT: bc 12, lt, .LBB0_3 +; CHECK-NEXT: bc 12, lt, .LBB0_4 +; CHECK-NEXT: # %bb.2: # %for.body.us.preheader +; CHECK-NEXT: addi r3, r5, -1 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: addi r28, r3, 1 ; CHECK-NEXT: .p2align 5 -; CHECK-NEXT: .LBB0_2: # %for.body.us +; CHECK-NEXT: .LBB0_3: # %for.body.us ; CHECK-NEXT: # -; CHECK-NEXT: mtctr r28 +; CHECK-NEXT: mtctr r29 ; CHECK-NEXT: mr r3, r30 -; CHECK-NEXT: mr r12, r28 +; CHECK-NEXT: mr r12, r29 ; CHECK-NEXT: bctrl ; CHECK-NEXT: ld 2, 24(r1) -; CHECK-NEXT: addi r29, r29, -1 -; CHECK-NEXT: cmplwi r29, 0 -; CHECK-NEXT: bne cr0, .LBB0_2 -; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup -; CHECK-NEXT: mtctr r28 +; CHECK-NEXT: addi r28, r28, -1 +; CHECK-NEXT: cmpldi r28, 0 +; CHECK-NEXT: bc 12, gt, .LBB0_3 +; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup +; CHECK-NEXT: mtctr r29 ; CHECK-NEXT: mr r3, r30 -; CHECK-NEXT: mr r12, r28 +; CHECK-NEXT: mr r12, r29 ; CHECK-NEXT: bctrl ; CHECK-NEXT: ld 2, 24(r1) ; CHECK-NEXT: addi r1, r1, 64