Index: docs/LangRef.rst
===================================================================
--- docs/LangRef.rst
+++ docs/LangRef.rst
@@ -3436,7 +3436,9 @@
 may be any of the :ref:`binary <binaryops>` or :ref:`bitwise binary
 <bitwiseops>` operations. The constraints on operands are the same as
 those for the corresponding instruction (e.g. no bitwise
-operations on floating-point values are allowed).
+operations on floating-point values are allowed). Division or remainder
+by zero and overflowing signed division produce poison (unlike the
+division and remainder instructions, which have undefined behavior).
 
 Other Values
 ============
Index: docs/ReleaseNotes.rst
===================================================================
--- docs/ReleaseNotes.rst
+++ docs/ReleaseNotes.rst
@@ -72,6 +72,10 @@
   pointee type. In the next release we intend to make this parameter
   mandatory in preparation for opaque pointer types.
 
+* The semantics of constant expressions have changed: it is no longer
+  possible for a constant expression to have undefined behavior. The
+  ``Constant::canTrap()`` C++ API has been removed.
+
 Changes to the ARM Backend
 --------------------------
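For illustration, a minimal sketch of the new semantics (the global @g is
hypothetical, not from the patch): a division constant expression whose
divisor is not known to be non-zero now simply yields poison, so it is safe
to evaluate unconditionally.

  @g = extern_weak global i8

  define i32 @example(i1 %c) {
    ; If @g resolves to address zero, the constant expression below is
    ; poison, and %r is poison only when %c is true. Previously, merely
    ; materializing the division could trap at run time.
    %r = select i1 %c, i32 udiv (i32 1, i32 ptrtoint (i8* @g to i32)), i32 0
    ret i32 %r
  }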
Index: include/llvm/Analysis/ValueTracking.h
===================================================================
--- include/llvm/Analysis/ValueTracking.h
+++ include/llvm/Analysis/ValueTracking.h
@@ -388,7 +388,7 @@
   ///
   /// This method can return true for instructions that read memory;
   /// for such instructions, moving them may change the resulting value.
-  bool isSafeToSpeculativelyExecute(const Value *V,
+  bool isSafeToSpeculativelyExecute(const Instruction *I,
                                     const Instruction *CtxI = nullptr,
                                     const DominatorTree *DT = nullptr);
Index: include/llvm/CodeGen/GlobalISel/IRTranslator.h
===================================================================
--- include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -337,15 +337,27 @@
   }
   bool translateUDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_UDIV, U, MIRBuilder);
   }
   bool translateSDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_SDIV, U, MIRBuilder);
   }
   bool translateURem(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_UREM, U, MIRBuilder);
   }
   bool translateSRem(const User &U, MachineIRBuilder &MIRBuilder) {
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(U))
+      return false;
     return translateBinaryOp(TargetOpcode::G_SREM, U, MIRBuilder);
   }
   bool translateIntToPtr(const User &U, MachineIRBuilder &MIRBuilder) {
Index: include/llvm/IR/Constant.h
===================================================================
--- include/llvm/IR/Constant.h
+++ include/llvm/IR/Constant.h
@@ -94,10 +94,6 @@
   /// expressions.
   bool containsConstantExpression() const;
 
-  /// Return true if evaluation of this constant could trap. This is true for
-  /// things like constant expressions that could divide by zero.
-  bool canTrap() const;
-
   /// Return true if the value can vary between threads.
   bool isThreadDependent() const;
Index: include/llvm/IR/Constants.h
===================================================================
--- include/llvm/IR/Constants.h
+++ include/llvm/IR/Constants.h
@@ -1246,6 +1246,9 @@
   /// Returns an Instruction which implements the same operation as this
   /// ConstantExpr. The instruction is not linked to any basic block.
   ///
+  /// For division operations, the denominator may be rewritten to avoid
+  /// generating a division which would trap.
+  ///
   /// A better approach to this could be to have a constructor for Instruction
   /// which would take a ConstantExpr parameter, but that would have spread
   /// implementation details of ConstantExpr outside of Constants.cpp, which
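To illustrate the rewrite that the updated getAsInstruction() comment
describes (an explanatory sketch with placeholder i32 constants N and D, not
patch content): for an unsigned division constant expression, the denominator
of the materialized instruction is clamped with further constant expressions
so the emitted udiv cannot trap:

  ; udiv (i32 N, i32 D) is materialized roughly as:
  %q = udiv i32 N, select (i1 icmp eq (i32 D, i32 0), i32 1, i32 D)

If D is in fact zero the original constant expression is poison anyway, so
dividing by 1 instead is a valid refinement.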
Index: lib/Analysis/CodeMetrics.cpp
===================================================================
--- lib/Analysis/CodeMetrics.cpp
+++ lib/Analysis/CodeMetrics.cpp
@@ -34,7 +34,8 @@
 
     for (const Value *Operand : U->operands())
       if (Visited.insert(Operand).second)
-        if (isSafeToSpeculativelyExecute(Operand))
+        if (!isa<Instruction>(Operand) ||
+            isSafeToSpeculativelyExecute(cast<Instruction>(Operand)))
           Worklist.push_back(Operand);
 }
Index: lib/Analysis/ValueTracking.cpp
===================================================================
--- lib/Analysis/ValueTracking.cpp
+++ lib/Analysis/ValueTracking.cpp
@@ -485,7 +485,8 @@
     if (V == E)
       return true;
 
-    if (V == I || isSafeToSpeculativelyExecute(V)) {
+    if (V == I || !isa<Instruction>(V) ||
+        isSafeToSpeculativelyExecute(cast<Instruction>(V))) {
       EphValues.insert(V);
       if (const User *U = dyn_cast<User>(V))
         for (User::const_op_iterator J = U->op_begin(), JE = U->op_end();
@@ -3895,18 +3896,9 @@
   return true;
 }
 
-bool llvm::isSafeToSpeculativelyExecute(const Value *V,
+bool llvm::isSafeToSpeculativelyExecute(const Instruction *Inst,
                                         const Instruction *CtxI,
                                         const DominatorTree *DT) {
-  const Operator *Inst = dyn_cast<Operator>(V);
-  if (!Inst)
-    return false;
-
-  for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
-    if (Constant *C = dyn_cast<Constant>(Inst->getOperand(i)))
-      if (C->canTrap())
-        return false;
-
   switch (Inst->getOpcode()) {
   default:
     return true;
Index: lib/CodeGen/SelectionDAG/FastISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/FastISel.cpp
+++ lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1817,14 +1817,26 @@
   case Instruction::FMul:
     return selectBinaryOp(I, ISD::FMUL);
   case Instruction::SDiv:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::SDIV);
   case Instruction::UDiv:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::UDIV);
   case Instruction::FDiv:
     return selectBinaryOp(I, ISD::FDIV);
   case Instruction::SRem:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::SREM);
   case Instruction::URem:
+    // Non-trapping div for ConstantExpr not yet implemented.
+    if (isa<ConstantExpr>(I))
+      return false;
    return selectBinaryOp(I, ISD::UREM);
   case Instruction::FRem:
     return selectBinaryOp(I, ISD::FREM);
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -878,17 +878,18 @@
 
   void visitBinary(const User &I, unsigned Opcode);
   void visitShift(const User &I, unsigned Opcode);
+  void visitDivRem(const User &I, unsigned Opcode);
   void visitAdd(const User &I) { visitBinary(I, ISD::ADD); }
   void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
   void visitSub(const User &I) { visitBinary(I, ISD::SUB); }
   void visitFSub(const User &I);
   void visitMul(const User &I) { visitBinary(I, ISD::MUL); }
   void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
-  void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
-  void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+  void visitURem(const User &I) { visitDivRem(I, ISD::UREM); }
+  void visitSRem(const User &I) { visitDivRem(I, ISD::SREM); }
   void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
-  void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
-  void visitSDiv(const User &I);
+  void visitUDiv(const User &I) { visitDivRem(I, ISD::UDIV); }
+  void visitSDiv(const User &I) { visitDivRem(I, ISD::SDIV); }
   void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
   void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
   void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3206,15 +3206,48 @@
   setValue(&I, Res);
 }
 
-void SelectionDAGBuilder::visitSDiv(const User &I) {
+void SelectionDAGBuilder::visitDivRem(const User &I, unsigned Opcode) {
+  if (!isa<ConstantExpr>(I))
+    return visitBinary(I, Opcode);
+
+  // Constants aren't allowed to trap, so we have to do something
+  // a bit trickier.
+  //
+  // FIXME: Some targets have a cheap non-trapping div.
   SDValue Op1 = getValue(I.getOperand(0));
   SDValue Op2 = getValue(I.getOperand(1));
-
-  SDNodeFlags Flags;
-  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
-                 cast<PossiblyExactOperator>(&I)->isExact());
-  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
-                           Op2, Flags));
+  SDLoc dl(getCurSDLoc());
+  EVT VT = Op1.getValueType();
+  if (Opcode == ISD::UDIV || Opcode == ISD::UREM) {
+    // Ensure the denominator is not zero.
+    Op2 = DAG.getNode(ISD::UMAX, dl, VT, Op2, DAG.getConstant(1, dl, VT));
+  } else {
+    // Ensure the denominator is not zero, and we are not dividing INT_MIN
+    // by -1.
+    auto &TLI = DAG.getTargetLoweringInfo();
+    EVT CCVT =
+        TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+    SDValue IsZero =
+        DAG.getSetCC(dl, CCVT, Op2, DAG.getConstant(0, dl, VT), ISD::SETEQ);
+    SDValue IsNegOne =
+        DAG.getSetCC(dl, CCVT, Op2, DAG.getAllOnesConstant(dl, VT), ISD::SETEQ);
+    auto IntMin = APInt::getSignedMinValue(VT.getScalarSizeInBits());
+    SDValue IsIntMin = DAG.getSetCC(
+        dl, CCVT, Op1, DAG.getConstant(IntMin, dl, VT), ISD::SETEQ);
+    SDValue IsIntMinOverNegOne =
+        DAG.getNode(ISD::AND, dl, CCVT, IsNegOne, IsIntMin);
+    SDValue IsInvalid =
+        DAG.getNode(ISD::OR, dl, CCVT, IsZero, IsIntMinOverNegOne);
+    ISD::NodeType SelectOpCode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
+    Op2 = DAG.getNode(SelectOpCode, dl, VT, IsInvalid,
+                      DAG.getConstant(1, dl, VT), Op2);
+  }
+
+  SDNodeFlags DivFlags;
+  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I))
+    DivFlags.setExact(ExactOp->isExact());
+  SDValue BinNodeValue = DAG.getNode(Opcode, dl, VT, Op1, Op2, DivFlags);
+  setValue(&I, BinNodeValue);
 }
 
 void SelectionDAGBuilder::visitICmp(const User &I) {
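An IR-level picture of the DAG visitDivRem builds for a constant-expression
divide (illustrative only; %n and %d stand for the i32 operands):

  ; unsigned udiv/urem: UMAX(%d, 1) is equivalent to
  %is0    = icmp eq i32 %d, 0
  %d.safe = select i1 %is0, i32 1, i32 %d

  ; signed sdiv/srem: additionally guard against INT_MIN / -1
  %ism1   = icmp eq i32 %d, -1
  %ismin  = icmp eq i32 %n, -2147483648
  %ovf    = and i1 %ism1, %ismin
  %bad    = or i1 %is0, %ovf
  %d.sdiv = select i1 %bad, i32 1, i32 %d

Replacing the divisor is sound because the original expression is poison in
exactly the cases where the select picks 1.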
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -342,47 +342,6 @@
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that
-/// may trap on it. In this case we have to split the edge so that the path
-/// through the predecessor block that doesn't go to the phi block doesn't
-/// execute the possibly trapping instruction. If available, we pass domtree
-/// and loop info to be updated when we split critical edges. This is because
-/// SelectionDAGISel preserves these analyses.
-/// This is required for correctness, so it must be done at -O0.
-///
-static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
-                                         LoopInfo *LI) {
-  // Loop for blocks with phi nodes.
-  for (BasicBlock &BB : Fn) {
-    PHINode *PN = dyn_cast<PHINode>(BB.begin());
-    if (!PN) continue;
-
-  ReprocessBlock:
-    // For each block with a PHI node, check to see if any of the input values
-    // are potentially trapping constant expressions. Constant expressions are
-    // the only potentially trapping value that can occur as the argument to a
-    // PHI.
-    for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I)
-      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
-        ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
-        if (!CE || !CE->canTrap()) continue;
-
-        // The only case we have to worry about is when the edge is critical.
-        // Since this block has a PHI Node, we assume it has multiple input
-        // edges: check to see if the pred has multiple successors.
-        BasicBlock *Pred = PN->getIncomingBlock(i);
-        if (Pred->getTerminator()->getNumSuccessors() == 1)
-          continue;
-
-        // Okay, we have to split this edge.
-        SplitCriticalEdge(
-            Pred->getTerminator(), GetSuccessorNumber(Pred, &BB),
-            CriticalEdgeSplittingOptions(DT, LI).setMergeIdenticalEdges());
-        goto ReprocessBlock;
-      }
-  }
-}
-
 static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
                                          MachineModuleInfo &MMI) {
   // Only needed for MSVC
@@ -437,15 +396,9 @@
   LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
   GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
   ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
-  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
-  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
-  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
 
   LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
 
-  SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
-
   CurDAG->init(*MF, *ORE, this, LibInfo,
                getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
   FuncInfo->set(Fn, *MF, CurDAG);
Index: lib/IR/Constants.cpp
===================================================================
--- lib/IR/Constants.cpp
+++ lib/IR/Constants.cpp
@@ -408,42 +408,6 @@
   delete this;
 }
 
-static bool canTrapImpl(const Constant *C,
-                        SmallPtrSetImpl<const ConstantExpr *> &NonTrappingOps) {
-  assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
-  // The only thing that could possibly trap are constant exprs.
-  const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
-  if (!CE)
-    return false;
-
-  // ConstantExpr traps if any operands can trap.
-  for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
-    if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
-      if (NonTrappingOps.insert(Op).second && canTrapImpl(Op, NonTrappingOps))
-        return true;
-    }
-  }
-
-  // Otherwise, only specific operations can trap.
-  switch (CE->getOpcode()) {
-  default:
-    return false;
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
-    // Div and rem can trap if the RHS is not known to be non-zero.
-    if (!isa<ConstantInt>(CE->getOperand(1)) || CE->getOperand(1)->isNullValue())
-      return true;
-    return false;
-  }
-}
-
-bool Constant::canTrap() const {
-  SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
-  return canTrapImpl(this, NonTrappingOps);
-}
-
 /// Check if C contains a GlobalValue for which Predicate is true.
 static bool
 ConstHasGlobalValuePredicate(const Constant *C,
@@ -2992,7 +2956,6 @@
     return ExtractValueInst::Create(Ops[0], getIndices());
   case Instruction::ShuffleVector:
     return new ShuffleVectorInst(Ops[0], Ops[1], Ops[2]);
-
   case Instruction::GetElementPtr: {
     const auto *GO = cast<GEPOperator>(this);
     if (GO->isInBounds())
@@ -3009,9 +2972,37 @@
     return UnaryOperator::Create((Instruction::UnaryOps)getOpcode(), Ops[0]);
   default:
     assert(getNumOperands() == 2 && "Must be binary operator?");
+    Constant *Op0 = getOperand(0);
+    Constant *Op1 = getOperand(1);
+    if (getOpcode() == Instruction::UDiv || getOpcode() == Instruction::URem) {
+      // Ensure the denominator is not zero.
+      Constant *Zero = Constant::getNullValue(getType());
+      Constant *One = ConstantInt::get(getType(), 1);
+      Constant *IsZero = ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, Zero);
+      Op1 = ConstantExpr::getSelect(IsZero, One, Op1);
+    }
+    if (getOpcode() == Instruction::SDiv || getOpcode() == Instruction::SRem) {
+      // Ensure the denominator is not zero, and we are not dividing INT_MIN
+      // by -1.
+      unsigned BitWidth = getType()->getScalarSizeInBits();
+      assert(BitWidth != 1 && "One-bit divide should be folded away");
+      Constant *Zero = Constant::getNullValue(getType());
+      Constant *NegOne = Constant::getAllOnesValue(getType());
+      Constant *One = ConstantInt::get(getType(), 1);
+      Constant *SignedMin =
+          ConstantInt::get(getType(), APInt::getSignedMinValue(BitWidth));
+      Constant *IsZero =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, Zero);
+      Constant *IsNegOne =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op1, NegOne);
+      Constant *IsIntMin =
+          ConstantExpr::getICmp(CmpInst::ICMP_EQ, Op0, SignedMin);
+      Constant *IsOverflowing = ConstantExpr::getAnd(IsNegOne, IsIntMin);
+      Constant *IsUndefined = ConstantExpr::getOr(IsOverflowing, IsZero);
+      Op1 = ConstantExpr::getSelect(IsUndefined, One, Op1);
+    }
     BinaryOperator *BO =
-      BinaryOperator::Create((Instruction::BinaryOps)getOpcode(),
-                             Ops[0], Ops[1]);
+        BinaryOperator::Create((Instruction::BinaryOps)getOpcode(), Op0, Op1);
     if (isa<OverflowingBinaryOperator>(BO)) {
       BO->setHasNoUnsignedWrap(SubclassOptionalData &
                                OverflowingBinaryOperator::NoUnsignedWrap);
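For the signed case, the guards built above fold into the divisor operand as
a single constant-expression select. A sketch with placeholder i32 constants
N and D (not patch content):

  sdiv i32 N,
       select (i1 or (i1 and (i1 icmp eq (i32 D, i32 -1),
                              i1 icmp eq (i32 N, i32 -2147483648)),
                      i1 icmp eq (i32 D, i32 0)),
               i32 1, i32 D)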
Index: lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyCFG.cpp
+++ lib/Transforms/Utils/SimplifyCFG.cpp
@@ -308,7 +308,8 @@
 /// expensive.
 static unsigned ComputeSpeculationCost(const User *I,
                                        const TargetTransformInfo &TTI) {
-  assert(isSafeToSpeculativelyExecute(I) &&
+  assert(!isa<Instruction>(I) ||
+         isSafeToSpeculativelyExecute(cast<Instruction>(I)) &&
          "Instruction is not safe to speculatively execute!");
   return TTI.getUserCost(I);
 }
@@ -343,14 +344,8 @@
     return false;
 
   Instruction *I = dyn_cast<Instruction>(V);
-  if (!I) {
-    // Non-instructions all dominate instructions, but not all constantexprs
-    // can be executed unconditionally.
-    if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
-      if (C->canTrap())
-        return false;
+  if (!I)
     return true;
-  }
   BasicBlock *PBB = I->getParent();
 
   // We don't want to allow weird loops that might have the "if condition" in
@@ -1378,11 +1373,6 @@
       if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
           passingValueIsAlwaysUndefined(BB2V, &PN))
         return Changed;
-
-      if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
-        return Changed;
-      if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
-        return Changed;
     }
   }
@@ -2056,9 +2046,6 @@
     if (!OrigCE && !ThenCE)
       continue; // Known safe and cheap.
 
-    if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
-        (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
-      return false;
     unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
     unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
     unsigned MaxCost =
@@ -2467,18 +2454,6 @@
     if (FVPN->getParent() == FalseSucc)
       FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
 
-  // In order for this transformation to be safe, we must be able to
-  // unconditionally execute both operands to the return. This is
-  // normally the case, but we could have a potentially-trapping
-  // constant expression that prevents this transformation from being
-  // safe.
-  if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
-    if (TCV->canTrap())
-      return false;
-  if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
-    if (FCV->canTrap())
-      return false;
-
   // Okay, we collected all the mapped values and checked them for sanity, and
   // defined to really do this transformation. First, update the CFG.
   TrueSucc->removePredecessor(BI->getParent());
@@ -2634,15 +2609,6 @@
     return false;
   }
 
-  // Cond is known to be a compare or binary operator. Check to make sure that
-  // neither operand is a potentially-trapping constant expression.
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
-    if (CE->canTrap())
-      return false;
-  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
-    if (CE->canTrap())
-      return false;
-
   // Finally, don't infinitely unroll conditional loops.
   BasicBlock *TrueDest = BI->getSuccessor(0);
   BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
@@ -3244,10 +3210,6 @@
     }
   }
 
-  if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
-    if (CE->canTrap())
-      return false;
-
   // If both branches are conditional and both contain stores to the same
   // address, remove the stores from the conditionals and create a conditional
   // merged store at the end.
@@ -3288,29 +3250,12 @@
   // Do not perform this transformation if it would require
   // insertion of a large number of select instructions. For targets
   // without predication/cmovs, this is a big pessimization.
-
-  // Also do not perform this transformation if any phi node in the common
-  // destination block can trap when reached by BB or PBB (PR17073). In that
-  // case, it would be unsafe to hoist the operation into a select instruction.
-
   BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
   unsigned NumPhis = 0;
   for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
        ++II, ++NumPhis) {
     if (NumPhis > 2) // Disable this xform.
       return false;
-
-    PHINode *PN = cast<PHINode>(II);
-    Value *BIV = PN->getIncomingValueForBlock(BB);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
-      if (CE->canTrap())
-        return false;
-
-    unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
-    Value *PBIV = PN->getIncomingValue(PBBIdx);
-    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
-      if (CE->canTrap())
-        return false;
   }
 
   // Finally, if everything is ok, fold the branches to logical ops.
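With the canTrap() checks gone, SimplifyCFG can treat a division constant
expression like any other constant when speculating values. A sketch of a
fold that is now permitted (@g1/@g2 as in the tests below):

  ; before
  entry:
    br i1 %c, label %join, label %other
  other:
    br label %join
  join:
    %p = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32),
                         i32 ptrtoint (i8* @g2 to i32)), %entry ],
                 [ 0, %other ]

  ; after: the constant expression is evaluated unconditionally
  entry:
    %p = select i1 %c, i32 sdiv (i32 ptrtoint (i8* @g1 to i32),
                                 i32 ptrtoint (i8* @g2 to i32)), i32 0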
Index: lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -378,20 +378,6 @@
   return true;
 }
 
-/// Check whether it is safe to if-convert this phi node.
-///
-/// Phi nodes with constant expressions that can trap are not safe to if
-/// convert.
-static bool canIfConvertPHINodes(BasicBlock *BB) {
-  for (PHINode &Phi : BB->phis()) {
-    for (Value *V : Phi.incoming_values())
-      if (auto *C = dyn_cast<Constant>(V))
-        if (C->canTrap())
-          return false;
-  }
-  return true;
-}
-
 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
   if (Ty->isPointerTy())
     return DL.getIntPtrType(Ty);
@@ -877,12 +863,6 @@
   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 
   for (Instruction &I : *BB) {
-    // Check that we don't have a constant expression that can trap as operand.
-    for (Value *Operand : I.operands()) {
-      if (auto *C = dyn_cast<Constant>(Operand))
-        if (C->canTrap())
-          return false;
-    }
     // We might be able to hoist the load.
     if (I.mayReadFromMemory()) {
       auto *LI = dyn_cast<LoadInst>(&I);
@@ -941,7 +921,6 @@
   }
 
   // Collect the blocks that need predication.
-  BasicBlock *Header = TheLoop->getHeader();
   for (BasicBlock *BB : TheLoop->blocks()) {
     // We don't support switch statements inside loops.
     if (!isa<BranchInst>(BB->getTerminator())) {
@@ -960,12 +939,6 @@
           "NoCFGForSelect", BB->getTerminator());
       return false;
     }
-    } else if (BB != Header && !canIfConvertPHINodes(BB)) {
-      reportVectorizationFailure(
-          "Control flow cannot be substituted for a select",
-          "control flow cannot be substituted for a select",
-          "NoCFGForSelect", BB->getTerminator());
-      return false;
     }
   }
Index: test/CodeGen/X86/critical-edge-split-2.ll
===================================================================
--- test/CodeGen/X86/critical-edge-split-2.ll
+++ test/CodeGen/X86/critical-edge-split-2.ll
@@ -21,6 +21,8 @@
 ; CHECK-NEXT: cmpq %rax, %rcx
 ; CHECK-NEXT: sete %sil
 ; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: cmovnel %eax, %esi
+; CHECK-NEXT: movl $1, %eax
 ; CHECK-NEXT: xorl %edx, %edx
 ; CHECK-NEXT: divl %esi
 ; CHECK-NEXT: movl %edx, %eax
Index: test/CodeGen/X86/divide-constant-expression.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/divide-constant-expression.ll
@@ -0,0 +1,390 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=SDAG
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -verify-machineinstrs | FileCheck %s -check-prefix=FAST
+; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=0 -verify-machineinstrs | FileCheck %s -check-prefix=GLOBAL
+
+@g1 = extern_weak global i8
+@g2 = extern_weak global i8
+
+define i32 @test1(i1 %c) {
+; SDAG-LABEL: test1:
+; SDAG: # %bb.0: # %entry
+; SDAG-NEXT: movl $g1, %eax
+; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; SDAG-NEXT: movl $g2, %esi
+; SDAG-NEXT: movl $g2, %ecx
+; SDAG-NEXT: notl %ecx
+; SDAG-NEXT: orl %eax, %ecx
+; SDAG-NEXT: sete %al
+; SDAG-NEXT: testl %esi, %esi
+; SDAG-NEXT: sete %cl
+; SDAG-NEXT: orb %al, %cl
+; SDAG-NEXT: movl $1, %ecx
+; SDAG-NEXT: cmovnel %ecx, %esi
+; SDAG-NEXT: movl $g1, %eax
+; SDAG-NEXT: cltd
+; SDAG-NEXT: idivl %esi
+; SDAG-NEXT: testb $1, %dil
+; SDAG-NEXT: je .LBB0_2
+; SDAG-NEXT: # %bb.1:
+; SDAG-NEXT: movl %eax, %ecx
+; SDAG-NEXT: .LBB0_2: # %cond.end.i
+; SDAG-NEXT: movl %ecx, %eax
+; SDAG-NEXT: retq
+;
+; FAST-LABEL: test1:
+; FAST: # %bb.0: # %entry
+; FAST-NEXT: movl $g1, %eax
+; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; FAST-NEXT: movl $g2, %esi
+; FAST-NEXT: movl $g2, %ecx
+; FAST-NEXT: notl %ecx
+; FAST-NEXT: orl %eax, %ecx
+; FAST-NEXT: sete %al
+; FAST-NEXT: testl %esi, %esi
+; FAST-NEXT: sete %cl
+; FAST-NEXT: orb %al, %cl
+; FAST-NEXT: movl $1, %ecx
+; FAST-NEXT: cmovnel %ecx, %esi
+; FAST-NEXT: movl $g1, %eax
+; FAST-NEXT: cltd
+; FAST-NEXT: idivl %esi
+; FAST-NEXT: testb $1, %dil
+; FAST-NEXT: je .LBB0_2
+; FAST-NEXT: # %bb.1:
+; FAST-NEXT: movl %eax, %ecx
+; FAST-NEXT: .LBB0_2: # %cond.end.i
+; FAST-NEXT: movl %ecx, %eax
+; FAST-NEXT: retq
+;
+; GLOBAL-LABEL: test1:
+; GLOBAL: # %bb.0: # %entry
+; GLOBAL-NEXT: movl $g1, %eax
+; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000
+; GLOBAL-NEXT: movl $g2, %esi
+; GLOBAL-NEXT: movl $g2, %ecx
+; GLOBAL-NEXT: notl %ecx
+; GLOBAL-NEXT: orl %eax, %ecx
+; GLOBAL-NEXT: sete %al
+; GLOBAL-NEXT: testl %esi, %esi
+; GLOBAL-NEXT: sete %cl
+; GLOBAL-NEXT: orb %al, %cl
+; GLOBAL-NEXT: movl $1, %ecx
+; GLOBAL-NEXT: cmovnel %ecx, %esi
+; GLOBAL-NEXT: movl $g1, %eax
+; GLOBAL-NEXT: cltd
+; GLOBAL-NEXT: idivl %esi
+; GLOBAL-NEXT: testb $1, %dil
+; GLOBAL-NEXT: je .LBB0_2
+; GLOBAL-NEXT: # %bb.1:
+; GLOBAL-NEXT: movl %eax, %ecx
+; GLOBAL-NEXT:
.LBB0_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test2(i1 %c) { +; SDAG-LABEL: test2: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: cmpl $1, %esi +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovbel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl %edx, %edx +; SDAG-NEXT: divl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB1_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %eax, %ecx +; SDAG-NEXT: .LBB1_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test2: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: cmpl $1, %esi +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovbel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl %edx, %edx +; FAST-NEXT: divl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB1_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %eax, %ecx +; FAST-NEXT: .LBB1_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test2: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: cmpl $1, %esi +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovbel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl %edx, %edx +; GLOBAL-NEXT: divl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB1_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %eax, %ecx +; GLOBAL-NEXT: .LBB1_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ udiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test3(i1 %c) { +; SDAG-LABEL: test3: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: movl $g2, %ecx +; SDAG-NEXT: notl %ecx +; SDAG-NEXT: orl %eax, %ecx +; SDAG-NEXT: sete %al +; SDAG-NEXT: testl %esi, %esi +; SDAG-NEXT: sete %cl +; SDAG-NEXT: orb %al, %cl +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovnel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: cltd +; SDAG-NEXT: idivl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB2_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %edx, %ecx +; SDAG-NEXT: .LBB2_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test3: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: movl $g2, %ecx +; FAST-NEXT: notl %ecx +; FAST-NEXT: orl %eax, %ecx +; FAST-NEXT: sete %al +; FAST-NEXT: testl %esi, %esi +; FAST-NEXT: sete %cl +; FAST-NEXT: orb %al, %cl +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovnel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: cltd +; FAST-NEXT: idivl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB2_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %edx, %ecx +; FAST-NEXT: .LBB2_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test3: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: movl 
$g2, %ecx +; GLOBAL-NEXT: notl %ecx +; GLOBAL-NEXT: orl %eax, %ecx +; GLOBAL-NEXT: sete %al +; GLOBAL-NEXT: testl %esi, %esi +; GLOBAL-NEXT: sete %cl +; GLOBAL-NEXT: orb %al, %cl +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovnel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: cltd +; GLOBAL-NEXT: idivl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB2_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %edx, %ecx +; GLOBAL-NEXT: .LBB2_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ srem (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test4(i1 %c) { +; SDAG-LABEL: test4: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g2, %esi +; SDAG-NEXT: cmpl $1, %esi +; SDAG-NEXT: movl $1, %ecx +; SDAG-NEXT: cmovbel %ecx, %esi +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl %edx, %edx +; SDAG-NEXT: divl %esi +; SDAG-NEXT: testb $1, %dil +; SDAG-NEXT: je .LBB3_2 +; SDAG-NEXT: # %bb.1: +; SDAG-NEXT: movl %edx, %ecx +; SDAG-NEXT: .LBB3_2: # %cond.end.i +; SDAG-NEXT: movl %ecx, %eax +; SDAG-NEXT: retq +; +; FAST-LABEL: test4: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g2, %esi +; FAST-NEXT: cmpl $1, %esi +; FAST-NEXT: movl $1, %ecx +; FAST-NEXT: cmovbel %ecx, %esi +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl %edx, %edx +; FAST-NEXT: divl %esi +; FAST-NEXT: testb $1, %dil +; FAST-NEXT: je .LBB3_2 +; FAST-NEXT: # %bb.1: +; FAST-NEXT: movl %edx, %ecx +; FAST-NEXT: .LBB3_2: # %cond.end.i +; FAST-NEXT: movl %ecx, %eax +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test4: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g2, %esi +; GLOBAL-NEXT: cmpl $1, %esi +; GLOBAL-NEXT: movl $1, %ecx +; GLOBAL-NEXT: cmovbel %ecx, %esi +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl %edx, %edx +; GLOBAL-NEXT: divl %esi +; GLOBAL-NEXT: testb $1, %dil +; GLOBAL-NEXT: je .LBB3_2 +; GLOBAL-NEXT: # %bb.1: +; GLOBAL-NEXT: movl %edx, %ecx +; GLOBAL-NEXT: .LBB3_2: # %cond.end.i +; GLOBAL-NEXT: movl %ecx, %eax +; GLOBAL-NEXT: retq +entry: + br i1 %c, label %cond.end.i, label %cond.false.i + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ urem (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} + +define i32 @test5(i32 %c) { +; SDAG-LABEL: test5: +; SDAG: # %bb.0: # %entry +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; SDAG-NEXT: movl $g2, %ecx +; SDAG-NEXT: movl $g2, %edx +; SDAG-NEXT: notl %edx +; SDAG-NEXT: orl %eax, %edx +; SDAG-NEXT: sete %al +; SDAG-NEXT: testl %ecx, %ecx +; SDAG-NEXT: sete %dl +; SDAG-NEXT: orb %al, %dl +; SDAG-NEXT: movl $1, %eax +; SDAG-NEXT: cmovnel %eax, %ecx +; SDAG-NEXT: movl $g1, %eax +; SDAG-NEXT: cltd +; SDAG-NEXT: idivl %ecx +; SDAG-NEXT: #APP +; SDAG-NEXT: #NO_APP +; SDAG-NEXT: .Ltmp0: # Block address taken +; SDAG-NEXT: .LBB4_1: # %cond.false.i +; SDAG-NEXT: movl $1, %eax +; SDAG-NEXT: .LBB4_2: # %cond.end.i +; SDAG-NEXT: retq +; +; FAST-LABEL: test5: +; FAST: # %bb.0: # %entry +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; FAST-NEXT: movl $g2, %ecx +; FAST-NEXT: movl $g2, %edx +; FAST-NEXT: notl %edx +; FAST-NEXT: orl %eax, %edx +; FAST-NEXT: sete %al +; FAST-NEXT: testl %ecx, %ecx +; FAST-NEXT: sete %dl +; FAST-NEXT: orb %al, %dl +; FAST-NEXT: movl $1, %eax +; FAST-NEXT: cmovnel %eax, 
%ecx +; FAST-NEXT: movl $g1, %eax +; FAST-NEXT: cltd +; FAST-NEXT: idivl %ecx +; FAST-NEXT: #APP +; FAST-NEXT: #NO_APP +; FAST-NEXT: .Ltmp0: # Block address taken +; FAST-NEXT: .LBB4_1: # %cond.false.i +; FAST-NEXT: movl $1, %eax +; FAST-NEXT: .LBB4_2: # %cond.end.i +; FAST-NEXT: retq +; +; GLOBAL-LABEL: test5: +; GLOBAL: # %bb.0: # %entry +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: xorl $-2147483648, %eax # imm = 0x80000000 +; GLOBAL-NEXT: movl $g2, %ecx +; GLOBAL-NEXT: movl $g2, %edx +; GLOBAL-NEXT: notl %edx +; GLOBAL-NEXT: orl %eax, %edx +; GLOBAL-NEXT: sete %al +; GLOBAL-NEXT: testl %ecx, %ecx +; GLOBAL-NEXT: sete %dl +; GLOBAL-NEXT: orb %al, %dl +; GLOBAL-NEXT: movl $1, %eax +; GLOBAL-NEXT: cmovnel %eax, %ecx +; GLOBAL-NEXT: movl $g1, %eax +; GLOBAL-NEXT: cltd +; GLOBAL-NEXT: idivl %ecx +; GLOBAL-NEXT: #APP +; GLOBAL-NEXT: #NO_APP +; GLOBAL-NEXT: .Ltmp0: # Block address taken +; GLOBAL-NEXT: .LBB4_1: # %cond.false.i +; GLOBAL-NEXT: movl $1, %eax +; GLOBAL-NEXT: .LBB4_2: # %cond.end.i +; GLOBAL-NEXT: retq +entry: + callbr void asm "", "r,X"(i32 %c, i8 *blockaddress(@test5, %cond.false.i)) + to label %cond.false.i [label %cond.end.i] + +cond.false.i: + br label %cond.end.i + +cond.end.i: + %r = phi i32 [ sdiv (i32 ptrtoint (i8* @g1 to i32), i32 ptrtoint (i8* @g2 to i32)), %entry ], [ 1, %cond.false.i ] + ret i32 %r +} Index: test/Transforms/LoopVectorize/X86/masked_load_store.ll =================================================================== --- test/Transforms/LoopVectorize/X86/masked_load_store.ll +++ test/Transforms/LoopVectorize/X86/masked_load_store.ll @@ -1502,47 +1502,189 @@ @a = common global [1 x i32*] zeroinitializer, align 8 @c = common global i32* null, align 8 -; The loop here should not be vectorized due to trapping -; constant expression +; Constant expressions never trap; check that we perform the transform +; consistently. 
define void @foo5(i32* nocapture %A, i32* nocapture readnone %B, i32* nocapture readonly %trigger) local_unnamed_addr #0 { ; AVX-LABEL: @foo5( ; AVX-NEXT: entry: +; AVX-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; AVX-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* +; AVX-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; AVX: vector.memcheck: +; AVX-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000 +; AVX-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; AVX-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 +; AVX-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* +; AVX-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] +; AVX-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] +; AVX-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; AVX-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; AVX: vector.ph: +; AVX-NEXT: br label [[VECTOR_BODY:%.*]] +; AVX: vector.body: +; AVX-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AVX-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0 +; AVX-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> undef, <8 x i32> zeroinitializer +; AVX-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION6:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION7:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[INDUCTION8:%.*]] = add <8 x i64> [[BROADCAST_SPLAT]], +; AVX-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; AVX-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; AVX-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 +; AVX-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24 +; AVX-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] +; AVX-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] +; AVX-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] +; AVX-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] +; AVX-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; AVX-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP9]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 8 +; AVX-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD9:%.*]] = load <8 x i32>, <8 x i32>* [[TMP11]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16 +; AVX-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 24 +; AVX-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <8 x i32>* +; AVX-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x i32>, <8 x i32>* [[TMP15]], align 4, !alias.scope !41 +; AVX-NEXT: [[TMP16:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD]], +; AVX-NEXT: [[TMP17:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD9]], +; AVX-NEXT: [[TMP18:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD10]], +; AVX-NEXT: [[TMP19:%.*]] = icmp slt <8 x i32> [[WIDE_LOAD11]], +; AVX-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, i32* 
[[A]], i64 [[TMP0]] +; AVX-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] +; AVX-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] +; AVX-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; AVX-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; AVX-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP25]], i32 4, <8 x i1> [[TMP16]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 8 +; AVX-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP27]], i32 4, <8 x i1> [[TMP17]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16 +; AVX-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP29]], i32 4, <8 x i1> [[TMP18]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 24 +; AVX-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <8 x i32>* +; AVX-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> , <8 x i32>* [[TMP31]], i32 4, <8 x i1> [[TMP19]]), !alias.scope !44, !noalias !41 +; AVX-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32 +; AVX-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 +; AVX-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !46 +; AVX: middle.block: +; AVX-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 +; AVX-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AVX: scalar.ph: +; AVX-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; AVX-NEXT: br label [[FOR_BODY:%.*]] ; AVX: for.body: -; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]] -; AVX-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 +; AVX-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; AVX-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] +; AVX-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; AVX-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP33]], 100 ; AVX-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; AVX: if.then: -; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; AVX-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] ; AVX-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4 ; AVX-NEXT: br label [[FOR_INC]] ; AVX: for.inc: ; AVX-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; AVX-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !47 ; AVX: for.end: ; AVX-NEXT: ret void ; ; AVX512-LABEL: @foo5( ; AVX512-NEXT: 
entry: +; AVX512-NEXT: [[A1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; AVX512-NEXT: [[TRIGGER3:%.*]] = bitcast i32* [[TRIGGER:%.*]] to i8* +; AVX512-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; AVX512: vector.memcheck: +; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A]], i64 10000 +; AVX512-NEXT: [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8* +; AVX512-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[TRIGGER]], i64 10000 +; AVX512-NEXT: [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8* +; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[A1]], [[SCEVGEP45]] +; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[TRIGGER3]], [[SCEVGEP2]] +; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; AVX512-NEXT: [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true +; AVX512-NEXT: br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; AVX512: vector.ph: +; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] +; AVX512: vector.body: +; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; AVX512-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> undef, i64 [[INDEX]], i32 0 +; AVX512-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: [[INDUCTION:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION6:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION7:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[INDUCTION8:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; AVX512-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 +; AVX512-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 +; AVX512-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48 +; AVX512-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP0]] +; AVX512-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP1]] +; AVX512-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP2]] +; AVX512-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[TMP3]] +; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; AVX512-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP9]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 16 +; AVX512-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD9:%.*]] = load <16 x i32>, <16 x i32>* [[TMP11]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 32 +; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP13]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 48 +; AVX512-NEXT: [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <16 x i32>* +; AVX512-NEXT: [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP15]], align 4, !alias.scope !51 +; AVX512-NEXT: [[TMP16:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD]], +; AVX512-NEXT: [[TMP17:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD9]], +; AVX512-NEXT: [[TMP18:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD10]], +; AVX512-NEXT: [[TMP19:%.*]] = icmp slt <16 x i32> [[WIDE_LOAD11]], +; AVX512-NEXT: [[TMP20:%.*]] = getelementptr 
inbounds i32, i32* [[A]], i64 [[TMP0]] +; AVX512-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] +; AVX512-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] +; AVX512-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]] +; AVX512-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0 +; AVX512-NEXT: [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP25]], i32 4, <16 x i1> [[TMP16]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 16 +; AVX512-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP27]], i32 4, <16 x i1> [[TMP17]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 32 +; AVX512-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP29]], i32 4, <16 x i1> [[TMP18]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 48 +; AVX512-NEXT: [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <16 x i32>* +; AVX512-NEXT: call void @llvm.masked.store.v16i32.p0v16i32(<16 x i32> , <16 x i32>* [[TMP31]], i32 4, <16 x i1> [[TMP19]]), !alias.scope !54, !noalias !51 +; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 64 +; AVX512-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], 9984 +; AVX512-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !56 +; AVX512: middle.block: +; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 10000, 9984 +; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; AVX512: scalar.ph: +; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 9984, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; AVX512-NEXT: br label [[FOR_BODY:%.*]] ; AVX512: for.body: -; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] -; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER:%.*]], i64 [[INDVARS_IV]] -; AVX512-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP0]], 100 +; AVX512-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] +; AVX512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TRIGGER]], i64 [[INDVARS_IV]] +; AVX512-NEXT: [[TMP33:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; AVX512-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP33]], 100 ; AVX512-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; AVX512: if.then: -; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; AVX512-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] ; AVX512-NEXT: store i32 sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 1, i64 0), i32** @c) to i32)), i32* [[ARRAYIDX7]], align 4 ; AVX512-NEXT: br label [[FOR_INC]] ; AVX512: for.inc: ; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 10000 -; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label 
[[FOR_BODY]] +; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !57 ; AVX512: for.end: ; AVX512-NEXT: ret void ; @@ -1648,22 +1790,22 @@ ; AVX2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 ; AVX2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -3 ; AVX2-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -4 ; AVX2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -3 ; AVX2-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE16:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD15]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8 ; AVX2-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -3 ; AVX2-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD17:%.*]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD17]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -12 ; AVX2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -3 ; AVX2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* -; AVX2-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !41 +; AVX2-NEXT: [[WIDE_LOAD19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4, !alias.scope !48 ; AVX2-NEXT: [[REVERSE20:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD19]], <4 x i32> undef, <4 x i32> ; AVX2-NEXT: [[TMP20:%.*]] = icmp sgt <4 x i32> [[REVERSE]], zeroinitializer ; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt <4 x i32> [[REVERSE16]], zeroinitializer @@ -1677,25 +1819,25 @@ ; AVX2-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -3 ; AVX2-NEXT: [[REVERSE21:%.*]] = shufflevector <4 x i1> [[TMP20]], <4 x i1> undef, <4 x i32> ; AVX2-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>* -; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !44 +; AVX2-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP30]], i32 8, <4 x i1> [[REVERSE21]], <4 x double> undef), !alias.scope !51 ; AVX2-NEXT: [[REVERSE22:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD]], <4 x double> undef, <4 x i32> ; AVX2-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -4 ; AVX2-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -3 ; AVX2-NEXT: [[REVERSE23:%.*]] = shufflevector <4 x i1> [[TMP21]], <4 x i1> undef, <4 x i32> ; AVX2-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <4 x double>* -; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] 
= call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP33]], i32 8, <4 x i1> [[REVERSE23]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE25:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD24]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX2-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -3
; AVX2-NEXT: [[REVERSE26:%.*]] = shufflevector <4 x i1> [[TMP22]], <4 x i1> undef, <4 x i32>
; AVX2-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP36]], i32 8, <4 x i1> [[REVERSE26]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE28:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD27]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -12
; AVX2-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -3
; AVX2-NEXT: [[REVERSE29:%.*]] = shufflevector <4 x i1> [[TMP23]], <4 x i1> undef, <4 x i32>
; AVX2-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <4 x double>*
-; AVX2-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !44
+; AVX2-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[TMP39]], i32 8, <4 x i1> [[REVERSE29]], <4 x double> undef), !alias.scope !51
; AVX2-NEXT: [[REVERSE31:%.*]] = shufflevector <4 x double> [[WIDE_MASKED_LOAD30]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP40:%.*]] = fadd <4 x double> [[REVERSE22]],
; AVX2-NEXT: [[TMP41:%.*]] = fadd <4 x double> [[REVERSE25]],
@@ -1709,25 +1851,25 @@
; AVX2-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX2-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -3
; AVX2-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE32]], <4 x double>* [[TMP50]], i32 8, <4 x i1> [[REVERSE21]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE34:%.*]] = shufflevector <4 x double> [[TMP41]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -4
; AVX2-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -3
; AVX2-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE34]], <4 x double>* [[TMP53]], i32 8, <4 x i1> [[REVERSE23]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE36:%.*]] = shufflevector <4 x double> [[TMP42]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX2-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -3
; AVX2-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE36]], <4 x double>* [[TMP56]], i32 8, <4 x i1> [[REVERSE26]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[REVERSE38:%.*]] = shufflevector <4 x double> [[TMP43]], <4 x double> undef, <4 x i32>
; AVX2-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -12
; AVX2-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -3
; AVX2-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <4 x double>*
-; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !46, !noalias !48
+; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[REVERSE38]], <4 x double>* [[TMP59]], i32 8, <4 x i1> [[REVERSE29]]), !alias.scope !53, !noalias !55
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !49
+; AVX2-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !56
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1750,7 +1892,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; AVX2-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !50
+; AVX2-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !57
; AVX2: for.end:
; AVX2-NEXT: ret void
;
@@ -1798,22 +1940,22 @@
; AVX512-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
; AVX512-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 -7
; AVX512-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, <8 x i32>* [[TMP10]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -8
; AVX512-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 -7
; AVX512-NEXT: [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x i32>, <8 x i32>* [[TMP13]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE16:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD15]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -16
; AVX512-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 -7
; AVX512-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD17:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE18:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD17]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 -24
; AVX512-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP17]], i32 -7
; AVX512-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <8 x i32>*
-; AVX512-NEXT: [[WIDE_LOAD19:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !51
+; AVX512-NEXT: [[WIDE_LOAD19:%.*]] = load <8 x i32>, <8 x i32>* [[TMP19]], align 4, !alias.scope !58
; AVX512-NEXT: [[REVERSE20:%.*]] = shufflevector <8 x i32> [[WIDE_LOAD19]], <8 x i32> undef, <8 x i32>
; AVX512-NEXT: [[TMP20:%.*]] = icmp sgt <8 x i32> [[REVERSE]], zeroinitializer
; AVX512-NEXT: [[TMP21:%.*]] = icmp sgt <8 x i32> [[REVERSE16]], zeroinitializer
@@ -1827,25 +1969,25 @@
; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 -7
; AVX512-NEXT: [[REVERSE21:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP30:%.*]] = bitcast double* [[TMP29]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE21]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP30]], i32 8, <8 x i1> [[REVERSE21]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE22:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP31:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -8
; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds double, double* [[TMP31]], i32 -7
; AVX512-NEXT: [[REVERSE23:%.*]] = shufflevector <8 x i1> [[TMP21]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP33:%.*]] = bitcast double* [[TMP32]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD24:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP33]], i32 8, <8 x i1> [[REVERSE23]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE25:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD24]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP34:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -16
; AVX512-NEXT: [[TMP35:%.*]] = getelementptr inbounds double, double* [[TMP34]], i32 -7
; AVX512-NEXT: [[REVERSE26:%.*]] = shufflevector <8 x i1> [[TMP22]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP36:%.*]] = bitcast double* [[TMP35]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD27:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP36]], i32 8, <8 x i1> [[REVERSE26]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE28:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD27]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP37:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 -24
; AVX512-NEXT: [[TMP38:%.*]] = getelementptr inbounds double, double* [[TMP37]], i32 -7
; AVX512-NEXT: [[REVERSE29:%.*]] = shufflevector <8 x i1> [[TMP23]], <8 x i1> undef, <8 x i32>
; AVX512-NEXT: [[TMP39:%.*]] = bitcast double* [[TMP38]] to <8 x double>*
-; AVX512-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE29]], <8 x double> undef), !alias.scope !54
+; AVX512-NEXT: [[WIDE_MASKED_LOAD30:%.*]] = call <8 x double> @llvm.masked.load.v8f64.p0v8f64(<8 x double>* [[TMP39]], i32 8, <8 x i1> [[REVERSE29]], <8 x double> undef), !alias.scope !61
; AVX512-NEXT: [[REVERSE31:%.*]] = shufflevector <8 x double> [[WIDE_MASKED_LOAD30]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP40:%.*]] = fadd <8 x double> [[REVERSE22]],
; AVX512-NEXT: [[TMP41:%.*]] = fadd <8 x double> [[REVERSE25]],
@@ -1859,25 +2001,25 @@
; AVX512-NEXT: [[TMP48:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 0
; AVX512-NEXT: [[TMP49:%.*]] = getelementptr inbounds double, double* [[TMP48]], i32 -7
; AVX512-NEXT: [[TMP50:%.*]] = bitcast double* [[TMP49]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE32]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE21]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE32]], <8 x double>* [[TMP50]], i32 8, <8 x i1> [[REVERSE21]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE34:%.*]] = shufflevector <8 x double> [[TMP41]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP51:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -8
; AVX512-NEXT: [[TMP52:%.*]] = getelementptr inbounds double, double* [[TMP51]], i32 -7
; AVX512-NEXT: [[TMP53:%.*]] = bitcast double* [[TMP52]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE34]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE34]], <8 x double>* [[TMP53]], i32 8, <8 x i1> [[REVERSE23]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE36:%.*]] = shufflevector <8 x double> [[TMP42]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP54:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -16
; AVX512-NEXT: [[TMP55:%.*]] = getelementptr inbounds double, double* [[TMP54]], i32 -7
; AVX512-NEXT: [[TMP56:%.*]] = bitcast double* [[TMP55]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE36]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE36]], <8 x double>* [[TMP56]], i32 8, <8 x i1> [[REVERSE26]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[REVERSE38:%.*]] = shufflevector <8 x double> [[TMP43]], <8 x double> undef, <8 x i32>
; AVX512-NEXT: [[TMP57:%.*]] = getelementptr inbounds double, double* [[TMP44]], i32 -24
; AVX512-NEXT: [[TMP58:%.*]] = getelementptr inbounds double, double* [[TMP57]], i32 -7
; AVX512-NEXT: [[TMP59:%.*]] = bitcast double* [[TMP58]] to <8 x double>*
-; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE38]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE29]]), !alias.scope !56, !noalias !58
+; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> [[REVERSE38]], <8 x double>* [[TMP59]], i32 8, <8 x i1> [[REVERSE29]]), !alias.scope !63, !noalias !65
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP60:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4096
-; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !59
+; AVX512-NEXT: br i1 [[TMP60]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !66
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 4096, 4096
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1900,7 +2042,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
; AVX512-NEXT: [[CMP:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
-; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !60
+; AVX512-NEXT: br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !67
; AVX512: for.end:
; AVX512-NEXT: ret void
;
@@ -2038,7 +2180,7 @@
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !41
+; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !48
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2064,7 +2206,7 @@
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !42
+; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !49
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
@@ -2168,7 +2310,7 @@
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
+; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !58
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2194,7 +2336,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
+; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !59
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
@@ -2298,7 +2440,7 @@
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !61
+; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !68
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2324,7 +2466,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !62
+; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !69
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
@@ -2473,7 +2615,7 @@
; AVX1-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX1-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !44
+; AVX1-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !51
; AVX1: middle.block:
; AVX1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX1-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2499,7 +2641,7 @@
; AVX1: for.inc:
; AVX1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !45
+; AVX1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !52
; AVX1: for.end.loopexit:
; AVX1-NEXT: br label [[FOR_END]]
; AVX1: for.end:
@@ -2603,7 +2745,7 @@
; AVX2-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> , <4 x double>* [[TMP63]], i32 8, <4 x i1> [[TMP55]])
; AVX2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16
; AVX2-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !54
+; AVX2-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !61
; AVX2: middle.block:
; AVX2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2629,7 +2771,7 @@
; AVX2: for.inc:
; AVX2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !55
+; AVX2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !62
; AVX2: for.end.loopexit:
; AVX2-NEXT: br label [[FOR_END]]
; AVX2: for.end:
@@ -2733,7 +2875,7 @@
; AVX512-NEXT: call void @llvm.masked.store.v8f64.p0v8f64(<8 x double> , <8 x double>* [[TMP63]], i32 8, <8 x i1> [[TMP55]])
; AVX512-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 32
; AVX512-NEXT: [[TMP64:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !64
+; AVX512-NEXT: br i1 [[TMP64]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !71
; AVX512: middle.block:
; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
@@ -2759,7 +2901,7 @@
; AVX512: for.inc:
; AVX512-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; AVX512-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
-; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !65
+; AVX512-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !72
; AVX512: for.end.loopexit:
; AVX512-NEXT: br label [[FOR_END]]
; AVX512: for.end:
Index: test/Transforms/LoopVectorize/if-conversion.ll
===================================================================
--- test/Transforms/LoopVectorize/if-conversion.ll
+++ test/Transforms/LoopVectorize/if-conversion.ll
@@ -108,12 +108,12 @@
@a = common global [1 x i32*] zeroinitializer, align 8
@c = common global i32* null, align 8
-; We use to if convert this loop. This is not safe because there is a trapping
-; constant expression.
+; Constant expressions never trap; check that we perform the transform
+; consistently.
; PR16729
; CHECK-LABEL: trapping_constant_expression
-; CHECK-NOT: or <4 x i32>
+; CHECK: or <4 x i32>
define i32 @trapping_constant_expression() {
entry:
@@ -122,13 +122,13 @@
for.body:
%inc3 = phi i32 [ 0, %entry ], [ %inc, %cond.end ]
%or2 = phi i32 [ 0, %entry ], [ %or, %cond.end ]
- br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c), label %cond.false, label %cond.end
+ br i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** getelementptr inbounds (i32*, i32** @c, i64 1)), label %cond.false, label %cond.end
cond.false:
br label %cond.end
cond.end:
- %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** @c) to i32)), %cond.false ], [ 0, %for.body ]
+ %cond = phi i32 [ sdiv (i32 1, i32 zext (i1 icmp eq (i32** getelementptr inbounds ([1 x i32*], [1 x i32*]* @a, i64 0, i64 0), i32** getelementptr inbounds (i32*, i32** @c, i64 1)) to i32)), %cond.false ], [ 0, %for.body ]
%or = or i32 %or2, %cond
%inc = add nsw i32 %inc3, 1
%cmp = icmp slt i32 %inc, 128
@@ -138,12 +138,11 @@
ret i32 %or
}
-; Neither should we if-convert if there is an instruction operand that is a
-; trapping constant expression.
+; Constant expressions never trap; check that we perform the transform consistently.
; PR16729
; CHECK-LABEL: trapping_constant_expression2
-; CHECK-NOT: or <4 x i32>
+; CHECK: or <4 x i32>
define i32 @trapping_constant_expression2() {
entry:
Index: test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
===================================================================
--- test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
+++ test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll
@@ -1,29 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; PR957
; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; CHECK-NOT: select
-
@G = extern_weak global i32
define i32 @test(i32 %tmp) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: cond_false179:
+; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i32 [[TMP:%.*]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP181]], i32 udiv (i32 1, i32 ptrtoint (i32* @G to i32)), i32 [[TMP]]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
cond_false179:
- %tmp181 = icmp eq i32 %tmp, 0 ; [#uses=1]
- br i1 %tmp181, label %cond_true182, label %cond_next185
-cond_true182: ; preds = %cond_false179
- br label %cond_next185
-cond_next185: ; preds = %cond_true182, %cond_false179
- %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ] ; [#uses=1]
- ret i32 %d0.3
+ %tmp181 = icmp eq i32 %tmp, 0
+ br i1 %tmp181, label %cond_true182, label %cond_next185
+cond_true182:
+ br label %cond_next185
+cond_next185:
+ %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]
+ ret i32 %d0.3
}
define i32 @test2(i32 %tmp) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: cond_false179:
+; CHECK-NEXT: [[TMP181:%.*]] = icmp eq i32 [[TMP:%.*]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TMP181]], i32 udiv (i32 1, i32 ptrtoint (i32* @G to i32)), i32 [[TMP]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @test(i32 4)
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
cond_false179:
- %tmp181 = icmp eq i32 %tmp, 0 ; [#uses=1]
- br i1 %tmp181, label %cond_true182, label %cond_next185
+ %tmp181 = icmp eq i32 %tmp, 0
+ br i1 %tmp181, label %cond_true182, label %cond_next185
cond_true182: ; preds = %cond_false179
- br label %cond_next185
-cond_next185: ; preds = %cond_true182, %cond_false179
- %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ] ; [#uses=1]
- call i32 @test( i32 4 ) ; :0 [#uses=0]
- ret i32 %d0.3
+ br label %cond_next185
+cond_next185:
+ %d0.3 = phi i32 [ udiv (i32 1, i32 ptrtoint (i32* @G to i32)), %cond_true182 ], [ %tmp, %cond_false179 ]
+ call i32 @test( i32 4 )
+ ret i32 %d0.3
}
Index: test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
===================================================================
--- test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
+++ test/Transforms/SimplifyCFG/ConditionalTrappingConstantExpr.ll
@@ -4,18 +4,15 @@
@G = extern_weak global i32
; PR3354
-; Do not merge bb1 into the entry block, it might trap.
+; Constant expressions never trap; check that we perform the transform consistently.
define i32 @admiral(i32 %a, i32 %b) {
; CHECK-LABEL: @admiral(
-; CHECK-NEXT: [[C:%.*]] = icmp sle i32 %a, %b
-; CHECK-NEXT: br i1 [[C]], label %bb2, label %bb1
-; CHECK: bb1:
+; CHECK-NEXT: bb2:
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[D:%.*]] = icmp sgt i32 sdiv (i32 -32768, i32 ptrtoint (i32* @G to i32)), 0
-; CHECK-NEXT: [[DOT:%.*]] = select i1 [[D]], i32 927, i32 42
-; CHECK-NEXT: br label %bb2
-; CHECK: bb2:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 42, %0 ], [ [[DOT]], %bb1 ]
+; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[C]], [[D]]
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[OR_COND]], i32 927, i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
%c = icmp sle i32 %a, %b
@@ -31,12 +28,9 @@
define i32 @ackbar(i1 %c) {
; CHECK-LABEL: @ackbar(
-; CHECK-NEXT: br i1 %c, label %bb5, label %bb6
-; CHECK: bb5:
-; CHECK-NEXT: [[DOT:%.*]] = select i1 icmp sgt (i32 sdiv (i32 32767, i32 ptrtoint (i32* @G to i32)), i32 0), i32 42, i32 927
-; CHECK-NEXT: br label %bb6
-; CHECK: bb6:
-; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 42, %0 ], [ [[DOT]], %bb5 ]
+; CHECK-NEXT: bb6:
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 icmp sgt (i32 sdiv (i32 32767, i32 ptrtoint (i32* @G to i32)), i32 0), i32 42, i32 927
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[C:%.*]], i32 [[SPEC_SELECT]], i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
br i1 %c, label %bb5, label %bb6
@@ -53,8 +47,8 @@
define i32 @tarp(i1 %c) {
; CHECK-LABEL: @tarp(
; CHECK-NEXT: bb9:
-; CHECK-NEXT: [[DOT:%.*]] = select i1 fcmp oeq (float fdiv (float 3.000000e+00, float sitofp (i32 ptrtoint (i32* @G to i32) to float)), float 1.000000e+00), i32 42, i32 927
-; CHECK-NEXT: [[MERGE:%.*]] = select i1 %c, i32 [[DOT]], i32 42
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 fcmp oeq (float fdiv (float 3.000000e+00, float sitofp (i32 ptrtoint (i32* @G to i32) to float)), float 1.000000e+00), i32 42, i32 927
+; CHECK-NEXT: [[MERGE:%.*]] = select i1 [[C:%.*]], i32 [[SPEC_SELECT]], i32 42
; CHECK-NEXT: ret i32 [[MERGE]]
;
br i1 %c, label %bb8, label %bb9
Index: test/Transforms/SimplifyCFG/PR16069.ll
===================================================================
--- test/Transforms/SimplifyCFG/PR16069.ll
+++ test/Transforms/SimplifyCFG/PR16069.ll
@@ -5,7 +5,7 @@
define i32 @foo(i1 %y) {
; CHECK-LABEL: @foo(
-; CHECK: [[COND_I:%.*]] = phi i32 [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2 ], [ 0, %0 ]
+; CHECK: [[COND_I:%.*]] = select i1 %y, i32 0, i32 srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32))
; CHECK-NEXT: ret i32 [[COND_I]]
;
br i1 %y, label %bb1, label %bb2
@@ -20,7 +20,7 @@
define i32 @foo2(i1 %x) {
; CHECK-LABEL: @foo2(
-; CHECK: [[COND:%.*]] = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
+; CHECK: [[COND:%.*]] = select i1 %x, i32 0, i32 srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32))
; CHECK-NEXT: ret i32 [[COND]]
;
bb0:
Index: test/Transforms/SimplifyCFG/PR17073.ll
===================================================================
--- test/Transforms/SimplifyCFG/PR17073.ll
+++ test/Transforms/SimplifyCFG/PR17073.ll
@@ -1,11 +1,6 @@
; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; In PR17073 ( http://llvm.org/pr17073 ), we illegally hoisted an operation that can trap.
-; The first test confirms that we don't do that when the trapping op is reached by the current BB (block1).
-; The second test confirms that we don't do that when the trapping op is reached by the previous BB (entry).
-; The third test confirms that we can still do this optimization for an operation (add) that doesn't trap.
-; The tests must be complicated enough to prevent previous SimplifyCFG actions from optimizing away
-; the instructions that we're checking for.
+; Constant expressions never trap; check that we perform the transform consistently.
target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
target triple = "i386-apple-macosx10.9.0"
@@ -14,8 +9,7 @@
@b = common global i8 0, align 1
; CHECK-LABEL: can_trap1
-; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
-; CHECK-NOT: select i1 %tobool, i32* null, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a)
+; CHECK: select i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a), i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
define i32* @can_trap1() {
entry:
%0 = load i32, i32* @a, align 4
@@ -34,8 +28,7 @@
}
; CHECK-LABEL: can_trap2
-; CHECK-NOT: or i1 %tobool, icmp eq (i32* bitcast (i8* @b to i32*), i32* @a)
-; CHECK-NOT: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
+; CHECK: select i1 %tobool, i32* select (i1 icmp eq (i64 urem (i64 2, i64 zext (i1 icmp eq (i32* bitcast (i8* @b to i32*), i32* @a) to i64)), i64 0), i32* null, i32* @a), i32* null
define i32* @can_trap2() {
entry:
%0 = load i32, i32* @a, align 4
Index: unittests/IR/ConstantsTest.cpp
===================================================================
--- unittests/IR/ConstantsTest.cpp
+++ unittests/IR/ConstantsTest.cpp
@@ -241,11 +241,23 @@
CHECK(ConstantExpr::getFSub(P1, P1), "fsub float " P1STR ", " P1STR);
CHECK(ConstantExpr::getMul(P0, P0), "mul i32 " P0STR ", " P0STR);
CHECK(ConstantExpr::getFMul(P1, P1), "fmul float " P1STR ", " P1STR);
- CHECK(ConstantExpr::getUDiv(P0, P0), "udiv i32 " P0STR ", " P0STR);
- CHECK(ConstantExpr::getSDiv(P0, P0), "sdiv i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getUDiv(P0, P0),
+ "udiv i32 " P0STR ", select (i1 icmp eq (i32 " P0STR
+ ", i32 0), i32 1, i32 " P0STR ")");
+ CHECK(ConstantExpr::getSDiv(P4, P0),
+ "sdiv i32 " P4STR
+ ", select (i1 or (i1 and (i1 icmp eq (i32 " P0STR
+ ", i32 -1), i1 icmp eq (i32 " P4STR
+ ", i32 -2147483648)), i1 icmp eq (i32 " P0STR ", i32 0)), i32 1, i32 "
+ P0STR ")");
CHECK(ConstantExpr::getFDiv(P1, P1), "fdiv float " P1STR ", " P1STR);
- CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", " P0STR);
- CHECK(ConstantExpr::getSRem(P0, P0), "srem i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getURem(P0, P0), "urem i32 " P0STR ", select (i1 icmp eq (i32 " P0STR ", i32 0), i32 1, i32 " P0STR ")");
+ CHECK(ConstantExpr::getSRem(P4, P0),
+ "srem i32 " P4STR
+ ", select (i1 or (i1 and (i1 icmp eq (i32 " P0STR
+ ", i32 -1), i1 icmp eq (i32 " P4STR
+ ", i32 -2147483648)), i1 icmp eq (i32 " P0STR ", i32 0)), i32 1, i32 "
+ P0STR ")");
CHECK(ConstantExpr::getFRem(P1, P1), "frem float " P1STR ", " P1STR);
CHECK(ConstantExpr::getAnd(P0, P0), "and i32 " P0STR ", " P0STR);
CHECK(ConstantExpr::getOr(P0, P0), "or i32 " P0STR ", " P0STR);
@@ -266,7 +278,9 @@
CHECK(ConstantExpr::getFPExtend(P1, DoubleTy),
"fpext float " P1STR " to double");
- CHECK(ConstantExpr::getExactUDiv(P0, P0), "udiv exact i32 " P0STR ", " P0STR);
+ CHECK(ConstantExpr::getExactUDiv(P0, P0),
"udiv exact i32 " P0STR ", select (i1 icmp eq (i32 " P0STR + ", i32 0), i32 1, i32 " P0STR ")"); CHECK(ConstantExpr::getSelect(P3, P0, P4), "select i1 " P3STR ", i32 " P0STR ", i32 " P4STR);