Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -204,6 +204,11 @@ cl::desc("Max coefficients in AddRec during evolving"), cl::init(16)); +static cl::opt + MaxConstExprSize("scalar-evolution-max-const-expr-size", cl::Hidden, + cl::desc("Max number of nodes of const expression"), + cl::init(2048)); + //===----------------------------------------------------------------------===// // SCEV class definitions //===----------------------------------------------------------------------===// @@ -4730,6 +4735,7 @@ if (Shifted != getCouldNotCompute() && Start != getCouldNotCompute()) { const SCEV *StartVal = getSCEV(StartValueV); + if (Start == StartVal) { // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the @@ -6013,6 +6019,12 @@ } return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); + case Instruction::IntToPtr: + case Instruction::PtrToInt: + if (getTypeSizeInBits(U->getType()) != + getTypeSizeInBits(U->getOperand(0)->getType())) + break; + // Otherwise fall through case Instruction::BitCast: // BitCasts are no-op casts so we just eliminate the cast. 
if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) @@ -7372,6 +7384,18 @@ return IncomingVal; } +static size_t GetConstExprSize(Constant *C) { + ConstantExpr *CE = dyn_cast(C); + if (!CE) + return 1; + size_t S = 1; + for (User::const_op_iterator OI = CE->op_begin(); OI != CE->op_end(); ++OI) { + if (Constant *CV = dyn_cast(*OI)) + S += GetConstExprSize(CV); + } + return S; +} + /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is /// in the header of its containing loop, we know the loop executes a /// constant number of times, and the PHI node is just a recurrence @@ -7427,6 +7451,10 @@ EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI); if (!NextPHI) return nullptr; // Couldn't evaluate! + + if (GetConstExprSize(NextPHI) > MaxConstExprSize) + return nullptr; + NextIterVals[PN] = NextPHI; bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; @@ -10425,6 +10453,7 @@ continue; if (PHINode *PN = dyn_cast(U)) SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->eraseValueFromMap(U); Worklist.insert(Worklist.end(), U->user_begin(), U->user_end()); } Index: lib/Analysis/ScalarEvolutionExpander.cpp =================================================================== --- lib/Analysis/ScalarEvolutionExpander.cpp +++ lib/Analysis/ScalarEvolutionExpander.cpp @@ -857,8 +857,17 @@ /// the PHI. If so, it may be reused by expanded expressions. bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L) { + auto IsBitCast = [this](Instruction *I) { + if (isa(I)) + return true; + if (isa(I) || isa(I)) + return SE.getTypeSizeInBits(I->getType()) == + SE.getTypeSizeInBits(I->getOperand(0)->getType()); + return false; + }; + if (IncV->getNumOperands() == 0 || isa(IncV) || - (isa(IncV) && !isa(IncV))) + (isa(IncV) && !IsBitCast(IncV))) return false; // If any of the operands don't dominate the insert position, bail. 
// Addrec operands are always loop-invariant, so this can only happen @@ -910,6 +919,12 @@ return dyn_cast(IncV->getOperand(0)); return nullptr; } + case Instruction::IntToPtr: + case Instruction::PtrToInt: + if (SE.getTypeSizeInBits(IncV->getType()) != + SE.getTypeSizeInBits(IncV->getOperand(0)->getType())) + return nullptr; + // fall through: case Instruction::BitCast: return dyn_cast(IncV->getOperand(0)); case Instruction::GetElementPtr: @@ -1495,7 +1510,7 @@ // actually a pointer type. if (!isa(Base) && !isa(Base)) { Value *StartV = expand(Base); - assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + StartV = InsertNoopCastOfTo(StartV, PTy); return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); } } @@ -1680,6 +1695,16 @@ SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { SetVector *Set = SE.getSCEVValues(S); + auto IsCompatibleTy = [this](Type *Type1, Type *Type2) { + if (Type1 == Type2) + return true; + + if ((!Type1->isPointerTy() && !Type1->isIntegerTy()) || + (!Type2->isPointerTy() && !Type2->isIntegerTy())) + return false; + + return SE.getTypeSizeInBits(Type1) == SE.getTypeSizeInBits(Type2); + }; // If the expansion is not in CanonicalMode, and the SCEV contains any // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally. 
if (CanonicalMode || !SE.containsAddRecurrence(S)) { @@ -1693,7 +1718,7 @@ ConstantInt *Offset = VOPair.second; Instruction *EntInst = nullptr; if (V && isa(V) && (EntInst = cast(V)) && - S->getType() == V->getType() && + IsCompatibleTy(S->getType(), V->getType()) && EntInst->getFunction() == InsertPt->getFunction() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || Index: lib/Transforms/Scalar/AlignmentFromAssumptions.cpp =================================================================== --- lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -268,12 +268,14 @@ // Try to find the ptrtoint; subtract it and the rest is the offset. for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), JE = AndLHSAddSCEV->op_end(); J != JE; ++J) - if (const SCEVUnknown *OpUnk = dyn_cast(*J)) - if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) { + if (const SCEVUnknown *OpUnk = dyn_cast(*J)) { + OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); + if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) AAPtr = PToI->getPointerOperand(); - OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); - break; - } + else + AAPtr = OpUnk->getValue(); + break; + } } if (!AAPtr) Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3535,6 +3535,41 @@ // value. for (BasicBlock *BB : LoopBypassBlocks) BCResumeVal->addIncoming(II.getStartValue(), BB); + + // FIXME: Strictly speaking, when an incoming operand is changed + // here, we should erase the cached SCEV expressions for the + // PHI and its related values. However as the new start value is + // merged from multiple predecessors, Scalar Evolution may + // no longer be able to rediscover the phi is an AddRecExprs. 
For + // instance, + // int *p = base; + // int i = 0; + // do { + // p1 = phi(base, p2); + // i1 = phi(0, i2); + // .. = *p2; + // i2 = i1 + 1; + // p2 = base + i2; + // .. + // while (...); + // + // Before the loop vectorization, scalar evolution can discover that + // i2 is {0, +, 1}, and p2 is derived from i2 which is {4 + base, +, 4}. + // It also discovers that p1 is {base, +, 4} which is shifted + // from p2. When the loop vectorizer creates the vector loop, the scalar + // loop's header phi's start value will become a merge of the + // original start and vector loop exit value. In this case, the start + // value of p1 becomes bc_resume_val1, and i1 has start of bc_resume_val2. + // Scalar evolution can no longer establish that p1 is a shifted value + // of {4 + 4*bc_resume_val2, +, 4}. + // + // Since the scalar evolution expander can sometimes erase cached SCEV vals + // for some of the phis in the scalar loop, we do a recomputation and cache + // it here before the phi is updated (doing on-demand recomputation after + // the update would produce inconsistent SCEVs with other PHIs with cached + // SCEVs from the original scalar loop). 
+ // + PSE.getSCEV(OrigPhi); OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); } Index: test/Analysis/ScalarEvolution/timeout_constexpr.ll =================================================================== --- test/Analysis/ScalarEvolution/timeout_constexpr.ll +++ test/Analysis/ScalarEvolution/timeout_constexpr.ll @@ -0,0 +1,33 @@ +; CHECK the test won't timeout (will happen when -scalar-evolution-max-const-expr-size= is set to +; large value +; RUN: opt < %s -indvars +%struct.ST = type { %struct.ST* } + +@global = internal global [121 x i8] zeroinitializer, align 1 + +define void @func() { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %0 = phi %struct.ST* [ %2, %for.body ], [ bitcast ([121 x i8]* @global to %struct.ST*), %entry ] + %inc1 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %cmp = icmp slt i32 %inc1, 30 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %add.ptr1 = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 1 + %1 = ptrtoint %struct.ST* %add.ptr1 to i32 + %rem = and i32 %1, 1 + %add = add i32 %rem, %1 + %2 = inttoptr i32 %add to %struct.ST* + %next = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 0, i32 0 + store %struct.ST* %2, %struct.ST** %next, align 4 + %inc = add nsw i32 %inc1, 1 + br label %for.cond + +for.end: ; preds = %for.cond + %next6 = getelementptr inbounds %struct.ST, %struct.ST* %0, i32 0, i32 0 + store %struct.ST* null, %struct.ST** %next6, align 4 + ret void +} Index: test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll =================================================================== --- test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll +++ test/CodeGen/AArch64/arm64-2011-10-18-LdStOptBug.ll @@ -12,7 +12,7 @@ for.body: ; CHECK: for.body -; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}, x{{[0-9]+}}] +; CHECK: ldr w{{[0-9]+}}, [x{{[0-9]+}}] ; CHECK: add x[[REG:[0-9]+]], ; CHECK: x[[REG]], #1, lsl #12 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 
Index: test/CodeGen/PowerPC/unal-altivec.ll =================================================================== --- test/CodeGen/PowerPC/unal-altivec.ll +++ test/CodeGen/PowerPC/unal-altivec.ll @@ -30,7 +30,7 @@ ; CHECK: @foo ; CHECK-DAG: li [[C0:[0-9]+]], 0 -; CHECK-DAG: li [[C15:[0-9]+]], 15 +; CHECK-DAG: li [[C15:[0-9]+]], 16 ; CHECK-DAG: lvx [[CNST:[0-9]+]], ; CHECK: .LBB0_1: ; CHECK-DAG: lvsl [[MASK1:[0-9]+]], [[B1:[0-9]+]], [[C0]] Index: test/Other/constant-fold-gep.ll =================================================================== --- test/Other/constant-fold-gep.ll +++ test/Other/constant-fold-gep.ll @@ -312,31 +312,31 @@ ; TO: } ; SCEV: Classifying expressions for: @fa ; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 2310) to i64 -; SCEV: --> (2310 * sizeof(double)) +; SCEV: --> 18480 U: [18480,18481) S: [18480,18481) ; SCEV: Classifying expressions for: @fb ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fc ; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 2) to i64 -; SCEV: --> (2 * sizeof(double)) +; SCEV: --> 16 U: [16,17) S: [16,17) ; SCEV: Classifying expressions for: @fd ; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double, double* null, i32 1) to i64), i64 11) to i64 -; SCEV: --> (11 * sizeof(double)) +; SCEV: --> 88 U: [88,89) S: [88,89) ; SCEV: Classifying expressions for: @fe ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }, { double, float, double, double }* null, i64 0, i32 2) to i64) to i64 -; SCEV: --> offsetof({ double, float, double, double }, 2) +; SCEV: --> 16 U: [16,17) S: [16,17) ; SCEV: Classifying expressions for: @ff ; SCEV: %t = bitcast i64 1 to i64 ; SCEV: --> 1 ; 
SCEV: Classifying expressions for: @fg ; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }, { i1, double }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(double) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fh ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (i1*, i1** null, i32 1) to i64) to i64 -; SCEV: --> sizeof(i1*) +; SCEV: --> 8 U: [8,9) S: [8,9) ; SCEV: Classifying expressions for: @fi ; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }, { i1, i1* }* null, i64 0, i32 1) to i64) to i64 -; SCEV: --> alignof(i1*) +; SCEV: --> 8 U: [8,9) S: [8,9) define i64 @fa() nounwind { %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}, {[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 Index: test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll =================================================================== --- test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +++ test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-reduce -S < %s | FileCheck %s +; RUN: opt -loop-reduce -dce -S < %s | FileCheck %s ; ; LTO of clang, which mistakenly uses no TargetLoweringInfo, causes a ; miscompile. ReuseOrCreateCast replace ptrtoint operand with undef. @@ -14,8 +14,9 @@ ; current LSR cost model. 
; CHECK-NOT: = ptrtoint i8* undef to i64 ; CHECK: .lr.ph -; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 -; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} +; CHECK: [[TMP:%[^ ]+]] = sub i64 -1, %tmp5 +; CHECK: getelementptr i8, i8* %tmp3, i64 [[TMP]] +; CHECK-NOT: getelementptr ; CHECK: ret void define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { bb: Index: test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll =================================================================== --- test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll +++ test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll @@ -7,11 +7,10 @@ ; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast ; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1 ; CHECK: for.body.lr.ph: -; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1 ; CHECK: br label %for.body ; CHECK: for.body: -; CHECK: %lsr.iv2 = phi i64 [ %lsr.iv.next, %for.body ], [ [[r3]], %for.body.lr.ph ] -; CHECK: %lsr.iv.next = add i64 %lsr.iv2, -2 +; CHECK: %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 0, %for.body.lr.ph ] +; CHECK: %lsr.iv.next = add i64 %lsr.iv, ; CHECK: %lsr.iv.next3 = inttoptr i64 %lsr.iv.next to i16* ; CHECK: %cmp27 = icmp eq i16* %lsr.iv.next3, null Index: test/Transforms/LoopVectorize/header_phi1.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi1.ll +++ test/Transforms/LoopVectorize/header_phi1.ll @@ -0,0 +1,31 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s + +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: store <4 x i32> +;CHECK: load <4 x i32> + +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +entry: + br label %loop + +loop: + %t1 = phi i32* [ %t3, %loop ], [ null, %entry ] + %t2 = phi i32* [ %t5, %loop ], [ undef, %entry ] + %t3 = getelementptr inbounds i32, i32* %t1, i64 1 + store i32 0, i32* %t1, align 4 + %t4 = load i32, i32* %t3, align 4 + %t5 = getelementptr 
inbounds i32, i32* %t2, i64 1 + %t6 = icmp ugt i32* undef, %t5 + br i1 %t6, label %loop, label %exit + +exit: + ret void + +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 6.0.0 (trunk 311306) (llvm/trunk 311373)"} Index: test/Transforms/LoopVectorize/header_phi2.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi2.ll +++ test/Transforms/LoopVectorize/header_phi2.ll @@ -0,0 +1,68 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: store <4 x i32> +bb: + br label %bb56 + +bb56: ; preds = %bb100, %bb + %tmp = phi i32* [ undef, %bb ], [ %tmp98, %bb100 ] + br label %bb67 + +bb67: ; preds = %bb56 + br i1 undef, label %bb68, label %bb97 + +bb68: ; preds = %bb67 + br label %bb74 + +bb73: ; preds = %bb92 + unreachable + +bb74: ; preds = %bb68 + br label %bb75 + +bb75: ; preds = %bb74 + br i1 undef, label %bb76, label %bb77 + +bb76: ; preds = %bb75 + br label %bb97 + +bb77: ; preds = %bb75 + br label %bb83 + +bb83: ; preds = %bb92, %bb77 + %tmp84 = phi i32* [ %tmp93, %bb92 ], [ undef, %bb77 ] + br label %bb85 + +bb85: ; preds = %bb85, %bb83 + %tmp86 = phi i32* [ %tmp88, %bb85 ], [ %tmp84, %bb83 ] + %tmp87 = phi i32* [ %tmp90, %bb85 ], [ %tmp, %bb83 ] + %tmp88 = getelementptr inbounds i32, i32* %tmp86, i64 1 + store i32 undef, i32* %tmp86, align 4 + %tmp89 = load i32, i32* %tmp88, align 4 + %tmp90 = getelementptr inbounds i32, i32* %tmp87, i64 1 + store i32 %tmp89, i32* %tmp87, align 4 + %tmp91 = icmp ugt i32* undef, %tmp90 + br i1 %tmp91, label %bb85, label %bb92 + +bb92: ; preds = %bb85 + %tmp93 = getelementptr inbounds i32, i32* undef, i64 2 + %tmp94 = icmp sgt i32 undef, undef + br i1 %tmp94, label %bb83, label %bb73 + +bb97: ; preds = %bb76, %bb67 + %tmp98 = phi i32* [ undef, %bb76 ], [ %tmp, %bb67 ] + br label %bb100 + 
+bb100: ; preds = %bb97 + br i1 undef, label %bb101, label %bb56 + +bb101: ; preds = %bb100 + unreachable +} + Index: test/Transforms/LoopVectorize/header_phi3.ll =================================================================== --- test/Transforms/LoopVectorize/header_phi3.ll +++ test/Transforms/LoopVectorize/header_phi3.ll @@ -0,0 +1,73 @@ +;RUN: opt < %s -loop-vectorize -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo() { +; CHECK-LABEL: @foo +; CHECK: vector.body: +bb: + br i1 undef, label %bb1, label %bb9 + +bb1: ; preds = %bb8, %bb + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %tmp = phi i16* [ undef, %bb1 ], [ %tmp6, %bb2 ] + %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp5, %bb2 ] + %tmp4 = load i16, i16* %tmp, align 2 + %tmp5 = add nuw nsw i64 %tmp3, 1 + %tmp6 = getelementptr inbounds i16, i16* undef, i64 %tmp5 + %tmp7 = icmp eq i64 %tmp5, 65535 + br i1 %tmp7, label %bb8, label %bb2 + +bb8: ; preds = %bb2 + br i1 undef, label %bb1, label %bb9 + +bb9: ; preds = %bb8, %bb + br i1 undef, label %bb10, label %bb12 + +bb10: ; preds = %bb10, %bb9 + br i1 false, label %bb10, label %bb11 + +bb11: ; preds = %bb10 + br label %bb12 + +bb12: ; preds = %bb11, %bb9 + ret void +} + +define void @foo2(i16 *%t1) { +; CHECK-LABEL: @foo2 +; CHECK: vector.body: +bb: + br i1 undef, label %bb1, label %bb9 + +bb1: ; preds = %bb8, %bb + br label %bb2 + +bb2: ; preds = %bb2, %bb1 + %tmp = phi i16* [ %t1, %bb1 ], [ %tmp6, %bb2 ] + %tmp3 = phi i64 [ 0, %bb1 ], [ %tmp5, %bb2 ] + %tmp4 = load i16, i16* %tmp, align 2 + %tmp5 = add nuw nsw i64 %tmp3, 1 + %tmp6 = getelementptr inbounds i16, i16* %t1, i64 %tmp5 + %tmp7 = icmp eq i64 %tmp5, 65535 + br i1 %tmp7, label %bb8, label %bb2 + +bb8: ; preds = %bb2 + br i1 undef, label %bb1, label %bb9 + +bb9: ; preds = %bb8, %bb + br i1 undef, label %bb10, label %bb12 + +bb10: ; preds = %bb10, %bb9 + br i1 false, label %bb10, label %bb11 + +bb11: ; preds = %bb10 + 
br label %bb12 + +bb12: ; preds = %bb11, %bb9 + ret void +} + + Index: test/Transforms/LoopVectorize/intptr1.ll =================================================================== --- test/Transforms/LoopVectorize/intptr1.ll +++ test/Transforms/LoopVectorize/intptr1.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> +;CHECK: store <4 x float> + + +@.str = private unnamed_addr constant [4 x i8] c"%f\0A\00", align 1 + +define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add, %for.body ], [ %b, %for.body.preheader ] + %tmp = inttoptr i64 %b.addr.02 to float* + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = add nsw i64 %b.addr.02, 4 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopVectorize/intptr2.ll =================================================================== --- test/Transforms/LoopVectorize/intptr2.ll +++ test/Transforms/LoopVectorize/intptr2.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> 
+;CHECK: store <4 x float> + +define void @test(float* %a, float* readnone %a_end, i64 %b) { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.02 = phi i64 [ %add.int, %for.body ], [ %b, %for.body.preheader ] + %tmp = inttoptr i64 %b.addr.02 to float* + %tmp1 = load float, float* %tmp, align 4 + %mul.i = fmul float %tmp1, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %add = getelementptr inbounds float, float* %tmp, i64 1 + %add.int = ptrtoint float* %add to i64 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} Index: test/Transforms/LoopVectorize/intptr3.ll =================================================================== --- test/Transforms/LoopVectorize/intptr3.ll +++ test/Transforms/LoopVectorize/intptr3.ll @@ -0,0 +1,41 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <4 x float> +;CHECK: fmul <4 x float> +;CHECK: store <4 x float> + +; Function Attrs: noinline norecurse nounwind uwtable +define void @test(float* %a, float* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult float* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.float = inttoptr i64 %b to float* + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %a.addr.03 = phi float* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.float = phi float* [ %b.addr.float.inc, %for.body ], 
[ %b.float, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ] + %l = load float, float* %b.addr.float, align 4 + %mul.i = fmul float %l, 4.200000e+01 + store float %mul.i, float* %a.addr.03, align 4 + %b.addr.float.2 = inttoptr i64 %b.addr.i64 to float* + %b.addr.float.inc = getelementptr inbounds float, float* %b.addr.float.2, i64 1 + %b.addr.i64.inc = ptrtoint float* %b.addr.float.inc to i64 + %incdec.ptr = getelementptr inbounds float, float* %a.addr.03, i64 1 + %cmp = icmp ult float* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + Index: test/Transforms/LoopVectorize/intptr4.ll =================================================================== --- test/Transforms/LoopVectorize/intptr4.ll +++ test/Transforms/LoopVectorize/intptr4.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +;CHECK-LABEL: @test +;CHECK: vector.body: +;CHECK: load <8 x i8> +;CHECK: mul <8 x i8> +;CHECK: store <8 x i8> + + +; Function Attrs: noinline norecurse nounwind uwtable +define void @test(i8* %a, i8* readnone %a_end, i64 %b) unnamed_addr { +entry: + %cmp1 = icmp ult i8* %a, %a_end + br i1 %cmp1, label %for.body.preheader, label %for.end + +for.body.preheader: ; preds = %entry + %b.i8 = inttoptr i64 %b to i8* + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %a.addr.03 = phi i8* [ %incdec.ptr, %for.body ], [ %a, %for.body.preheader ] + %b.addr.i8 = phi i8* [ %b.addr.i8.inc, %for.body ], [ %b.i8, %for.body.preheader ] + %b.addr.i64 = phi i64 [ %b.addr.i64.inc, %for.body ], [ %b, %for.body.preheader ] + %l = load i8, i8* %b.addr.i8, align 4 + %mul.i = mul i8 %l, 4 + store i8 %mul.i, i8* %a.addr.03, align 4 + %b.addr.i8.2 = inttoptr i64 %b.addr.i64 to i8* + %b.addr.i8.inc = getelementptr inbounds i8, 
i8* %b.addr.i8.2, i64 1 + %b.addr.i64.inc = ptrtoint i8* %b.addr.i8.inc to i64 + %incdec.ptr = getelementptr inbounds i8, i8* %a.addr.03, i64 1 + %cmp = icmp ult i8* %incdec.ptr, %a_end + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + + + Index: test/Transforms/LoopVectorize/intptr5.ll =================================================================== --- test/Transforms/LoopVectorize/intptr5.ll +++ test/Transforms/LoopVectorize/intptr5.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -loop-vectorize -S | FileCheck %s + +;CHECK-LABEL: @foo +;CHECK: vector.body: +;CHECK: load <16 x i8> +;CHECK: store <16 x i8> + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +define void @foo(i64 %tmp0) { +bb: + br label %bb3 + +bb3: ; preds = %bb20, %bb + %tmp = phi i64 [ %tmp0, %bb ], [ %tmp21, %bb20 ] + br i1 false, label %bb4, label %bb19 + +bb4: ; preds = %bb3 + %tmp5 = inttoptr i64 %tmp to i8* + br label %bb7 + +bb7: ; preds = %bb7, %bb4 + %tmp8 = phi i64 [ %tmp, %bb4 ], [ %tmp16, %bb7 ] + %tmp9 = phi i8* [ %tmp5, %bb4 ], [ %tmp15, %bb7 ] + %tmp10 = phi i8* [ undef, %bb4 ], [ %tmp13, %bb7 ] + %tmp11 = phi i32 [ 0, %bb4 ], [ %tmp17, %bb7 ] + %tmp12 = load i8, i8* %tmp9, align 1, !range !1, !noalias !2 + %tmp13 = getelementptr inbounds i8, i8* %tmp10, i64 1 + store i8 %tmp12, i8* %tmp10, align 1, !noalias !2 + %tmp14 = inttoptr i64 %tmp8 to i8* + %tmp15 = getelementptr inbounds i8, i8* %tmp14, i64 1 + %tmp16 = ptrtoint i8* %tmp15 to i64 + %tmp17 = add nuw nsw i32 %tmp11, 1 + %tmp18 = icmp eq i32 %tmp17, undef + br i1 %tmp18, label %bb19, label %bb7 + +bb19: + br i1 undef, label %bb20, label %bb22 + +bb20: ; preds = %bb19 + %tmp21 = ptrtoint i8* undef to i64 + br label %bb3 + +bb22: ; preds = %bb19 + ret void +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version google3-trunk (trunk r311977)"} +!1 = !{i8 0, i8 2} +!2 = !{!3} +!3 = distinct !{!3, !4, 
!"_ZN15quality_ranklab4impl12UnionApplierINS_8internal13CondOpWrapperINS2_10UnionSumOpIbEEEEbE10MakeNStepsINS_15ForwardIteratorINS_10FullVectorIbEEEENS_25MaybeFullPropertyInserterIbPbSt20back_insert_iteratorISt6vectorIiSaIiEEEEEEET0_PT_SL_S6_i: argument 0"} +!4 = distinct !{!4, !"_ZN15quality_ranklab4impl12UnionApplierINS_8internal13CondOpWrapperINS2_10UnionSumOpIbEEEEbE10MakeNStepsINS_15ForwardIteratorINS_10FullVectorIbEEEENS_25MaybeFullPropertyInserterIbPbSt20back_insert_iteratorISt6vectorIiSaIiEEEEEEET0_PT_SL_S6_i"} Index: unittests/Analysis/ScalarEvolutionTest.cpp =================================================================== --- unittests/Analysis/ScalarEvolutionTest.cpp +++ unittests/Analysis/ScalarEvolutionTest.cpp @@ -335,9 +335,7 @@ // Expect the expansion code contains: // %0 = bitcast i32* %bitcast2 to i8* // %uglygep = getelementptr i8, i8* %0, i64 -1 - // %1 = bitcast i8* %uglygep to i32* - EXPECT_TRUE(isa(V)); - Instruction *Gep = cast(V)->getPrevNode(); + Instruction *Gep = cast(V); EXPECT_TRUE(isa(Gep)); EXPECT_TRUE(isa(Gep->getOperand(1))); EXPECT_EQ(cast(Gep->getOperand(1))->getSExtValue(), -1);