Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -574,6 +574,10 @@ /// Returns (and creates if needed) the trip count of the widened loop. Value *getOrCreateVectorTripCount(Loop *NewLoop); + /// Returns a bitcasted value to the requested vector type. + /// Also handles bitcasts of float <--> pointer types. + Value* createBitCast(Value *V, VectorType* VTy, const DataLayout& DL); + /// Emit a bypass check to see if the trip count would overflow, or we /// wouldn't have enough iterations to execute one vector loop. void emitMinimumIterationCountCheck(Loop *L, BasicBlock *Bypass); @@ -2845,6 +2849,7 @@ if (Instr != Group->getInsertPos()) return; + const DataLayout &DL = Instr->getModule()->getDataLayout(); Value *Ptr = getPointerOperand(Instr); // Prepare for the vector type of the interleaved load/store. @@ -2919,7 +2924,7 @@ // If this member has different type, cast the result type. if (Member->getType() != ScalarTy) { VectorType *OtherVTy = VectorType::get(Member->getType(), VF); - StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy); + StridedVec = createBitCast(StridedVec, OtherVTy, DL); } if (Group->isReverse()) @@ -2948,9 +2953,10 @@ if (Group->isReverse()) StoredVec = reverseVector(StoredVec); - // If this member has different type, cast it to an unified type. - if (StoredVec->getType() != SubVT) - StoredVec = Builder.CreateBitOrPointerCast(StoredVec, SubVT); + // If this member has different type, cast it to a unified type. 
+ if (StoredVec->getType() != SubVT) { + StoredVec = createBitCast(StoredVec, SubVT, DL); + } StoredVecs.push_back(StoredVec); } @@ -3307,6 +3313,57 @@ LoopBypassBlocks.push_back(BB); }
+/// Casts \p V to the vector type \p VTy and returns the resulting value.
+///
+/// A single bit-or-pointer cast is emitted when the whole types, or (for
+/// vectors with equal element counts) the element types, permit it. The
+/// remaining supported case is a floating point <-> pointer element cast of
+/// equal bit width, emitted as two casts through an integer vector of that
+/// width. Anything else produces no value and trips the assertion below.
+Value *InnerLoopVectorizer::createBitCast(Value *V, VectorType *VTy,
+                                          const DataLayout &DL) {
+  if (CastInst::isBitOrNoopPointerCastable(V->getType(), VTy, DL)) {
+    return Builder.CreateBitOrPointerCast(V, VTy);
+  }
+  Value *CastVal = nullptr;
+  if (VectorType *SrcVecTy = dyn_cast<VectorType>(V->getType())) {
+    Type *SrcElemTy = SrcVecTy->getElementType();
+    Type *DstElemTy = VTy->getElementType();
+    unsigned VF = VTy->getNumElements();
+    unsigned SrcVF = SrcVecTy->getNumElements();
+    if (VF == SrcVF &&
+        CastInst::isBitOrNoopPointerCastable(SrcElemTy, DstElemTy, DL)) {
+      // Do a direct cast if the element types can be safely cast directly.
+      return Builder.CreateBitOrPointerCast(V, VTy);
+    }
+    // V cannot be directly cast to the desired vector type. This may happen
+    // when V is a floating point vector but VTy is a vector of pointers, or
+    // vice-versa.
+    if (DstElemTy->isPointerTy() && SrcElemTy->isFloatingPointTy() &&
+        (DL.getPointerTypeSizeInBits(DstElemTy) ==
+         SrcElemTy->getPrimitiveSizeInBits())) {
+      // Needs two step casting. First bitcast the floating point type to an
+      // int type; the int type can then be safely cast to the pointer type.
+      Type *IntTy = IntegerType::getIntNTy(V->getContext(),
+                                           SrcElemTy->getPrimitiveSizeInBits());
+      VectorType *VecIntTy = VectorType::get(IntTy, VF);
+      CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
+      CastVal = Builder.CreateBitOrPointerCast(CastVal, VTy);
+    } else if (SrcElemTy->isPointerTy() && DstElemTy->isFloatingPointTy() &&
+               (DL.getPointerTypeSizeInBits(SrcElemTy) ==
+                DstElemTy->getPrimitiveSizeInBits())) {
+      // Same handling as above with Dst and Src types reversed.
+      Type *IntTy = IntegerType::getIntNTy(V->getContext(),
+                                           DstElemTy->getPrimitiveSizeInBits());
+      VectorType *VecIntTy = VectorType::get(IntTy, VF);
+      CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
+      CastVal = Builder.CreateBitOrPointerCast(CastVal, VTy);
+    }
+  }
+  assert(CastVal && "Either unhandled or illegal cast");
+  return CastVal;
+}
+
 void InnerLoopVectorizer::emitVectorLoopEnteredCheck(Loop *L, BasicBlock *Bypass) { Value *TC = getOrCreateVectorTripCount(L); Index: test/Transforms/LoopVectorize/pr33804.ll =================================================================== --- /dev/null +++ test/Transforms/LoopVectorize/pr33804.ll @@ -0,0 +1,100 @@ +; RUN: opt -loop-vectorize -debug -S < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; This checks we don't crash when vectorizing if vectorizer ends up +; requiring casting float to a pointer type. + +; ModuleID = 'bugpoint-reduced-simplified.bc' +source_filename = "bugpoint-output-26dbd81.bc" +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7-cros-linux-gnueabihf" + +%struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746 = type { float, %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746* } + +@.str.13 = external unnamed_addr constant [1 x i8], align 1 + +; CHECK-LABEL: @cvCalcEMD2 +; CHECK: vector.body +define void @cvCalcEMD2() local_unnamed_addr #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @cvGetMat() + to label %invoke.cont unwind label %lpad.loopexit.split-lp + +invoke.cont: ; preds = %entry + invoke void @cvGetMat() + to label %invoke.cont3 unwind label %lpad.loopexit.split-lp + +invoke.cont3: ; preds = %invoke.cont + invoke 
void @_Znaj() #3 + to label %call.i.i.i1408.noexc unwind label %lpad.loopexit.split-lp + +lpad.loopexit.split-lp: ; preds = %invoke.cont3, %entry, %invoke.cont + %lpad.loopexit.split-lp2387 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +call.i.i.i1408.noexc: ; preds = %invoke.cont3 + invoke void @_ZNSsC1EPKcRKSaIcE() + to label %invoke.cont188.i unwind label %lpad187.i + +invoke.cont188.i: ; preds = %call.i.i.i1408.noexc + br label %invoke.cont203.i + +invoke.cont203.i: ; preds = %invoke.cont188.i + invoke void @_ZN2cv5errorERKNS_9ExceptionE() + to label %invoke.cont206.i unwind label %lpad205.i + +invoke.cont206.i: ; preds = %invoke.cont203.i + br label %for.body14.i.i + +lpad187.i: ; preds = %call.i.i.i1408.noexc + %0 = landingpad { i8*, i32 } + cleanup + unreachable + +lpad205.i: ; preds = %invoke.cont203.i + %1 = landingpad { i8*, i32 } + cleanup + unreachable + +for.body14.i.i: ; preds = %for.body14.i.i, %invoke.cont206.i + %i.1424.i.i = phi i32 [ %inc21.i.i, %for.body14.i.i ], [ 0, %invoke.cont206.i ] + %arrayidx15.i.i1427 = getelementptr inbounds %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746, %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746* undef, i32 %i.1424.i.i + %val.i.i = getelementptr inbounds %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746, %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746* %arrayidx15.i.i1427, i32 0, i32 0 + store float 
0xC415AF1D80000000, float* %val.i.i, align 4, !tbaa !1 + %next19.i.i = getelementptr inbounds %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746, %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746* undef, i32 %i.1424.i.i, i32 1 + store %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746* undef, %struct.CvNode1D.83.209.293.440.524.629.650.755.902.1049.1070.1133.1322.1469.1532.1574.1679.1700.1763.1805.1826.1973.2183.2225.2309.2351.2393.2435.2603.2750.2939.3317.3338.3359.3380.3401.3593.3746** %next19.i.i, align 4, !tbaa !7 + %inc21.i.i = add nuw nsw i32 %i.1424.i.i, 1 + %exitcond438.i.i = icmp eq i32 %inc21.i.i, 0 + br i1 %exitcond438.i.i, label %for.end22.i.i, label %for.body14.i.i + +for.end22.i.i: ; preds = %for.body14.i.i + unreachable +} + +declare void @cvGetMat() local_unnamed_addr #1 + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @_ZN2cv5errorERKNS_9ExceptionE() local_unnamed_addr #1 + +declare void @_ZNSsC1EPKcRKSaIcE() unnamed_addr #1 + +; Function Attrs: nobuiltin +declare void @_Znaj() local_unnamed_addr #2 + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+dsp,+neon,+vfp3,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { builtin } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !3, i64 0} +!2 = !{!"_ZTS8CvNode1D", !3, i64 0, !6, i64 4} +!3 = !{!"float", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C++ TBAA"} +!6 = !{!"any pointer", !4, i64 0} +!7 = !{!2, !6, i64 4}