Index: ../lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- ../lib/Transforms/Vectorize/LoopVectorize.cpp
+++ ../lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -254,16 +254,29 @@
   return VectorType::get(Scalar, VF);
 }
 
-/// A helper function that returns GEP instruction and knows to skip
-/// 'bitcast'.
+/// A helper function that returns GEP instruction and knows to skip a
+/// 'bitcast'. The 'bitcast' may be skipped if the source and the destination
+/// pointee types of the 'bitcast' have the same size.
+/// For example:
+///   bitcast double** %var to i64* - can be skipped
+///   bitcast double** %var to i8*  - can not
 static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
 
   if (isa<GetElementPtrInst>(Ptr))
     return cast<GetElementPtrInst>(Ptr);
 
   if (isa<BitCastInst>(Ptr) &&
-      isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0)))
-    return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
+      isa<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0))) {
+    Type *BitcastTy = Ptr->getType();
+    Type *GEPTy = cast<BitCastInst>(Ptr)->getSrcTy();
+    if (!isa<PointerType>(BitcastTy) || !isa<PointerType>(GEPTy))
+      return nullptr;
+    Type *Pointee1Ty = cast<PointerType>(BitcastTy)->getPointerElementType();
+    Type *Pointee2Ty = cast<PointerType>(GEPTy)->getPointerElementType();
+    const DataLayout &DL = cast<BitCastInst>(Ptr)->getModule()->getDataLayout();
+    if (DL.getTypeSizeInBits(Pointee1Ty) == DL.getTypeSizeInBits(Pointee2Ty))
+      return cast<GetElementPtrInst>(cast<BitCastInst>(Ptr)->getOperand(0));
+  }
   return nullptr;
 }
 
Index: ../test/Transforms/LoopVectorize/gep_with_bitcast.ll
===================================================================
--- ../test/Transforms/LoopVectorize/gep_with_bitcast.ll
+++ ../test/Transforms/LoopVectorize/gep_with_bitcast.ll
@@ -37,4 +37,31 @@
 
 for.end:
   ret void
+}
+
+; CHECK-LABEL: @bar
+; CHECK: vector.body
+; CHECK: bitcast double** {{.*}} to i8*
+; CHECK: load i8, i8*
+; CHECK: insertelement <4 x i8> undef, i8 {{.*}}
+
+define void @bar(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds double*, double** %in, i64 %indvars.iv
+  %tmp53 = bitcast double** %arrayidx to i8*
+  %tmp54 = load i8, i8* %tmp53, align 1
+  %cmp1 = icmp eq i8 %tmp54, 0
+  %arrayidx3 = getelementptr inbounds i8, i8* %res, i64 %indvars.iv
+  %frombool = zext i1 %cmp1 to i8
+  store i8 %frombool, i8* %arrayidx3, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 4096
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
 }
\ No newline at end of file