Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3061,6 +3061,27 @@ case Intrinsic::lifetime_start: scalarizeInstruction(it); break; + case Intrinsic::powi: + case Intrinsic::ctlz: + case Intrinsic::cttz: + for (unsigned Part = 0; Part < UF; ++Part) { + SmallVector Args; + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + if (i == 1) { + Args.push_back(CI->getArgOperand(i)); + continue; + } + VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); + Args.push_back(Arg[Part]); + } + Type *Tys[] = {CI->getType()}; + if (VF > 1) + Tys[0] = VectorType::get(CI->getType()->getScalarType(), VF); + + Function *F = Intrinsic::getDeclaration(M, ID, Tys); + Entry[Part] = Builder.CreateCall(F, Args); + } + break; default: for (unsigned Part = 0; Part < UF; ++Part) { SmallVector Args; @@ -3413,6 +3434,16 @@ return false; } + // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the 2nd + // argument is same (i.e. loop invariant) + if (CI && + hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { + if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); + return false; + } + } + // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. if ((!VectorType::isValidElementType(it->getType()) && Index: test/Transforms/LoopVectorize/intrinsic.ll =================================================================== --- test/Transforms/LoopVectorize/intrinsic.ll +++ test/Transforms/LoopVectorize/intrinsic.ll @@ -1090,3 +1090,105 @@ ret void } +declare double @llvm.powi.f64(double %Val, i32 %power) nounwind readnone + +;CHECK-LABEL: @powi_f64( +;CHECK: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64(i32 %n, double* noalias %y, double* noalias %x, i32 %P) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %call = tail call double @llvm.powi.f64(double %0, i32 %P) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +;CHECK-LABEL: @powi_f64_neg( +;CHECK-NOT: llvm.powi.v4f64 +;CHECK: ret void +define void @powi_f64_neg(i32 %n, double* noalias %y, double* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds double* %y, i64 %indvars.iv + %0 = load double* %arrayidx, align 8 + %1 = trunc i64 %indvars.iv to i32 + %call = tail call double @llvm.powi.f64(double %0, i32 %1) nounwind readnone + %arrayidx4 = getelementptr inbounds double* %x, i64 %indvars.iv + store double %call, double* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.cttz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @cttz_f64( +;CHECK: llvm.cttz.v4i64 +;CHECK: ret void +define void @cttz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.cttz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} + +declare i64 @llvm.ctlz.i64 (i64, i1) nounwind readnone + +;CHECK-LABEL: @ctlz_f64( +;CHECK: llvm.ctlz.v4i64 +;CHECK: ret void +define void @ctlz_f64(i32 %n, i64* noalias %y, i64* noalias %x) nounwind uwtable { +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.end + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64* %y, i64 %indvars.iv + %0 = load i64* %arrayidx, align 8 + %call = tail call i64 @llvm.ctlz.i64(i64 %0, i1 true) nounwind readnone + %arrayidx4 = getelementptr inbounds i64* %x, i64 %indvars.iv + store i64 %call, i64* %arrayidx4, align 8 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +}