Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3696,6 +3696,23 @@
           }
         }
 
+    // Try to vectorize trees that start at the binary operator feeding a
+    // return, pairing that operator's two operands.
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
+      if (RI->getNumOperands() != 0)
+        if (BinaryOperator *BinOp =
+                dyn_cast<BinaryOperator>(RI->getOperand(0))) {
+          DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
+          if (tryToVectorizePair(BinOp->getOperand(0),
+                                 BinOp->getOperand(1), R)) {
+            Changed = true;
+            // Reset both iterators to the block's current begin/end.
+            it = BB->begin();
+            e = BB->end();
+            continue;
+          }
+        }
+
     // Try to vectorize trees that start at compare instructions.
     if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
       if (tryToVectorizePair(CI->getOperand(0), CI->getOperand(1), R)) {
Index: test/Transforms/SLPVectorizer/X86/return.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/return.ll
+++ test/Transforms/SLPVectorizer/X86/return.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S | FileCheck %s
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "x86_64--linux-gnu"
+
+@a = common global [4 x double] zeroinitializer, align 8
+@b = common global [4 x double] zeroinitializer, align 8
+
+; double a[4], b[4];
+; double foo() {
+;   double sum =0;
+;   sum = (a[0]+b[0]) + (a[1]+b[1]);
+;   return sum;
+; }
+
+; CHECK-LABEL: @return1
+; CHECK: %0 = load <2 x double>*
+; CHECK: %1 = load <2 x double>*
+; CHECK: %2 = fadd <2 x double>
+
+; Function Attrs: nounwind readonly
+define double @return1() #0 {
+entry:
+  %a0 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 0), align 8, !tbaa !1
+  %b0 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 0), align 8, !tbaa !1
+  %add0 = fadd double %a0, %b0
+  %a1 = load double* getelementptr inbounds ([4 x double]* @a, i32 0, i32 1), align 8, !tbaa !1
+  %b1 = load double* getelementptr inbounds ([4 x double]* @b, i32 0, i32 1), align 8, !tbaa !1
+  %add1 = fadd double %a1, %b1
+  %add2 = fadd double %add0, %add1
+  ret double %add2
+}
+
+; double hadd(double *x) {
+;   return ((x[0] + x[2]) + (x[1] + x[3]));
+; }
+
+; CHECK-LABEL: @return2
+; CHECK: %1 = load <2 x double>*
+; CHECK: %3 = load <2 x double>* %2
+; CHECK: %4 = fadd <2 x double> %1, %3
+
+; Function Attrs: nounwind readonly
+define double @return2(double* nocapture readonly %x) #0 {
+entry:
+  %x0 = load double* %x, align 4, !tbaa !1
+  %arrayidx1 = getelementptr inbounds double* %x, i32 2
+  %x2 = load double* %arrayidx1, align 4, !tbaa !1
+  %add3 = fadd double %x0, %x2
+  %arrayidx2 = getelementptr inbounds double* %x, i32 1
+  %x1 = load double* %arrayidx2, align 4, !tbaa !1
+  %arrayidx3 = getelementptr inbounds double* %x, i32 3
+  %x3 = load double* %arrayidx3, align 4, !tbaa !1
+  %add4 = fadd double %x1, %x3
+  %add5 = fadd double %add3, %add4
+  ret double %add5
+}
+
+attributes #0 = { nounwind readonly "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.ident = !{!0}
+
+!0 = metadata !{metadata !"clang version 3.6.0 (221134)"}
+!1 = metadata !{metadata !2, metadata !2, i64 0}
+!2 = metadata !{metadata !"double", metadata !3, i64 0}
+!3 = metadata !{metadata !"omnipotent char", metadata !4, i64 0}
+!4 = metadata !{metadata !"Simple C/C++ TBAA"}