diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder_with_external_users.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+; Make sure that we rotate the graph to help avoid the shuffle feeding
+; the external vectorizable stores.
+;
+; SLP starts vectorizing from the operands of the `fcmp` in bb2, then crosses
+; into bb1, vectorizing all the way to the broadcast load at the top.
+; The stores in bb1 are external to this tree, but they are vectorizable and are
+; in reverse order.
+define void @rotate_with_external_users(double *%A, double *%ptr) {
+; CHECK-LABEL: @rotate_with_external_users(
+; CHECK-NEXT:  bb1:
+; CHECK-NEXT:    [[LD:%.*]] = load double, double* undef, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[LD]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[LD]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], <double 1.100000e+00, double 2.200000e+00>
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], <double 1.100000e+00, double 2.200000e+00>
+; CHECK-NEXT:    [[PTRA1:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 0
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[PTRA1]] to <2 x double>*
+; CHECK-NEXT:    store <2 x double> [[SHUFFLE]], <2 x double>* [[TMP4]], align 8
+; CHECK-NEXT:    br label [[BB2:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    [[TMP5:%.*]] = fadd <2 x double> [[TMP3]], <double 3.300000e+00, double 4.400000e+00>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
+; CHECK-NEXT:    [[SEED:%.*]] = fcmp ogt double [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    ret void
+;
+bb1:
+  %ld = load double, double* undef
+
+  %add1 = fadd double %ld, 1.1
+  %add2 = fadd double %ld, 2.2
+
+  %mul1 = fmul double %add1, 1.1
+  %mul2 = fmul double %add2, 2.2
+
+  ; These are external vectorizable stores with operands in reverse order.
+  %ptrA1 = getelementptr inbounds double, double* %A, i64 0
+  %ptrA2 = getelementptr inbounds double, double* %A, i64 1
+  store double %mul2, double *%ptrA1
+  store double %mul1, double *%ptrA2
+  br label %bb2
+
+bb2:
+  %add3 = fadd double %mul1, 3.3
+  %add4 = fadd double %mul2, 4.4
+  %seed = fcmp ogt double %add3, %add4
+  ret void
+}