Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -244,7 +244,7 @@
   if (OptLevel > 1) {
     if (EnableMLSM)
       MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
-    MPM.add(createGVNPass(DisableGVNLoadPRE));  // Remove redundancies
+    MPM.add(createGVNPass(true));               // Remove redundancies (load PRE deferred to the post-vectorization GVN run below)
   }
   MPM.add(createMemCpyOptPass());               // Remove memcpy / form memset
   MPM.add(createSCCPPass());                    // Constant prop with SCCP
@@ -278,6 +278,9 @@
     if (!DisableUnrollLoops)
       MPM.add(createLoopUnrollPass());
   }
+
+  if (!UseGVNAfterVectorization)
+    MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
 }
 
 if (LoadCombine)
@@ -343,6 +346,8 @@
       if (!DisableUnrollLoops)
         MPM.add(createLoopUnrollPass());
     }
+    if (!UseGVNAfterVectorization)
+      MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
   }
 
   addExtensionsToPM(EP_Peephole, MPM);
Index: test/Transforms/SLPVectorizer/X86/gvn-slp_ordering.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/gvn-slp_ordering.ll
+++ test/Transforms/SLPVectorizer/X86/gvn-slp_ordering.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -O2 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: load <4 x i32>
+; CHECK: mul nsw <4 x i32>
+; CHECK: add nsw <4 x i32>
+; CHECK: store <4 x i32>
+
+@a = common global [4 x i32] zeroinitializer, align 16
+@b = common global [4 x i32] zeroinitializer, align 16
+@d = common global [4 x i32] zeroinitializer, align 16
+@c = common global [4 x i32] zeroinitializer, align 16
+
+define void @fn() {
+  %1 = load i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 0), align 4
+  %2 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 0), align 4
+  %3 = mul nsw i32 %1, %2
+  %4 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 0), align 4
+  %5 = add nsw i32 %3, %4
+  store i32 %5, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 0), align 4
+  %6 = load i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 1), align 4
+  %7 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 1), align 4
+  %8 = mul nsw i32 %6, %7
+  %9 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 1), align 4
+  %10 = add nsw i32 %8, %9
+  store i32 %10, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 1), align 4
+  %11 = load i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 2), align 4
+  %12 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 2), align 4
+  %13 = mul nsw i32 %11, %12
+  %14 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 2), align 4
+  %15 = add nsw i32 %13, %14
+  store i32 %15, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 2), align 4
+  %16 = load i32* getelementptr inbounds ([4 x i32]* @a, i32 0, i64 3), align 4
+  %17 = load i32* getelementptr inbounds ([4 x i32]* @b, i32 0, i64 3), align 4
+  %18 = mul nsw i32 %16, %17
+  %19 = load i32* getelementptr inbounds ([4 x i32]* @d, i32 0, i64 3), align 4
+  %20 = add nsw i32 %18, %19
+  store i32 %20, i32* getelementptr inbounds ([4 x i32]* @c, i32 0, i64 3), align 4
+  ret void
+}