Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2921,13 +2921,8 @@
     // Peel the first iteration out of the loop since there's nothing
     // interesting to do anyway and it simplifies the checks in the loop.
     auto *I = cast<Instruction>(VL[0]);
-    Value *VLeft = I->getOperand(0);
-    Value *VRight = I->getOperand(1);
-    if (!isa<Instruction>(VRight) && isa<Instruction>(VLeft))
-      // Favor having instruction to the right. FIXME: why?
-      std::swap(VLeft, VRight);
-    Left.push_back(VLeft);
-    Right.push_back(VRight);
+    Left.push_back(I->getOperand(0));
+    Right.push_back(I->getOperand(1));
   }
 
   // Keep track if we have instructions with all the same opcode on one side.
Index: test/Transforms/LoopVectorize/X86/metadata-enable.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/metadata-enable.ll
+++ test/Transforms/LoopVectorize/X86/metadata-enable.ll
@@ -2246,84 +2246,84 @@
 ; O3DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
 ; O3DEFAULT-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> undef, i32 [[N:%.*]], i32 0
 ; O3DEFAULT-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32> zeroinitializer
-; O3DEFAULT-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP1]]
+; O3DEFAULT-NEXT:    [[TMP4:%.*]] = add nsw <4 x i32> [[TMP1]], [[TMP3]]
 ; O3DEFAULT-NEXT:    [[TMP5:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4
 ; O3DEFAULT-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4
 ; O3DEFAULT-NEXT:    [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
 ; O3DEFAULT-NEXT:    [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
-; O3DEFAULT-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP7]]
+; O3DEFAULT-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP3]]
 ; O3DEFAULT-NEXT:    [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
 ; O3DEFAULT-NEXT:    [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8
 ; O3DEFAULT-NEXT:    [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8
 ; O3DEFAULT-NEXT:    [[TMP10:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
-; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP11]]
+; O3DEFAULT-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[TMP11]], [[TMP3]]
 ; O3DEFAULT-NEXT:    [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4
 ; O3DEFAULT-NEXT:    [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12
 ; O3DEFAULT-NEXT:    [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12
 ; O3DEFAULT-NEXT:    [[TMP14:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    [[TMP15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
-; O3DEFAULT-NEXT:    [[TMP16:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP15]]
+; O3DEFAULT-NEXT:    [[TMP16:%.*]] = add nsw <4 x i32> [[TMP15]], [[TMP3]]
 ; O3DEFAULT-NEXT:    [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>*
 ; O3DEFAULT-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP17]], align 4
 ; O3DEFAULT-NEXT:    [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32*
[[B]], i64 16 ; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 ; O3DEFAULT-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP19:%.*]] = load <4 x i32>, <4 x i32>* [[TMP18]], align 4 -; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP19]] +; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[TMP19]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP21]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 ; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 ; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP23:%.*]] = load <4 x i32>, <4 x i32>* [[TMP22]], align 4 -; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP23]] +; O3DEFAULT-NEXT: [[TMP24:%.*]] = add nsw <4 x i32> [[TMP23]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP24]], <4 x i32>* [[TMP25]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 ; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 ; O3DEFAULT-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP27:%.*]] = load <4 x i32>, <4 x i32>* [[TMP26]], align 4 -; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP27]] +; O3DEFAULT-NEXT: [[TMP28:%.*]] = add nsw <4 x i32> [[TMP27]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP28]], <4 x i32>* [[TMP29]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 ; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 ; O3DEFAULT-NEXT: [[TMP30:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP31:%.*]] = load <4 x i32>, <4 x i32>* [[TMP30]], align 4 -; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP31]] +; O3DEFAULT-NEXT: [[TMP32:%.*]] = add nsw <4 x i32> [[TMP31]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP32]], <4 x i32>* [[TMP33]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 ; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 ; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP35:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 -; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP35]] +; O3DEFAULT-NEXT: [[TMP36:%.*]] = add nsw <4 x i32> [[TMP35]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP36]], <4 x i32>* [[TMP37]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 ; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 ; O3DEFAULT-NEXT: [[TMP38:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP39:%.*]] = load <4 x i32>, <4 x i32>* [[TMP38]], align 4 -; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP39]] +; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[TMP39]], 
[[TMP3]] ; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP41]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 ; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 ; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP43:%.*]] = load <4 x i32>, <4 x i32>* [[TMP42]], align 4 -; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP43]] +; O3DEFAULT-NEXT: [[TMP44:%.*]] = add nsw <4 x i32> [[TMP43]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP44]], <4 x i32>* [[TMP45]], align 4 ; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 ; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 ; O3DEFAULT-NEXT: [[TMP46:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>* ; O3DEFAULT-NEXT: [[TMP47:%.*]] = load <4 x i32>, <4 x i32>* [[TMP46]], align 4 -; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP3]], [[TMP47]] +; O3DEFAULT-NEXT: [[TMP48:%.*]] = add nsw <4 x i32> [[TMP47]], [[TMP3]] ; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>* ; O3DEFAULT-NEXT: store <4 x i32> [[TMP48]], <4 x i32>* [[TMP49]], align 4 ; O3DEFAULT-NEXT: [[TMP50:%.*]] = load i32, i32* [[A]], align 4 Index: test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll +++ test/Transforms/SLPVectorizer/AArch64/ext-trunc.ll @@ -66,7 +66,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[C1:%.*]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[C2:%.*]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[C3:%.*]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[GEP0:%.*]] = getelementptr inbounds i64, i64* [[P:%.*]], i64 [[TMP6]] ; CHECK-NEXT: [[LOAD0:%.*]] = load i64, i64* [[GEP0]] Index: test/Transforms/SLPVectorizer/AArch64/gather-cost.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/gather-cost.ll +++ test/Transforms/SLPVectorizer/AArch64/gather-cost.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = lshr <4 x i32> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = mul nuw <4 x i32> [[TMP6]], -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[TMP7]], [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], [[TMP7]] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.i32.v4i32(<4 x i32> [[TMP9]]) ; CHECK-NEXT: ret i32 [[TMP10]] Index: test/Transforms/SLPVectorizer/AArch64/getelementptr.ll =================================================================== --- test/Transforms/SLPVectorizer/AArch64/getelementptr.ll +++ test/Transforms/SLPVectorizer/AArch64/getelementptr.ll @@ -65,7 +65,7 @@ ; CHECK-NEXT: [[T4:%.*]] = shl nsw i32 [[TMP5]], 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> 
undef, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP2]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]] @@ -86,10 +86,10 @@ ; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP16]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP19]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP18]], [[TMP20]] +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x i32> undef, i32 [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP21]] = add nsw <2 x i32> [[TMP19]], [[TMP20]] ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i32> [[TMP21]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP22]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] @@ -184,7 +184,7 @@ ; CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]] ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> undef, i32 [[T4]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP1]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]] ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = sext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]] @@ -194,10 +194,10 @@ ; CHECK-NEXT: [[TMP14:%.*]] = sext i32 [[TMP13]] to i64 ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]] ; CHECK-NEXT: [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> , i32 [[ADD11]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[T12]], i32 1 -; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP15]], [[TMP17]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> undef, i32 [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x i32> [[TMP15]], i32 [[ADD11]], i32 1 +; CHECK-NEXT: [[TMP17:%.*]] = insertelement <2 x i32> , i32 [[T12]], i32 1 +; CHECK-NEXT: [[TMP18]] = add nsw <2 x i32> [[TMP16]], [[TMP17]] ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i32> [[TMP18]], i32 0 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP19]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]] Index: test/Transforms/SLPVectorizer/NVPTX/v2f16.ll =================================================================== --- test/Transforms/SLPVectorizer/NVPTX/v2f16.ll +++ test/Transforms/SLPVectorizer/NVPTX/v2f16.ll @@ -16,8 +16,8 @@ ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds half, half* [[TMP10]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast half* [[TMP11]] to <2 x half>* ; 
CHECK-NEXT: [[TMP2:%.*]] = load <2 x half>, <2 x half>* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> , [[TMP3]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x half> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <2 x half> [[TMP3]], ; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds half, half* [[TMP15]], i64 [[TMP7]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast half* [[TMP16]] to <2 x half>* ; CHECK-NEXT: store <2 x half> [[TMP4]], <2 x half>* [[TMP5]], align 8 Index: test/Transforms/SLPVectorizer/X86/PR35628_2.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/PR35628_2.ll +++ test/Transforms/SLPVectorizer/X86/PR35628_2.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[TMP0]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[TMP0]], i32 2 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i64> [[TMP3]], i64 [[TMP0]], i32 3 -; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i64> [[TMP4]], ; CHECK-NEXT: [[TMP6]] = extractelement <4 x i64> [[TMP5]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP5]], i32 0 ; CHECK-NEXT: [[DUMMY_SHL:%.*]] = shl i64 [[TMP7]], 32 Index: test/Transforms/SLPVectorizer/X86/PR35777.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/PR35777.ll +++ test/Transforms/SLPVectorizer/X86/PR35777.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 2) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[ARG:%.*]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[ARG]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP0]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([6 x double], [6 x double]* @global, i64 0, i64 4) to <2 x double>*), align 16 ; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], [[TMP5]] Index: test/Transforms/SLPVectorizer/X86/PR39774.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/PR39774.ll +++ test/Transforms/SLPVectorizer/X86/PR39774.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP15:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <8 x i32> [[SHUFFLE]], i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; CHECK-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] @@ -100,7 +100,7 @@ ; FORCE_REDUCTION-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP13:%.*]], [[LOOP]] ], [ zeroinitializer, [[ENTRY:%.*]] ] ; FORCE_REDUCTION-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> undef, <4 x i32> ; FORCE_REDUCTION-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[SHUFFLE]], i32 1 -; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x 
i32> , [[SHUFFLE]] +; FORCE_REDUCTION-NEXT: [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], ; FORCE_REDUCTION-NEXT: [[VAL_1:%.*]] = and i32 [[TMP2]], undef ; FORCE_REDUCTION-NEXT: [[VAL_2:%.*]] = and i32 [[VAL_1]], [[TMP0:%.*]] ; FORCE_REDUCTION-NEXT: [[VAL_3:%.*]] = and i32 [[VAL_2]], [[TMP0]] Index: test/Transforms/SLPVectorizer/X86/PR40310.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/PR40310.ll +++ test/Transforms/SLPVectorizer/X86/PR40310.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[SHUFFLE]], i32 15 ; CHECK-NEXT: store atomic i32 [[TMP3]], i32* [[VALS:%.*]] unordered, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> , [[SHUFFLE]] +; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i32> [[SHUFFLE]], ; CHECK-NEXT: [[V14:%.*]] = and i32 [[TMP2]], undef ; CHECK-NEXT: [[V16:%.*]] = and i32 undef, [[V14]] ; CHECK-NEXT: [[V18:%.*]] = and i32 undef, [[V16]] Index: test/Transforms/SLPVectorizer/X86/barriercall.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/barriercall.ll +++ test/Transforms/SLPVectorizer/X86/barriercall.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: ret i32 undef Index: test/Transforms/SLPVectorizer/X86/commutativity.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/commutativity.ll +++ test/Transforms/SLPVectorizer/X86/commutativity.ll @@ -96,7 +96,7 @@ ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[B:%.*]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[C]], i32 2 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[A]], i32 3 -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], [[TMP9]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP9]], [[TMP12]] ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16 ; CHECK-NEXT: ret void ; Index: test/Transforms/SLPVectorizer/X86/compare-reduce.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/compare-reduce.ll +++ test/Transforms/SLPVectorizer/X86/compare-reduce.ll @@ -20,8 +20,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 ; CHECK-NEXT: [[CMP11:%.*]] = fcmp ogt double [[TMP8]], [[TMP9]] Index: test/Transforms/SLPVectorizer/X86/crash_cmpop.ll 
=================================================================== --- test/Transforms/SLPVectorizer/X86/crash_cmpop.ll +++ test/Transforms/SLPVectorizer/X86/crash_cmpop.ll @@ -68,12 +68,12 @@ ; AVX-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0 ; AVX-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[TMP1]], i32 1 ; AVX-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP5]], [[TMP7]] -; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> zeroinitializer, [[TMP0]] +; AVX-NEXT: [[TMP9:%.*]] = fmul <2 x float> [[TMP0]], zeroinitializer ; AVX-NEXT: [[TMP10:%.*]] = fadd <2 x float> [[TMP9]], [[TMP8]] ; AVX-NEXT: [[TMP11:%.*]] = fcmp olt <2 x float> [[TMP10]], ; AVX-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP10]], <2 x float> ; AVX-NEXT: [[TMP13:%.*]] = fcmp olt <2 x float> [[TMP12]], -; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> zeroinitializer, [[TMP12]] +; AVX-NEXT: [[TMP14:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer ; AVX-NEXT: [[TMP15:%.*]] = select <2 x i1> [[TMP13]], <2 x float> , <2 x float> [[TMP14]] ; AVX-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP15]], i32 0 ; AVX-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[TMP15]], i32 1 Index: test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll +++ test/Transforms/SLPVectorizer/X86/crash_mandeltext.ll @@ -99,7 +99,7 @@ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[TMP]], i32 1 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <2 x double> [[TMP1]], undef ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_HOGE:%.*]], %struct.hoge* [[ARG:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> undef, [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], undef ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP7]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8 Index: test/Transforms/SLPVectorizer/X86/crash_smallpt.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/crash_smallpt.ll +++ test/Transforms/SLPVectorizer/X86/crash_smallpt.ll @@ -31,10 +31,10 @@ ; CHECK: cond.false66.us: ; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, undef ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[ADD_I276_US]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double 0xBFA5CC2D1960285F, i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> , [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> , [[TMP3]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double undef, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fmul <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> undef, [[TMP2]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[AGG_TMP99208_SROA_0_0_IDX]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP4]], <2 x double>* [[TMP6]], align 8 Index: test/Transforms/SLPVectorizer/X86/cross_block_slp.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/cross_block_slp.ll +++ test/Transforms/SLPVectorizer/X86/cross_block_slp.ll @@ -22,7 +22,7 
@@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[B:%.*]] to <2 x float>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, <2 x float>* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x float> [[TMP1]], ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[G:%.*]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; CHECK: if.then: Index: test/Transforms/SLPVectorizer/X86/cse.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/cse.ll +++ test/Transforms/SLPVectorizer/X86/cse.ll @@ -18,20 +18,21 @@ ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[G]], i64 6 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], ; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[ADD8:%.*]] = fadd double [[TMP5]], 7.000000e+00 ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[G]], i64 2 -; CHECK-NEXT: store double [[ADD8]], double* [[ARRAYIDX9]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 ; CHECK-NEXT: [[MUL11:%.*]] = fmul double [[TMP6]], 4.000000e+00 -; CHECK-NEXT: [[ADD12:%.*]] = fadd double [[MUL11]], 8.000000e+00 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> undef, double [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[MUL11]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 -; CHECK-NEXT: store double [[ADD12]], double* [[ARRAYIDX13]], align 8 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast double* [[ARRAYIDX9]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP9]], <2 x double>* [[TMP10]], align 8 ; CHECK-NEXT: ret i32 undef ; entry: @@ -72,13 +73,13 @@ ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef @@ -135,7 +136,7 @@ ; 
CHECK-NEXT: [[TMP8:%.*]] = fmul double [[TMP7]], 3.000000e+00 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[TMP9]], double [[TMP8]], i32 1 -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds double, double* [[G]], i64 1 ; CHECK-NEXT: [[TMP13:%.*]] = bitcast double* [[G]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP13]], align 8 @@ -146,7 +147,7 @@ ; CHECK-NEXT: [[TMP18:%.*]] = fmul double [[TMP17]], 3.000000e+00 ; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> undef, double [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x double> [[TMP19]], double [[TMP18]], i32 1 -; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = fadd <2 x double> [[TMP20]], ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds double, double* [[G]], i64 3 ; CHECK-NEXT: [[TMP23:%.*]] = bitcast double* [[TMP15]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP21]], <2 x double>* [[TMP23]], align 8 @@ -203,13 +204,13 @@ ; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[A]], i64 3 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> undef, double [[CONV]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[CONV]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[CONV]], i32 2 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[CONV]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP6]], [[TMP2]] -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> , [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x double> [[TMP7]], ; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[A]] to <4 x double>* ; CHECK-NEXT: store <4 x double> [[TMP8]], <4 x double>* [[TMP9]], align 8 ; CHECK-NEXT: ret i32 undef Index: test/Transforms/SLPVectorizer/X86/cycle_dup.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/cycle_dup.ll +++ test/Transforms/SLPVectorizer/X86/cycle_dup.ll @@ -24,7 +24,7 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_029:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ [[TMP4:%.*]], [[FOR_BODY]] ], [ [[TMP1]], [[ENTRY]] ] -; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> , [[TMP3]] +; CHECK-NEXT: [[TMP4]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_029]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[TMP2]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]] Index: test/Transforms/SLPVectorizer/X86/external_user.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/external_user.ll +++ test/Transforms/SLPVectorizer/X86/external_user.ll @@ -32,9 +32,9 @@ ; CHECK: for.body: ; CHECK-NEXT: [[I_020:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]] -; 
CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]] -; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]], +; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]], ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_020]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] Index: test/Transforms/SLPVectorizer/X86/extract.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/extract.ll +++ test/Transforms/SLPVectorizer/X86/extract.ll @@ -8,7 +8,7 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[LD]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[LD]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -32,7 +32,7 @@ ; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* undef ; CHECK-NEXT: [[REORDER_SHUFFLE:%.*]] = shufflevector <2 x double> [[LD]], <2 x double> undef, <2 x i32> ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 -; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> , [[REORDER_SHUFFLE]] +; CHECK-NEXT: [[TMP0:%.*]] = fadd <2 x double> [[REORDER_SHUFFLE]], ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[P1]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP0]], <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: ret void @@ -59,7 +59,7 @@ ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds double, double* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> undef, double [[V0]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[P0]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 4 ; CHECK-NEXT: ret void Index: test/Transforms/SLPVectorizer/X86/extractcost.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/extractcost.ll +++ test/Transforms/SLPVectorizer/X86/extractcost.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP5:%.*]] = shl <4 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0 Index: test/Transforms/SLPVectorizer/X86/hoist.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/hoist.ll +++ test/Transforms/SLPVectorizer/X86/hoist.ll @@ -25,7 +25,7 @@ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[I_024]] ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP4:%.*]] = 
add nsw <4 x i32> [[SHUFFLE]], [[TMP3]] +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], [[SHUFFLE]] ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX]] to <4 x i32>* ; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4 ; CHECK-NEXT: [[ADD10]] = add nsw i32 [[I_024]], 4 Index: test/Transforms/SLPVectorizer/X86/horizontal.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/horizontal.ll +++ test/Transforms/SLPVectorizer/X86/horizontal.ll @@ -36,7 +36,7 @@ ; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; CHECK-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; CHECK-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> @@ -76,7 +76,7 @@ ; STORE-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[ADD1330]] ; STORE-NEXT: [[TMP1:%.*]] = bitcast float* [[ARRAYIDX]] to <4 x float>* ; STORE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 -; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> , [[TMP2]] +; STORE-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[TMP2]], ; STORE-NEXT: [[ADD6:%.*]] = fadd fast float undef, undef ; STORE-NEXT: [[ADD11:%.*]] = fadd fast float [[ADD6]], undef ; STORE-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> undef, <4 x i32> Index: test/Transforms/SLPVectorizer/X86/in-tree-user.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/in-tree-user.ll +++ test/Transforms/SLPVectorizer/X86/in-tree-user.ll @@ -21,8 +21,8 @@ ; CHECK-NEXT: [[TMP3:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* [[TMP3]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x double> [[TMP1]], [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> , [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> , [[TMP6]] +; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[TMP5]], +; CHECK-NEXT: [[TMP7:%.*]] = fadd <2 x double> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP7]], i32 0 ; CHECK-NEXT: [[INTREEUSER:%.*]] = fadd double [[TMP8]], [[TMP8]] ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP7]], i32 1 Index: test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll +++ test/Transforms/SLPVectorizer/X86/insert-after-bundle.ll @@ -132,7 +132,7 @@ ; CHECK-NEXT: [[TMP24:%.*]] = icmp ult <16 x i8> [[TMP17]], [[TMP19]] ; CHECK-NEXT: [[TMP25:%.*]] = select <16 x i1> [[TMP24]], <16 x i8> [[TMP23]], <16 x i8> [[TMP21]] ; CHECK-NEXT: [[TMP26:%.*]] = zext <16 x i8> [[TMP25]] to <16 x i32> -; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP15]], [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = mul <16 x i32> [[TMP26]], [[TMP15]] ; CHECK-NEXT: [[TMP28:%.*]] = trunc <16 x i32> [[TMP27]] to <16 x i8> ; CHECK-NEXT: [[ARRAYIDX188:%.*]] = getelementptr inbounds i8, i8* [[E_ADDR_0354]], i64 15 ; CHECK-NEXT: [[TMP29:%.*]] = bitcast i8* [[E_ADDR_0354]] to <16 x i8>* @@ -413,52 +413,52 @@ ; CHECK-LABEL: 
@foo1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([64 x i32]* @ib to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> , [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[TMP0]], ; CHECK-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* bitcast ([64 x i32]* @ia to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 4) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> , [[TMP2]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i32> [[TMP2]], ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 4) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 8) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP4]], ; CHECK-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 8) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 12) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> , [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 12) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 16) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i32> [[TMP8]], ; CHECK-NEXT: store <4 x i32> [[TMP9]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 16) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 20) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = xor <4 x i32> [[TMP10]], ; CHECK-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 20) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 24) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = xor <4 x i32> [[TMP12]], ; CHECK-NEXT: store <4 x i32> [[TMP13]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 24) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 28) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP14]], ; CHECK-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 28) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP16:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 32) to <4 x i32>*), align 16 -; CHECK-NEXT: 
[[TMP17:%.*]] = xor <4 x i32> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = xor <4 x i32> [[TMP16]], ; CHECK-NEXT: store <4 x i32> [[TMP17]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 32) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 36) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> , [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i32> [[TMP18]], ; CHECK-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 36) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 40) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> , [[TMP20]] +; CHECK-NEXT: [[TMP21:%.*]] = xor <4 x i32> [[TMP20]], ; CHECK-NEXT: store <4 x i32> [[TMP21]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 40) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 44) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> , [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = xor <4 x i32> [[TMP22]], ; CHECK-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 44) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP24:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 48) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> , [[TMP24]] +; CHECK-NEXT: [[TMP25:%.*]] = xor <4 x i32> [[TMP24]], ; CHECK-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 48) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 52) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> , [[TMP26]] +; CHECK-NEXT: [[TMP27:%.*]] = xor <4 x i32> [[TMP26]], ; CHECK-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 52) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 56) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> , [[TMP28]] +; CHECK-NEXT: [[TMP29:%.*]] = xor <4 x i32> [[TMP28]], ; CHECK-NEXT: store <4 x i32> [[TMP29]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 56) to <4 x i32>*), align 16 ; CHECK-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ib, i64 0, i64 60) to <4 x i32>*), align 16 -; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> , [[TMP30]] +; CHECK-NEXT: [[TMP31:%.*]] = xor <4 x i32> [[TMP30]], ; CHECK-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* bitcast (i32* getelementptr inbounds ([64 x i32], [64 x i32]* @ia, i64 0, i64 60) to <4 x i32>*), align 16 ; CHECK-NEXT: br label [[FOR_BODY5:%.*]] ; CHECK: for.cond3: Index: test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll =================================================================== --- 
test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll +++ test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll @@ -627,7 +627,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; CHECK-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 @@ -645,7 +645,7 @@ ; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 2 ; ZEROTHRESH-NEXT: [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[Z:%.*]], i32 3 ; ZEROTHRESH-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], -; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> , [[TMP4]] +; ZEROTHRESH-NEXT: [[TMP5:%.*]] = fmul <4 x double> [[TMP4]], ; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0 ; ZEROTHRESH-NEXT: [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3 ; ZEROTHRESH-NEXT: [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1 Index: test/Transforms/SLPVectorizer/X86/long_chains.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/long_chains.ll +++ test/Transforms/SLPVectorizer/X86/long_chains.ll @@ -11,22 +11,22 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[B:%.*]] to <2 x i8>* ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> , [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i8> [[TMP1]], ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i8> [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> undef, i8 [[TMP3]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i8> [[TMP2]], i32 1 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i8> [[TMP4]], i8 [[TMP5]], i32 1 ; CHECK-NEXT: [[TMP7:%.*]] = sitofp <2 x i8> [[TMP6]] to <2 x double> ; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x double> [[TMP7]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> , [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = fadd <2 x double> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = fmul <2 x double> [[TMP11]], [[TMP11]] -; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> , [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = fadd <2 x double> [[TMP12]], ; CHECK-NEXT: [[TMP14:%.*]] = fmul <2 x double> [[TMP13]], [[TMP13]] -; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> , [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = fadd <2 x double> [[TMP14]], ; CHECK-NEXT: [[TMP16:%.*]] = fmul <2 x double> [[TMP15]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> , [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = fadd <2 x double> [[TMP16]], ; CHECK-NEXT: [[TMP18:%.*]] = bitcast double* [[A:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP17]], <2 x double>* [[TMP18]], align 8 ; CHECK-NEXT: ret i32 undef Index: test/Transforms/SLPVectorizer/X86/loopinvariant.ll =================================================================== --- 
test/Transforms/SLPVectorizer/X86/loopinvariant.ll +++ test/Transforms/SLPVectorizer/X86/loopinvariant.ll @@ -36,7 +36,7 @@ ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP13]], i32 [[N]], i32 5 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x i32> [[TMP14]], i32 [[N]], i32 6 ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x i32> [[TMP15]], i32 [[N]], i32 7 -; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP16]], [[TMP8]] +; CHECK-NEXT: [[TMP17:%.*]] = add nsw <8 x i32> [[TMP8]], [[TMP16]] ; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32* [[ARRAYIDX]] to <8 x i32>* ; CHECK-NEXT: store <8 x i32> [[TMP17]], <8 x i32>* [[TMP18]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 8 Index: test/Transforms/SLPVectorizer/X86/multi_block.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/multi_block.ll +++ test/Transforms/SLPVectorizer/X86/multi_block.ll @@ -26,10 +26,10 @@ ; CHECK-NEXT: br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]] ; CHECK: [[TMP6:%.*]] = tail call i32 (...) @foo() ; CHECK-NEXT: br label [[TMP7]] -; CHECK: [[TMP8:%.*]] = fadd <2 x float> , [[TMP3]] +; CHECK: [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8 ; CHECK-NEXT: [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double> -; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> , [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], ; CHECK-NEXT: [[TMP12:%.*]] = bitcast double* [[TMP9]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP12]], align 8 ; CHECK-NEXT: ret i32 undef Index: test/Transforms/SLPVectorizer/X86/multi_user.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/multi_user.ll +++ test/Transforms/SLPVectorizer/X86/multi_user.ll @@ -19,7 +19,7 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP1]], i32 2 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[TMP1]], i32 3 -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> , [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = add nsw <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* ; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP6]], [[TMP8]] Index: test/Transforms/SLPVectorizer/X86/operandorder.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/operandorder.ll +++ test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> undef, double [[V1:%.*]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[V2:%.*]], i32 1 -; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP4]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = fadd <2 x double> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[TO:%.*]] to <2 x double>* ; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 4 ; CHECK-NEXT: ret void Index: test/Transforms/SLPVectorizer/X86/phi.ll =================================================================== --- test/Transforms/SLPVectorizer/X86/phi.ll +++ test/Transforms/SLPVectorizer/X86/phi.ll @@ -81,9 +81,9 @@ ; CHECK: for.body: ; 
CHECK-NEXT: [[I_019:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[ENTRY]] ], [ [[TMP5:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> , [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> , [[TMP3]]
-; CHECK-NEXT: [[TMP5]] = fadd <2 x double> , [[TMP4]]
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]],
+; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> [[TMP3]],
+; CHECK-NEXT: [[TMP5]] = fadd <2 x double> [[TMP4]],
 ; CHECK-NEXT: [[INC]] = add nsw i32 [[I_019]], 1
 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 100
 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
@@ -150,9 +150,9 @@
 ; CHECK: for.body:
 ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[R_052:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[ADD6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP12:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP19:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = phi float [ [[TMP0]], [[ENTRY]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x float> [ [[REORDER_SHUFFLE]], [[ENTRY]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 7.000000e+00
 ; CHECK-NEXT: [[ADD6]] = fadd float [[R_052]], [[MUL]]
 ; CHECK-NEXT: [[TMP7:%.*]] = add nsw i64 [[INDVARS_IV]], 2
@@ -163,27 +163,26 @@
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[ARRAYIDX19]] to <2 x float>*
 ; CHECK-NEXT: [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4
 ; CHECK-NEXT: [[REORDER_SHUFFLE1:%.*]] = shufflevector <2 x float> [[TMP10]], <2 x float> undef, <2 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> , float [[TMP4]], i32 3
-; CHECK-NEXT: [[TMP12]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> undef, float [[TMP12]], i32 0
-; CHECK-NEXT: [[TMP14]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1
-; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP14]], i32 1
-; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP8]], i32 2
-; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x float> [[TMP16]], float 8.000000e+00, i32 3
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[TMP11]], [[TMP17]]
-; CHECK-NEXT: [[TMP19]] = fadd <4 x float> [[TMP6]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP20]], 121
+; CHECK-NEXT: [[TMP11]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> undef, float [[TMP11]], i32 0
+; CHECK-NEXT: [[TMP13]] = extractelement <2 x float> [[REORDER_SHUFFLE1]], i32 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x float> [[TMP12]], float [[TMP13]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x float> [[TMP14]], float [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x float> [[TMP15]], float [[TMP4]], i32 3
+; CHECK-NEXT: [[TMP17:%.*]] = fmul <4 x float> [[TMP16]],
+; CHECK-NEXT: [[TMP18]] = fadd <4 x float> [[TMP6]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP19]], 121
 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK: for.end:
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP19]], i32 3
-; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP19]], i32 2
-; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP19]], i32 1
-; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[TMP19]], i32 0
-; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP24]]
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[TMP18]], i32 3
+; CHECK-NEXT: [[ADD28:%.*]] = fadd float [[ADD6]], [[TMP20]]
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[TMP18]], i32 2
+; CHECK-NEXT: [[ADD29:%.*]] = fadd float [[ADD28]], [[TMP21]]
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[TMP18]], i32 1
+; CHECK-NEXT: [[ADD30:%.*]] = fadd float [[ADD29]], [[TMP22]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[TMP18]], i32 0
+; CHECK-NEXT: [[ADD31:%.*]] = fadd float [[ADD30]], [[TMP23]]
 ; CHECK-NEXT: ret float [[ADD31]]
 ;
 entry:
@@ -255,7 +254,7 @@
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP5]], i32 2
 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP6]], float [[TMP7]], i32 3
-; CHECK-NEXT: [[TMP9]] = fmul <4 x float> , [[TMP8]]
+; CHECK-NEXT: [[TMP9]] = fmul <4 x float> [[TMP8]],
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], 128
 ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
Index: test/Transforms/SLPVectorizer/X86/pr35497.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/pr35497.ll
+++ test/Transforms/SLPVectorizer/X86/pr35497.ll
@@ -55,7 +55,7 @@
 ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 5
 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> undef, i64 [[TMP0]], i32 1
 ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[TMP1]],
-; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[TMP2]],
 ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 4
 ; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw <2 x i64> [[TMP3]], zeroinitializer
 ; CHECK-NEXT: [[ARRAYIDX2_5:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 1
@@ -63,7 +63,7 @@
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i64> undef, i64 [[TMP5]], i32 0
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> [[TMP6]], i64 [[ADD]], i32 1
 ; CHECK-NEXT: [[TMP8:%.*]] = shl <2 x i64> [[TMP7]],
-; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> , [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i64> [[TMP8]],
 ; CHECK-NEXT: [[ARRAYIDX2_6:%.*]] = getelementptr inbounds [0 x i64], [0 x i64]* undef, i64 0, i64 0
 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[ARRAYIDX2_6]] to <2 x i64>*
 ; CHECK-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* [[TMP10]], align 1
Index: test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
+++ test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll
@@ -88,7 +88,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -124,7 +124,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -160,7 +160,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add nuw <4 x i32> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -196,7 +196,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i32> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[IDX1]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -232,7 +232,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = fadd nnan <4 x float> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -268,7 +268,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -304,7 +304,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = fadd fast <4 x float> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
@@ -340,7 +340,7 @@
 ; CHECK-NEXT: [[IDX4:%.*]] = getelementptr inbounds float, float* [[X]], i64 3
 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> , [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = fadd arcp <4 x float> [[TMP2]],
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[IDX1]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP3]], <4 x float>* [[TMP4]], align 4
 ; CHECK-NEXT: ret void
Index: test/Transforms/SLPVectorizer/X86/reduction.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/reduction.ll
+++ test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -23,7 +23,7 @@
 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i32 [[MUL]]
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 1
 ; CHECK-NEXT: [[ADD5:%.*]] = fadd double [[TMP3]], [[TMP4]]
Index: test/Transforms/SLPVectorizer/X86/reduction_loads.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/reduction_loads.ll
+++ test/Transforms/SLPVectorizer/X86/reduction_loads.ll
@@ -14,10 +14,10 @@
 ; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 7
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.body:
-; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[BIN_EXTRA:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OP_EXTRA:%.*]], [[FOR_BODY]] ]
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P]] to <8 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul <8 x i32> [[TMP1]],
 ; CHECK-NEXT: [[ADD:%.*]] = add i32 undef, [[SUM]]
 ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 undef, [[ADD]]
 ; CHECK-NEXT: [[ADD_2:%.*]] = add i32 undef, [[ADD_1]]
@@ -32,11 +32,11 @@
 ; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <8 x i32> [[BIN_RDX2]], <8 x i32> undef, <8 x i32>
 ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <8 x i32> [[BIN_RDX2]], [[RDX_SHUF3]]
 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[BIN_RDX4]], i32 0
-; CHECK-NEXT: [[BIN_EXTRA]] = add i32 [[TMP3]], [[SUM]]
+; CHECK-NEXT: [[OP_EXTRA]] = add i32 [[TMP3]], [[SUM]]
 ; CHECK-NEXT: [[ADD_7:%.*]] = add i32 undef, [[ADD_6]]
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[FOR_BODY]]
 ; CHECK: for.end:
-; CHECK-NEXT: ret i32 [[BIN_EXTRA]]
+; CHECK-NEXT: ret i32 [[OP_EXTRA]]
 ;
 entry:
   %arrayidx.1 = getelementptr inbounds i32, i32* %p, i64 1
Index: test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
+++ test/Transforms/SLPVectorizer/X86/reorder_repeated_ops.ll
@@ -18,7 +18,7 @@
 ; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <2 x i32> , [[REORDER_SHUFFLE]]
 ; CHECK-NEXT: [[TMP4:%.*]] = sub <2 x i32> [[TMP3]], undef
 ; CHECK-NEXT: [[SHUFFLE8:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> undef, <4 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> , [[SHUFFLE8]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[SHUFFLE8]],
 ; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 undef, undef
 ; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 undef, i32 undef
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp sgt i32 [[TMP12]], undef
@@ -37,7 +37,7 @@
 ; CHECK-NEXT: [[TMP7:%.*]] = sub nsw <2 x i32> undef, [[TMP2]]
 ; CHECK-NEXT: [[TMP8:%.*]] = sub <2 x i32> [[TMP7]], undef
 ; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <4 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> , [[SHUFFLE]]
+; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[SHUFFLE]],
 ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 undef, undef
 ; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP26]], i32 undef, i32 undef
 ; CHECK-NEXT: [[TMP28:%.*]] = icmp sgt i32 [[TMP27]], undef
Index: test/Transforms/SLPVectorizer/X86/resched.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/resched.ll
+++ test/Transforms/SLPVectorizer/X86/resched.ll
@@ -72,7 +72,7 @@
 ; CHECK-NEXT: [[TMP41:%.*]] = insertelement <16 x i32> [[TMP40]], i32 [[SHR_13_I_I]], i32 14
 ; CHECK-NEXT: [[TMP42:%.*]] = insertelement <16 x i32> [[TMP41]], i32 [[SHR_14_I_I]], i32 15
 ; CHECK-NEXT: [[TMP43:%.*]] = trunc <16 x i32> [[TMP42]] to <16 x i8>
-; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> , [[TMP43]]
+; CHECK-NEXT: [[TMP44:%.*]] = and <16 x i8> [[TMP43]],
 ; CHECK-NEXT: [[ARRAYIDX_I_I7_15_I_I:%.*]] = getelementptr inbounds %"struct.std::array", %"struct.std::array"* undef, i64 0, i32 0, i64 15
 ; CHECK-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 ; CHECK-NEXT: store <16 x i8> [[TMP44]], <16 x i8>* [[TMP45]], align 1
Index: test/Transforms/SLPVectorizer/X86/saxpy.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/saxpy.ll
+++ test/Transforms/SLPVectorizer/X86/saxpy.ll
@@ -15,7 +15,7 @@
 ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 1
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[A]], i32 2
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[A]], i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP4]], [[TMP8]]
 ; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
 ; CHECK-NEXT: [[TMP12:%.*]] = add nsw <4 x i32> [[TMP9]], [[TMP11]]
Index: test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
+++ test/Transforms/SLPVectorizer/X86/schedule-bundle.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([1 x i32]* @b to <4 x i32>*), align 4
 ; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> [[TMP0]],
-; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]],
 ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([1 x i32]* @a to <4 x i32>*), align 4
 ; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr ([1 x i32], [1 x i32]* @b, i64 4, i64 0), align 4
 ; CHECK-NEXT: [[DOTLOBIT_4:%.*]] = lshr i32 [[TMP3]], 31
Index: test/Transforms/SLPVectorizer/X86/simple-loop.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/simple-loop.ll
+++ test/Transforms/SLPVectorizer/X86/simple-loop.ll
@@ -14,8 +14,8 @@
 ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> , [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> , [[TMP6]]
+; CHECK-NEXT: [[TMP6:%.*]] = mul <4 x i32> [[TMP5]],
+; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i32> [[TMP6]],
 ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP2]]
 ; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4
Index: test/Transforms/SLPVectorizer/X86/value-bug.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/value-bug.ll
+++ test/Transforms/SLPVectorizer/X86/value-bug.ll
@@ -33,9 +33,9 @@
 ; CHECK-NEXT: br i1 undef, label [[BB32_I]], label [[BB21_I]]
 ; CHECK: exit:
 ; CHECK-NEXT: [[TMP9:%.*]] = fpext <2 x float> [[TMP3]] to <2 x double>
-; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> , [[TMP9]]
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <2 x double> [[TMP9]],
 ; CHECK-NEXT: [[TMP11:%.*]] = fadd <2 x double> undef, [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> undef, [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = fadd <2 x double> [[TMP11]], undef
 ; CHECK-NEXT: [[TMP13]] = fptrunc <2 x double> [[TMP12]] to <2 x float>
 ; CHECK-NEXT: br label [[BB283]]
 ;
Index: test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
+++ test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -136,7 +136,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -174,7 +174,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>*
 ; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -422,7 +422,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -546,7 +546,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -584,7 +584,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd fast <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -751,7 +751,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -875,7 +875,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
@@ -913,7 +913,7 @@
 ; CHECK-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds float, float* [[DST]], i64 3
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[SRC]] to <4 x float>*
 ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> , [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP1]],
 ; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[DST]] to <4 x float>*
 ; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
 ; CHECK-NEXT: ret void
Index: test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
+++ test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll
@@ -17,7 +17,7 @@
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
 ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
@@ -91,7 +91,7 @@
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
 ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef
@@ -169,7 +169,7 @@
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5
 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6
 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7
-; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[TMP9]], [[SHUFFLE]]
+; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]]
 ; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 undef, undef
 ; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 undef, i32 undef
 ; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], undef