diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1417,7 +1417,11 @@ HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane); } else if (NumFreeOpsHash.NumOfAPOs == Min && NumFreeOpsHash.NumOpsWithSameOpcodeParent == SameOpNumber) { - ++HashMap[NumFreeOpsHash.Hash].first; + auto It = HashMap.find(NumFreeOpsHash.Hash); + if (It == HashMap.end()) + HashMap[NumFreeOpsHash.Hash] = std::make_pair(1, Lane); + else + ++It->second.first; } } // Select the lane with the minimum counter. diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose-inseltpoison.ll @@ -68,10 +68,11 @@ ; CHECK-LABEL: @build_vec_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP6]] ; %v0.0 = extractelement <4 x i32> %v0, i32 0 %v0.1 = extractelement <4 x i32> %v0, i32 1 @@ -207,8 +208,8 @@ ; CHECK-LABEL: @reduction_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll --- a/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/transpose.ll @@ -68,10 +68,11 @@ ; CHECK-LABEL: @build_vec_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] -; CHECK-NEXT: ret <4 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP6]] ; %v0.0 = extractelement <4 x i32> %v0, i32 0 %v0.1 = extractelement <4 x i32> %v0, i32 1 @@ -207,8 +208,8 @@ ; CHECK-LABEL: @reduction_v4i32( ; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[V0]], [[V1]] -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = lshr <4 x i32> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[TMP6]], diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP3]], i32 1 ; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> poison, [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], poison ; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], poison ; CHECK-NEXT: [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], poison ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i32 0 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll @@ -16,24 +16,23 @@ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[ADD78_1]], i32 3 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[SUB86_1]], i32 4 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 [[ADD78_2]], i32 5 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP5]], <16 x i32> poison, <16 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD78_1]], i32 1 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD94_1]], i32 2 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_1]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[ADD78_2]], i32 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[SUB102_3]], i32 5 -; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> poison, <16 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP13:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] -; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i32> [[TMP12]], <16 x i32> [[TMP13]], <16 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = lshr <16 x i32> [[TMP14]], -; CHECK-NEXT: [[TMP16:%.*]] = and <16 x i32> [[TMP15]], -; CHECK-NEXT: [[TMP17:%.*]] = mul nuw <16 x i32> [[TMP16]], -; CHECK-NEXT: [[TMP18:%.*]] = add <16 x i32> [[TMP17]], [[TMP14]] -; CHECK-NEXT: [[TMP19:%.*]] = xor <16 x i32> [[TMP18]], [[TMP17]] -; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP19]]) -; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP20]], 16 +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 [[SUB102_3]], i32 4 +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP12:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x i32> [[TMP11]], <16 x i32> [[TMP12]], <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = lshr <16 x i32> [[TMP13]], +; CHECK-NEXT: [[TMP15:%.*]] = and <16 x i32> [[TMP14]], +; CHECK-NEXT: [[TMP16:%.*]] = mul nuw <16 x i32> [[TMP15]], +; CHECK-NEXT: [[TMP17:%.*]] = add <16 x i32> [[TMP16]], [[TMP13]] +; CHECK-NEXT: [[TMP18:%.*]] = xor <16 x i32> [[TMP17]], [[TMP16]] +; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP18]]) +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP19]], 16 ; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]] ; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 ; CHECK-NEXT: ret i32 [[SHR120]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/operandorder.ll @@ -421,7 +421,7 @@ ; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[D:%.*]] to <4 x float>* ; CHECK-NEXT: [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP5]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], [[TMP5]] ; CHECK-NEXT: [[TMP9:%.*]] = bitcast float* [[A:%.*]] to <4 x float>* ; CHECK-NEXT: store <4 x float> [[TMP8]], <4 x float>* [[TMP9]], align 4 ; CHECK-NEXT: ret void