diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1108,6 +1108,10 @@
     setOperationAction(ISD::STORE, MVT::v4i16, Custom);
     setOperationAction(ISD::STORE, MVT::v8i8, Custom);
 
+    // Add 32-bit vector stores to help vectorization opportunities.
+    setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+    setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+
     setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
     setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
     setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
@@ -25486,6 +25490,9 @@
     return SDValue();
   }
 
+  if (StoreVT.is32BitVector())
+    return SDValue();
+
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   assert(StoreVT.is64BitVector() && "Unexpected VT");
   assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
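For context before the test updates, here is a minimal C sketch of the pattern this change targets. It is hypothetical, not part of the patch: the shape mirrors the @add4 SLPVectorizer test below, and the function name, parameter names, and restrict qualifiers are assumptions. Marking v4i8/v2i16 stores Custom presumably makes the 32-bit vector store look legal (and therefore cheap) to the vectorizer's cost model, while the early return in LowerStore defers the actual lowering to the default widening path:

  /* Hypothetical source corresponding to the @add4 test below: four
   * adjacent byte lanes that SLP can now merge into one <4 x i8>
   * load, add, and 32-bit store instead of four scalar chains. */
  void add4(unsigned char *restrict r, const unsigned char *restrict a) {
    r[0] += a[0];
    r[1] += a[1];
    r[2] += a[2];
    r[3] += a[3];
  }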
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-load.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-load.ll
@@ -14,28 +14,10 @@
 define void @add4(ptr noalias nocapture noundef %r, ptr noalias nocapture noundef readonly %a) {
 ; CHECK-LABEL: @add4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[R:%.*]], align 1
-; CHECK-NEXT: [[ADD:%.*]] = add i8 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i8 [[ADD]], ptr [[R]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ADD_1:%.*]] = add i8 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i8 [[ADD_1]], ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ADD_2:%.*]] = add i8 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: store i8 [[ADD_2]], ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX2_3]], align 1
-; CHECK-NEXT: [[ADD_3:%.*]] = add i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: store i8 [[ADD_3]], ptr [[ARRAYIDX2_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[R:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[R]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-and-const-load.ll
@@ -14,24 +14,9 @@
 define void @and4(ptr noalias nocapture noundef writeonly %dst, ptr noalias nocapture noundef readonly %src) {
 ; CHECK-LABEL: @and4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[TMP0]], -64
-; CHECK-NEXT: store i8 [[TMP1]], ptr [[DST:%.*]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[TMP3:%.*]] = and i8 [[TMP2]], -64
-; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
-; CHECK-NEXT: store i8 [[TMP3]], ptr [[ARRAYIDX3_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[TMP4]], -64
-; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
-; CHECK-NEXT: store i8 [[TMP5]], ptr [[ARRAYIDX3_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[TMP6]], -64
-; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
-; CHECK-NEXT: store i8 [[TMP7]], ptr [[ARRAYIDX3_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[SRC:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i8> [[TMP0]], <i8 -64, i8 -64, i8 -64, i8 -64>
+; CHECK-NEXT: store <4 x i8> [[TMP1]], ptr [[DST:%.*]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
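The same shape with a constant operand; a hypothetical C source for the @and4 test above (the names are assumptions, and 0xC0 is simply the unsigned spelling of the i8 -64 splat in the vectorized CHECK line):

  /* Hypothetical source for the @and4 test above: masking each byte
   * with 0xC0 (i.e. i8 -64) now becomes one <4 x i8> and plus a
   * single 32-bit store. */
  void and4(unsigned char *restrict dst, const unsigned char *restrict src) {
    dst[0] = src[0] & 0xC0;
    dst[1] = src[1] & 0xC0;
    dst[2] = src[2] & 0xC0;
    dst[3] = src[3] & 0xC0;
  }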
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-load.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-load.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-load.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-load.ll
@@ -14,28 +14,10 @@
 define void @add4(ptr noalias nocapture noundef %r, ptr noalias nocapture noundef readonly %a) {
 ; CHECK-LABEL: @add4(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A:%.*]], align 1
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[R:%.*]], align 1
-; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i8 [[MUL]], ptr [[R]], align 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[MUL_1:%.*]] = mul i8 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i8 [[MUL_1]], ptr [[ARRAYIDX2_1]], align 1
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[MUL_2:%.*]] = mul i8 [[TMP5]], [[TMP4]]
-; CHECK-NEXT: store i8 [[MUL_2]], ptr [[ARRAYIDX2_2]], align 1
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX_3]], align 1
-; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i8, ptr [[R]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[ARRAYIDX2_3]], align 1
-; CHECK-NEXT: [[MUL_3:%.*]] = mul i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: store i8 [[MUL_3]], ptr [[ARRAYIDX2_3]], align 1
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[R:%.*]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[TMP1]], [[TMP0]]
+; CHECK-NEXT: store <4 x i8> [[TMP2]], ptr [[R]], align 1
 ; CHECK-NEXT: ret void
 ;
 entry:
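Likewise for the multiply variant (note the IR function in arith-mul-load.ll keeps the @add4 name even though its body multiplies); again a hypothetical sketch, with the mul4 name chosen only for illustration:

  /* Hypothetical source for the mul test above (the test file keeps
   * the @add4 name): lane-wise byte multiplies collapse to one
   * <4 x i8> mul and a single 32-bit store. */
  void mul4(unsigned char *restrict r, const unsigned char *restrict a) {
    r[0] *= a[0];
    r[1] *= a[1];
    r[2] *= a[2];
    r[3] *= a[3];
  }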