Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -30,9 +30,9 @@ static cl::opt<unsigned> SLPMaxVF( "riscv-v-slp-max-vf", cl::desc( - "Result used for getMaximumVF query which is used exclusively by " - "SLP vectorizer. Defaults to 1 which disables SLP."), - cl::init(1), cl::Hidden); + "Overrides result used for getMaximumVF query which is used " + "exclusively by SLP vectorizer."), + cl::Hidden); InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) { // TODO: Here assume reciprocal throughput is 1 for LMUL_1, it is @@ -1740,12 +1740,19 @@ } unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { - // This interface is currently only used by SLP. Returning 1 (which is the - // default value for SLPMaxVF) disables SLP. We currently have a cost modeling - // problem w/ constant materialization which causes SLP to perform majorly - // unprofitable transformations. - // TODO: Figure out constant materialization cost modeling and remove. - return SLPMaxVF; + if (SLPMaxVF.getNumOccurrences()) + return SLPMaxVF; + + // Return how many elements can fit in getRegisterBitwidth. This is the + // same routine as used in LoopVectorizer. We should probably be + // accounting for whether we actually have instructions with the right + // lane type, but we don't have enough information to do that without + // some additional plumbing which hasn't been justified yet. + TypeSize RegWidth = + getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); + // If no vector registers, or absurd element widths, disable + // vectorization by returning 1.
+ return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth); } bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1, Index: llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll +++ llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll @@ -18,31 +18,10 @@ ; DEFAULT-LABEL: define void @fp_add ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[F0:%.*]] = load float, ptr [[Q]], align 4 -; DEFAULT-NEXT: [[PF1:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load float, ptr [[PF1]], align 4 -; DEFAULT-NEXT: [[PF2:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 2 -; DEFAULT-NEXT: [[F2:%.*]] = load float, ptr [[PF2]], align 4 -; DEFAULT-NEXT: [[PF3:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 3 -; DEFAULT-NEXT: [[F3:%.*]] = load float, ptr [[PF3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = fadd float [[E0]], [[F0]] -; DEFAULT-NEXT: [[A1:%.*]] = fadd float [[E1]], [[F1]] -; DEFAULT-NEXT: [[A2:%.*]] = fadd float [[E2]], [[F2]] -; DEFAULT-NEXT: [[A3:%.*]] = fadd float [[E3]], [[F3]] -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr
inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] +; DEFAULT-NEXT: store <4 x float> [[TMP2]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -90,24 +69,9 @@ ; DEFAULT-LABEL: define void @fp_sub ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = fsub float [[E0]], 3.000000e+00 -; DEFAULT-NEXT: [[A1:%.*]] = fsub float [[E1]], 3.000000e+00 -; DEFAULT-NEXT: [[A2:%.*]] = fsub float [[E2]], 3.000000e+00 -; DEFAULT-NEXT: [[A3:%.*]] = fsub float [[E3]], 3.000000e+00 -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = fsub <4 x float> 
[[TMP0]], +; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -148,31 +112,10 @@ ; DEFAULT-LABEL: define void @fp_mul ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[F0:%.*]] = load float, ptr [[Q]], align 4 -; DEFAULT-NEXT: [[PF1:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load float, ptr [[PF1]], align 4 -; DEFAULT-NEXT: [[PF2:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 2 -; DEFAULT-NEXT: [[F2:%.*]] = load float, ptr [[PF2]], align 4 -; DEFAULT-NEXT: [[PF3:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 3 -; DEFAULT-NEXT: [[F3:%.*]] = load float, ptr [[PF3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = fmul float [[E0]], [[F0]] -; DEFAULT-NEXT: [[A1:%.*]] = fmul float [[E1]], [[F1]] -; DEFAULT-NEXT: [[A2:%.*]] = fmul float [[E2]], [[F2]] -; DEFAULT-NEXT: [[A3:%.*]] = fmul float [[E3]], [[F3]] -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 
4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]] +; DEFAULT-NEXT: store <4 x float> [[TMP2]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -220,24 +163,9 @@ ; DEFAULT-LABEL: define void @fp_div ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = fdiv float [[E0]], 1.050000e+01 -; DEFAULT-NEXT: [[A1:%.*]] = fdiv float [[E1]], 1.050000e+01 -; DEFAULT-NEXT: [[A2:%.*]] = fdiv float [[E2]], 1.050000e+01 -; DEFAULT-NEXT: [[A3:%.*]] = fdiv float [[E3]], 1.050000e+01 -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], +; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -280,31 +208,10 @@ ; DEFAULT-LABEL: define void @fp_max ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: 
[[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[F0:%.*]] = load float, ptr [[Q]], align 4 -; DEFAULT-NEXT: [[PF1:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load float, ptr [[PF1]], align 4 -; DEFAULT-NEXT: [[PF2:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 2 -; DEFAULT-NEXT: [[F2:%.*]] = load float, ptr [[PF2]], align 4 -; DEFAULT-NEXT: [[PF3:%.*]] = getelementptr inbounds float, ptr [[Q]], i64 3 -; DEFAULT-NEXT: [[F3:%.*]] = load float, ptr [[PF3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = tail call float @llvm.maxnum.f32(float [[E0]], float [[F0]]) -; DEFAULT-NEXT: [[A1:%.*]] = tail call float @llvm.maxnum.f32(float [[E1]], float [[F1]]) -; DEFAULT-NEXT: [[A2:%.*]] = tail call float @llvm.maxnum.f32(float [[E2]], float [[F2]]) -; DEFAULT-NEXT: [[A3:%.*]] = tail call float @llvm.maxnum.f32(float [[E3]], float [[F3]]) -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x 
float> [[TMP0]], <4 x float> [[TMP1]]) +; DEFAULT-NEXT: store <4 x float> [[TMP2]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -354,24 +261,9 @@ ; DEFAULT-LABEL: define void @fp_min ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = tail call float @llvm.minnum.f32(float [[E0]], float 1.250000e+00) -; DEFAULT-NEXT: [[A1:%.*]] = tail call float @llvm.minnum.f32(float [[E1]], float 1.250000e+00) -; DEFAULT-NEXT: [[A2:%.*]] = tail call float @llvm.minnum.f32(float [[E2]], float 1.250000e+00) -; DEFAULT-NEXT: [[A3:%.*]] = tail call float @llvm.minnum.f32(float [[E3]], float 1.250000e+00) -; DEFAULT-NEXT: store float [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store float [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store float [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store float [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> ) +; DEFAULT-NEXT: store <4 x float> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -413,24 +305,9 @@ ; DEFAULT-LABEL: define void @fp_convert ; DEFAULT-SAME: (ptr [[DST:%.*]], ptr 
[[P:%.*]]) #[[ATTR0]] { ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load float, ptr [[P]], align 4 -; DEFAULT-NEXT: [[PE1:%.*]] = getelementptr inbounds float, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load float, ptr [[PE1]], align 4 -; DEFAULT-NEXT: [[PE2:%.*]] = getelementptr inbounds float, ptr [[P]], i64 2 -; DEFAULT-NEXT: [[E2:%.*]] = load float, ptr [[PE2]], align 4 -; DEFAULT-NEXT: [[PE3:%.*]] = getelementptr inbounds float, ptr [[P]], i64 3 -; DEFAULT-NEXT: [[E3:%.*]] = load float, ptr [[PE3]], align 4 -; DEFAULT-NEXT: [[A0:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[E0]]) -; DEFAULT-NEXT: [[A1:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[E1]]) -; DEFAULT-NEXT: [[A2:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[E2]]) -; DEFAULT-NEXT: [[A3:%.*]] = tail call i32 @llvm.fptosi.sat.i32.f32(float [[E3]]) -; DEFAULT-NEXT: store i32 [[A0]], ptr [[DST]], align 4 -; DEFAULT-NEXT: [[PA1:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 1 -; DEFAULT-NEXT: store i32 [[A1]], ptr [[PA1]], align 4 -; DEFAULT-NEXT: [[PA2:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 2 -; DEFAULT-NEXT: store i32 [[A2]], ptr [[PA2]], align 4 -; DEFAULT-NEXT: [[PA3:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 3 -; DEFAULT-NEXT: store i32 [[A3]], ptr [[PA3]], align 4 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) +; DEFAULT-NEXT: store <4 x i32> [[TMP1]], ptr [[DST]], align 4 ; DEFAULT-NEXT: ret void ; entry: Index: llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll +++ llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll @@ -13,14 +13,9 @@ ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; 
DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = add i16 [[E0]], 1 -; DEFAULT-NEXT: [[A1:%.*]] = add i16 [[E1]], 1 -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -47,14 +42,9 @@ ; ; DEFAULT-LABEL: @vec_sub( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = sub i16 [[E0]], 17 -; DEFAULT-NEXT: [[A1:%.*]] = sub i16 [[E1]], 17 -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -81,14 +71,9 @@ ; ; DEFAULT-LABEL: @vec_rsub( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = sub i16 29, [[E0]] -; DEFAULT-NEXT: [[A1:%.*]] = sub i16 29, [[E1]] -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 
[[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = sub <2 x i16> , [[TMP0]] +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -115,14 +100,9 @@ ; ; DEFAULT-LABEL: @vec_mul( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = mul i16 [[E0]], 7 -; DEFAULT-NEXT: [[A1:%.*]] = mul i16 [[E1]], 7 -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -149,14 +129,9 @@ ; ; DEFAULT-LABEL: @vec_sdiv( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = sdiv i16 [[E0]], 7 -; DEFAULT-NEXT: [[A1:%.*]] = sdiv i16 [[E1]], 7 -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -184,17 +159,10 @@ ; ; DEFAULT-LABEL: @vec_and( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; 
DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 -; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = and i16 [[E0]], [[F0]] -; DEFAULT-NEXT: [[A1:%.*]] = and i16 [[E1]], [[F1]] -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = and <2 x i16> [[TMP0]], [[TMP1]] +; DEFAULT-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -226,17 +194,10 @@ ; ; DEFAULT-LABEL: @vec_or( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 -; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = or i16 [[E0]], [[F0]] -; DEFAULT-NEXT: [[A1:%.*]] = or i16 [[E1]], [[F1]] -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = or <2 x i16> [[TMP0]], [[TMP1]] +; DEFAULT-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 ; 
DEFAULT-NEXT: ret void ; entry: @@ -268,17 +229,10 @@ ; ; DEFAULT-LABEL: @vec_sll( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 -; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = shl i16 [[E0]], [[F0]] -; DEFAULT-NEXT: [[A1:%.*]] = shl i16 [[E1]], [[F1]] -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = shl <2 x i16> [[TMP0]], [[TMP1]] +; DEFAULT-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -311,17 +265,10 @@ ; ; DEFAULT-LABEL: @vec_smin( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 -; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E0]], i16 [[F0]]) -; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E1]], i16 [[F1]]) -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 
x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; DEFAULT-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -354,17 +301,10 @@ ; ; DEFAULT-LABEL: @vec_umax( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 -; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 -; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E0]], i16 [[F0]]) -; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E1]], i16 [[F1]]) -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; DEFAULT-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: Index: llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll +++ llvm/test/Transforms/SLPVectorizer/RISCV/load-store.ll @@ -13,12 +13,8 @@ ; ; DEFAULT-LABEL: @simple_copy( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, 
ptr [[INC]], align 2 -; DEFAULT-NEXT: store i16 [[E0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[E1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: store <2 x i16> [[TMP0]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -42,14 +38,9 @@ ; ; DEFAULT-LABEL: @vec_add( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 -; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 -; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 -; DEFAULT-NEXT: [[A0:%.*]] = add i16 [[E0]], 1 -; DEFAULT-NEXT: [[A1:%.*]] = add i16 [[E1]], 1 -; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[TMP1:%.*]] = add <2 x i16> [[TMP0]], +; DEFAULT-NEXT: store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -171,13 +162,7 @@ ; ; DEFAULT-LABEL: @splat_store_i32_zero( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: store i32 0, ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: [[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i32 0, ptr [[INC1]], align 2 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2 -; DEFAULT-NEXT: store i32 0, ptr [[INC2]], align 2 -; DEFAULT-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3 -; DEFAULT-NEXT: store i32 0, ptr [[INC3]], align 2 +; DEFAULT-NEXT: store <4 x i32> zeroinitializer, ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: @@ -199,13 +184,7 @@ ; ; DEFAULT-LABEL: @splat_store_i32_one( ; DEFAULT-NEXT: entry: -; DEFAULT-NEXT: store i32 1, ptr [[DEST:%.*]], align 4 -; DEFAULT-NEXT: 
[[INC1:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 1 -; DEFAULT-NEXT: store i32 1, ptr [[INC1]], align 2 -; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 2 -; DEFAULT-NEXT: store i32 1, ptr [[INC2]], align 2 -; DEFAULT-NEXT: [[INC3:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 3 -; DEFAULT-NEXT: store i32 1, ptr [[INC3]], align 2 +; DEFAULT-NEXT: store <4 x i32> , ptr [[DEST:%.*]], align 4 ; DEFAULT-NEXT: ret void ; entry: Index: llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll =================================================================== --- llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll +++ llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll @@ -19,19 +19,8 @@ ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1:[0-9]+]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 -; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @fabsf(float [[VECEXT]]) -; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 -; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @fabsf(float [[VECEXT_1]]) -; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @fabsf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @fabsf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x 
float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) +; DEFAULT-NEXT: ret <4 x float> [[TMP1]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -64,19 +53,8 @@ ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 -; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.fabs.f32(float [[VECEXT]]) -; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 -; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.fabs.f32(float [[VECEXT_1]]) -; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.fabs.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.fabs.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) +; DEFAULT-NEXT: ret <4 x float> [[TMP1]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -109,19 +87,8 @@ ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 -; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @sqrtf(float [[VECEXT]]) -; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 -; DEFAULT-NEXT: 
[[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sqrtf(float [[VECEXT_1]]) -; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sqrtf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sqrtf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) +; DEFAULT-NEXT: ret <4 x float> [[TMP1]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -154,19 +121,8 @@ ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] { ; DEFAULT-NEXT: entry: ; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16 -; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0 -; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.sqrt.f32(float [[VECEXT]]) -; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0 -; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 -; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sqrt.f32(float [[VECEXT_1]]) -; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sqrt.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 
-; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sqrt.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) +; DEFAULT-NEXT: ret <4 x float> [[TMP1]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -215,13 +171,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -270,13 +224,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], 
float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -325,13 +277,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float>
poison, <2 x i32> <i32 2, i32 3> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -380,13 +330,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -435,13 +383,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float>
[[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16 @@ -490,13 +436,11 @@ ; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1 ; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) ; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1 -; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2 -; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]]) -; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2 -; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3 -; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]]) -; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3 -; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]] +; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <4 x float>
[[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3> +; DEFAULT-NEXT: [[TMP4:%.*]] = call fast <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP3]]) +; DEFAULT-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison> +; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[VECINS_1]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 4, i32 5> +; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]] ; entry: %0 = load <4 x float>, ptr %a, align 16