diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -440,8 +440,19 @@ TTI::TargetCostKind CostKind, TTI::OperandValueKind OpdInfo, const Instruction *I) { - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, - OpdInfo, I); + InstructionCost Cost = 0; + if (Opcode == Instruction::Store && isa(Src) && + (OpdInfo == TTI::OK_UniformConstantValue || + OpdInfo == TTI::OK_NonUniformConstantValue)) { + APInt PseudoAddr = APInt::getAllOnes(DL.getPointerSizeInBits()); + // Add a cost of address load + the cost of the vector load. + Cost += RISCVMatInt::getIntMatCost(PseudoAddr, DL.getPointerSizeInBits(), + getST()->getFeatureBits()) + + getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src), + /*AddressSpace=*/0, CostKind); + } + return Cost + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, OpdInfo, I); } void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1079,7 +1079,8 @@ } // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, + Opd1PropInfo, Opd2PropInfo, Args, CxtI); } InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, @@ -4077,11 +4078,21 @@ auto *VTy = dyn_cast(Src); + InstructionCost Cost = 0; + + // Add a cost for constant load to vector. + if (Opcode == Instruction::Store && + (OpdInfo == TTI::OK_UniformConstantValue || + OpdInfo == TTI::OK_NonUniformConstantValue)) + Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src), + /*AddressSpace=*/0, CostKind); + // Handle the simple case of non-vectors. // NOTE: this assumes that legalization never creates vector from scalars! - if (!VTy || !LT.second.isVector()) + if (!VTy || !LT.second.isVector()) { // Each load/store unit costs 1. - return LT.first * 1; + return (LT.second.isFloatingPoint() ? Cost : 0) + LT.first * 1; + } bool IsLoad = Opcode == Instruction::Load; @@ -4089,8 +4100,6 @@ const int EltTyBits = DL.getTypeSizeInBits(EltTy); - InstructionCost Cost = 0; - // Source of truth: how many elements were there in the original IR vector? const unsigned SrcNumElt = VTy->getNumElements(); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6414,6 +6414,12 @@ CommonCost -= (EntryVF - VL.size()) * ScalarEltCost; } InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost; + for (unsigned I = 0, Num = VL0->getNumOperands(); I < Num; ++I) { + if (all_of(VL, [I](Value *V) { + return isConstant(cast(V)->getOperand(I)); + })) + Operands[I] = ConstantVector::getNullValue(VecTy); + } InstructionCost VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -626,9 +626,9 @@ define i32 @frem(i32 %arg) { ; SSE1-LABEL: 'frem' ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef -; SSE1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = frem <8 x float> undef, undef +; SSE1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = frem <16 x float> undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = frem <2 x double> undef, undef ; SSE1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = frem <4 x double> undef, undef @@ -637,68 +637,68 @@ ; ; SSE2-LABEL: 'frem' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = frem <8 x float> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = frem <16 x float> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef -; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <4 x double> undef, undef +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = frem <8 x double> undef, undef ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SSE42-LABEL: 'frem' ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = frem <8 x float> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = frem <16 x float> undef, undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef -; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <4 x double> undef, undef +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = frem <8 x double> undef, undef ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX-LABEL: 'frem' ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'frem' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'frem' ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = frem <8 x float> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = frem <16 x float> undef, undef ; SLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef -; SLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <4 x double> undef, undef +; SLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = frem <8 x double> undef, undef ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; GLM-LABEL: 'frem' ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = frem <8 x float> undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = frem <16 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = frem <8 x float> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = frem <16 x float> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = frem <4 x double> undef, undef -; GLM-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = frem <8 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <4 x double> undef, undef +; GLM-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = frem <8 x double> undef, undef ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = frem float undef, undef diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll --- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll @@ -12,11 +12,13 @@ define void @foo(i64* nocapture writeonly %da) { ; CHECK-128-LABEL: @foo( ; CHECK-128-NEXT: entry: -; CHECK-128-NEXT: [[TMP0:%.*]] = bitcast i64* [[DA:%.*]] to <2 x i64>* -; CHECK-128-NEXT: store <2 x i64> , <2 x i64>* [[TMP0]], align 8 +; CHECK-128-NEXT: store i64 0, i64* [[DA:%.*]], align 8 +; CHECK-128-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 1 +; CHECK-128-NEXT: store i64 1, i64* [[ARRAYIDX1]], align 8 ; CHECK-128-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 2 -; CHECK-128-NEXT: [[TMP1:%.*]] = bitcast i64* [[ARRAYIDX2]] to <2 x i64>* -; CHECK-128-NEXT: store <2 x i64> , <2 x i64>* [[TMP1]], align 8 +; CHECK-128-NEXT: store i64 2, i64* [[ARRAYIDX2]], align 8 +; CHECK-128-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, i64* [[DA]], i64 3 +; CHECK-128-NEXT: store i64 3, i64* [[ARRAYIDX3]], align 8 ; CHECK-128-NEXT: ret void ; ; CHECK-256-LABEL: @foo( @@ -45,8 +47,9 @@ define void @foo8(i8* nocapture writeonly %da) { ; CHECK-LABEL: @foo8( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[DA:%.*]] to <2 x i8>* -; CHECK-NEXT: store <2 x i8> , <2 x i8>* [[TMP0]], align 8 +; CHECK-NEXT: store i8 0, i8* [[DA:%.*]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 1 +; CHECK-NEXT: store i8 1, i8* [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, i8* [[DA]], i8 2 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_bullet.ll @@ -14,18 +14,19 @@ ; CHECK-NEXT: ret void ; CHECK: if.else: ; CHECK-NEXT: [[M_NUMCONSTRAINTROWS4:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO:%.*]], i64 0, i32 0 +; CHECK-NEXT: [[NUB5:%.*]] = getelementptr inbounds %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960", %"struct.btTypedConstraint::btConstraintInfo1.17.157.357.417.477.960"* [[INFO]], i64 0, i32 1 ; CHECK-NEXT: br i1 undef, label [[LAND_LHS_TRUE_I_1:%.*]], label [[IF_THEN7_1:%.*]] ; CHECK: land.lhs.true.i.1: ; CHECK-NEXT: br i1 undef, label [[FOR_INC_1:%.*]], label [[IF_THEN7_1]] ; CHECK: if.then7.1: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[M_NUMCONSTRAINTROWS4]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> , <2 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: store i32 1, i32* [[M_NUMCONSTRAINTROWS4]], align 4 +; CHECK-NEXT: store i32 5, i32* [[NUB5]], align 4 ; CHECK-NEXT: br label [[FOR_INC_1]] ; CHECK: for.inc.1: -; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ , [[IF_THEN7_1]] ], [ , [[LAND_LHS_TRUE_I_1]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add nsw <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[M_NUMCONSTRAINTROWS4]] to <2 x i32>* -; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ , [[IF_THEN7_1]] ], [ , [[LAND_LHS_TRUE_I_1]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add nsw <2 x i32> [[TMP0]], +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[M_NUMCONSTRAINTROWS4]] to <2 x i32>* +; CHECK-NEXT: store <2 x i32> [[TMP1]], <2 x i32>* [[TMP2]], align 4 ; CHECK-NEXT: unreachable ; entry: