diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2125,8 +2125,9 @@ /// Return information about the vector formed for the specified index /// of a vector of (the same) instruction. + /// \param EnableFP - If true, also classify floating-point constants. TargetTransformInfo::OperandValueInfo - getOperandInfo(ArrayRef VL, unsigned OpIdx); + getOperandInfo(ArrayRef VL, unsigned OpIdx, bool EnableFP); /// \returns the cost of the vectorizable entry. InstructionCost getEntryCost(const TreeEntry *E, @@ -5814,30 +5815,44 @@ return I->getOpcode() == AltOp->getOpcode(); } -TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef VL, unsigned OpIdx) { - +TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef VL, + unsigned OpIdx, bool EnableFP) { TTI::OperandValueKind VK = TTI::OK_UniformConstantValue; TTI::OperandValueProperties VP = TTI::OP_PowerOf2; + // If all float operands are constants then set the operand kind to + // OK_NonUniformConstantValue. Otherwise, return OK_AnyValue. + const auto *I0 = cast(VL.front()); + if (I0->getOperand(OpIdx)->getType()->isFloatingPointTy()) { + if (!EnableFP || any_of(VL, [OpIdx, I0](Value *V) { + const auto *Inst = cast(V); + assert(Inst->getOpcode() == I0->getOpcode() && + "Expected same opcode"); + return !isConstant(Inst->getOperand(OpIdx)); + })) + return {TTI::OK_AnyValue, TTI::OP_None}; + return {TTI::OK_NonUniformConstantValue, TTI::OP_None}; + } + // If all operands are exactly the same ConstantInt then set the // operand kind to OK_UniformConstantValue. // If instead not all operands are constants, then set the operand kind // to OK_AnyValue. If all operands are constants but not the same, // then set the operand kind to OK_NonUniformConstantValue. 
ConstantInt *CInt0 = nullptr; - for (unsigned i = 0, e = VL.size(); i < e; ++i) { - const Instruction *I = cast(VL[i]); - assert(I->getOpcode() == cast(VL[0])->getOpcode()); - ConstantInt *CInt = dyn_cast(I->getOperand(OpIdx)); + for (Value *V : VL) { + const auto *Inst = cast(V); + assert(Inst->getOpcode() == cast(VL[0])->getOpcode() && + "Expected same opcode"); + auto *CInt = dyn_cast(Inst->getOperand(OpIdx)); if (!CInt) { VK = TTI::OK_AnyValue; VP = TTI::OP_None; break; } - if (VP == TTI::OP_PowerOf2 && - !CInt->getValue().isPowerOf2()) + if (VP == TTI::OP_PowerOf2 && !CInt->getValue().isPowerOf2()) VP = TTI::OP_None; - if (i == 0) { + if (!CInt0) { CInt0 = CInt; continue; } @@ -6415,7 +6430,8 @@ // Certain instructions can be cheaper to vectorize if they have a // constant second vector operand. const unsigned OpIdx = isa(VL0) ? 1 : 0; - auto Op2Info = getOperandInfo(VL, OpIdx); + // TODO: impact of enabling the FP analysis here is yet to be determined + auto Op2Info = getOperandInfo(VL, OpIdx, /*EnableFP=*/false); SmallVector Operands(VL0->operand_values()); InstructionCost ScalarEltCost = @@ -6500,19 +6516,12 @@ auto *SI = cast(IsReorder ? 
VL[E->ReorderIndices.front()] : VL0); Align Alignment = SI->getAlign(); - TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(SI->getOperand(0)); + TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0, /*EnableFP=*/true); InstructionCost ScalarEltCost = TTI->getMemoryOpCost( Instruction::Store, ScalarTy, Alignment, 0, CostKind, OpInfo, VL0); InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost; TTI::OperandValueKind OpVK = TTI::OK_AnyValue; - if (all_of(E->Scalars, - [](Value *V) { - return isConstant(cast(V)->getOperand(0)); - }) && - any_of(E->Scalars, [](Value *V) { - Value *Op = cast(V)->getOperand(0); - return !isa(Op) && !cast(Op)->isZeroValue(); - })) + if (OpInfo.isConstant()) OpVK = TTI::OK_NonUniformConstantValue; InstructionCost VecStCost = TTI->getMemoryOpCost( Instruction::Store, VecTy, Alignment, 0, CostKind, diff --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll @@ -5,9 +5,7 @@ ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = alloca { double, double }, align 8 -; CHECK-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[C]], i64 0, i32 1 -; CHECK-NEXT: store double 0.000000e+00, ptr [[C]], align 8 -; CHECK-NEXT: store double 1.000000e+00, ptr [[C_IMAGP]], align 8 +; CHECK-NEXT: store <2 x double> , ptr [[C]], align 8 ; CHECK-NEXT: ret void ; entry: