Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -2390,6 +2390,7 @@ return; } case Instruction::Select: + case Instruction::FNeg: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -2409,7 +2410,7 @@ case Instruction::Or: case Instruction::Xor: { auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); - LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); + LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n"); // Sort operands of the instructions so that each side is more likely to // have the same opcode. @@ -2881,6 +2882,7 @@ int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } + case Instruction::FNeg: case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: @@ -2918,7 +2920,8 @@ ConstantInt *CInt0 = nullptr; for (unsigned i = 0, e = VL.size(); i < e; ++i) { const Instruction *I = cast(VL[i]); - ConstantInt *CInt = dyn_cast(I->getOperand(1)); + unsigned OpIdx = isa(I) ? 1 : 0; + ConstantInt *CInt = dyn_cast(I->getOperand(OpIdx)); if (!CInt) { Op2VK = TargetTransformInfo::OK_AnyValue; Op2VP = TargetTransformInfo::OP_None; @@ -3698,6 +3701,31 @@ ++NumVectorInstructions; return V; } + case Instruction::FNeg: { + setInsertPointAfterBundle(E->Scalars, S); + + Value *Op = vectorizeTree(E->getOperand(0)); + + if (E->VectorizedValue) { + LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n"); + return E->VectorizedValue; + } + + Value *V = Builder.CreateUnOp( + static_cast(S.getOpcode()), Op); + propagateIRFlags(V, E->Scalars, VL0); + if (auto *I = dyn_cast(V)) + V = propagateMetadata(I, E->Scalars); + + if (NeedToShuffleReuses) { + V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), + E->ReuseShuffleIndices, "shuffle"); + } + E->VectorizedValue = V; + ++NumVectorInstructions; + + return V; + } case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/phi3.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/phi3.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/phi3.ll @@ -54,13 +54,11 @@ define void @Rf_GReset_unary_fneg() { ; CHECK-LABEL: @Rf_GReset_unary_fneg( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = fneg double undef ; CHECK-NEXT: [[TMP0:%.*]] = load double, double* @d, align 8 -; CHECK-NEXT: [[SUB1:%.*]] = fneg double [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[TMP0]], i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = fneg <2 x double> [[TMP1]] ; CHECK-NEXT: br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label [[IF_THEN:%.*]], label [[IF_END7:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[SUB]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[SUB1]], i32 1 ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x double> [[TMP2]], undef ; CHECK-NEXT: [[TMP4:%.*]] = fdiv <2 x double> [[TMP3]], undef ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP4]], i32 0 Index: llvm/trunk/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll =================================================================== --- llvm/trunk/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll +++ llvm/trunk/test/Transforms/SLPVectorizer/X86/propagate_ir_flags.ll @@ -559,15 +559,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[SUB1:%.*]] = fneg fast double [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 -; CHECK-NEXT: [[SUB2:%.*]] = fneg fast double [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fneg fast <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds double, double* %x, i64 0 @@ -632,15 +627,10 @@ ; CHECK-NEXT: [[TMP1:%.*]] = bitcast double* [[IDX1]] to <2 x double>* ; CHECK-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* [[TMP1]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = fcmp oge <2 x double> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP2]], i32 0 -; CHECK-NEXT: [[SUB1:%.*]] = fneg double [[TMP4]] -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP2]], i32 1 -; CHECK-NEXT: [[SUB2:%.*]] = fneg double [[TMP5]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x double> undef, double [[SUB1]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[SUB2]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast double* [[IDX1]] to <2 x double>* -; CHECK-NEXT: store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8 +; CHECK-NEXT: [[TMP4:%.*]] = fneg <2 x double> [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP3]], <2 x double> [[TMP2]], <2 x double> [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast double* [[IDX1]] to <2 x double>* +; CHECK-NEXT: store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8 ; CHECK-NEXT: ret void ; %idx1 = getelementptr inbounds double, double* %x, i64 0