diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -191,8 +191,8 @@ SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - Op0.getValueType(), Op0, Op1, Op2); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { @@ -200,7 +200,7 @@ SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, - Op2); + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { @@ -225,7 +225,8 @@ Opers[i] = Oper; } - SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); + SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs), + Opers, N->getFlags()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -255,6 +256,7 @@ ResVT.getVectorElementType(), OvVT.getVectorElementType()); SDNode *ScalarNode = DAG.getNode( N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); + ScalarNode->setFlags(N->getFlags()); // Replace the other vector result not being explicitly scalarized here. unsigned OtherNo = 1 - ResNo; @@ -364,7 +366,7 @@ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, DAG.getVectorIdxConstant(0, DL)); } - return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { @@ -997,10 +999,10 @@ GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), - Op0Lo, Op1Lo, Op2Lo); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), - Op0Hi, Op1Hi, Op2Hi); + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, + Op2Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, + Op2Hi, N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { @@ -1012,8 +1014,10 @@ SDValue Op2 = N->getOperand(2); unsigned Opcode = N->getOpcode(); - Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2); - Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2, + N->getFlags()); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2, + N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, @@ -1294,8 +1298,10 @@ EVT LoValueVTs[] = {LoVT, MVT::Other}; EVT HiValueVTs[] = {HiVT, MVT::Other}; - Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); - Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo, + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi, + N->getFlags()); // Build a factor node to remember that this Op is independent of the // other one. @@ -1385,6 +1391,8 @@ SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); + LoNode->setFlags(N->getFlags()); + HiNode->setFlags(N->getFlags()); Lo = SDValue(LoNode, ResNo); Hi = SDValue(HiNode, ResNo); @@ -1710,11 +1718,13 @@ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1), + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1), + N->getFlags()); } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); } } diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll --- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll @@ -87,8 +87,33 @@ define <8 x float> @v8f32_no_daz(<8 x float> %f) #0 { ; NHM-LABEL: v8f32_no_daz: ; NHM: # %bb.0: -; NHM-NEXT: sqrtps %xmm0, %xmm0 -; NHM-NEXT: sqrtps %xmm1, %xmm1 +; NHM-NEXT: movaps %xmm0, %xmm2 +; NHM-NEXT: rsqrtps %xmm0, %xmm3 +; NHM-NEXT: mulps %xmm3, %xmm0 +; NHM-NEXT: movaps {{.*#+}} xmm4 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; NHM-NEXT: movaps %xmm0, %xmm5 +; NHM-NEXT: mulps %xmm4, %xmm5 +; NHM-NEXT: mulps %xmm3, %xmm0 +; NHM-NEXT: movaps {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] +; NHM-NEXT: addps %xmm3, %xmm0 +; NHM-NEXT: mulps %xmm5, %xmm0 +; NHM-NEXT: movaps {{.*#+}} xmm5 = [NaN,NaN,NaN,NaN] +; NHM-NEXT: andps %xmm5, %xmm2 +; NHM-NEXT: movaps {{.*#+}} xmm6 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; NHM-NEXT: movaps %xmm6, %xmm7 +; NHM-NEXT: cmpleps %xmm2, %xmm7 +; NHM-NEXT: andps %xmm7, %xmm0 +; NHM-NEXT: rsqrtps %xmm1, %xmm7 +; NHM-NEXT: movaps %xmm1, %xmm2 +; NHM-NEXT: mulps %xmm7, %xmm2 +; NHM-NEXT: mulps %xmm2, %xmm4 +; NHM-NEXT: mulps %xmm7, %xmm2 +; NHM-NEXT: addps %xmm3, %xmm2 +; NHM-NEXT: mulps %xmm4, %xmm2 +; NHM-NEXT: andps %xmm5, %xmm1 +; NHM-NEXT: cmpleps %xmm1, %xmm6 +; NHM-NEXT: andps %xmm6, %xmm2 +; NHM-NEXT: movaps %xmm2, %xmm1 ; NHM-NEXT: retq ; ; SNB-LABEL: v8f32_no_daz: @@ -209,8 +234,28 @@ define <8 x float> @v8f32_daz(<8 x float> %f) #1 { ; NHM-LABEL: v8f32_daz: ; NHM: # %bb.0: -; NHM-NEXT: sqrtps %xmm0, %xmm0 -; NHM-NEXT: sqrtps %xmm1, %xmm1 +; NHM-NEXT: rsqrtps %xmm0, %xmm2 +; NHM-NEXT: movaps %xmm0, %xmm3 +; NHM-NEXT: mulps %xmm2, %xmm3 +; NHM-NEXT: movaps {{.*#+}} xmm4 = [-5.0E-1,-5.0E-1,-5.0E-1,-5.0E-1] +; NHM-NEXT: movaps %xmm3, %xmm5 +; NHM-NEXT: mulps %xmm4, %xmm5 +; NHM-NEXT: mulps %xmm2, %xmm3 +; NHM-NEXT: movaps {{.*#+}} xmm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] +; NHM-NEXT: addps %xmm2, %xmm3 +; NHM-NEXT: mulps %xmm5, %xmm3 +; NHM-NEXT: xorps %xmm5, %xmm5 +; NHM-NEXT: cmpneqps %xmm5, %xmm0 +; NHM-NEXT: andps %xmm3, %xmm0 +; NHM-NEXT: rsqrtps %xmm1, %xmm3 +; NHM-NEXT: movaps %xmm1, %xmm6 +; NHM-NEXT: mulps %xmm3, %xmm6 +; NHM-NEXT: mulps %xmm6, %xmm4 +; NHM-NEXT: mulps %xmm3, %xmm6 +; NHM-NEXT: addps %xmm2, %xmm6 +; NHM-NEXT: mulps %xmm4, %xmm6 +; NHM-NEXT: cmpneqps %xmm5, %xmm1 +; NHM-NEXT: andps %xmm6, %xmm1 ; NHM-NEXT: retq ; ; SNB-LABEL: v8f32_daz: