// Patch summary (commentary only; the patch text starting at the first
// "diff --git" header below is unchanged).
//
// llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
//   * Declares and defines SelectionDAGLegalize::ExpandFNEG(SDNode *Node).
//     It fetches the operand's sign representation with getSignAsIntValue,
//     XORs SignAsInt.IntValue with a constant built from SignAsInt.SignMask,
//     and hands the flipped integer to modifySignAsInt to produce the result.
//   * The ISD::FNEG expansion case now pushes ExpandFNEG(Node) instead of
//     building FSUB(-0.0, X). The comment inside ExpandFNEG gives the reason:
//     the FSUB form is not correct in cases involving NaNs.
//   * ISD::STRICT_FSUB: the legality query now names ISD::STRICT_FSUB
//     explicitly instead of Node->getOpcode(), and a new check breaks out of
//     the case when STRICT_FADD is not Legal for the node's value type.
//     NOTE(review): presumably the expansion that follows emits STRICT_FADD;
//     the remainder of that case is not visible in this patch -- confirm.
//
// llvm/test/CodeGen/AArch64/arm64-fp128.ll (test_neg)
//   * The CHECK lines for the constant-pool -0.0 load and `bl __subtf3` are
//     removed; the new CHECK lines expect a store to the stack, an `eor` of
//     the byte at [sp, #15] with #0x80, and a reload.
//
// llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll (fneg_float64_t)
//   * The expected output no longer contains the two `bl __aeabi_dsub` calls
//     and the .LCPI2_0 constant; it now expects `eor ..., #128` applied to the
//     bytes at [sp, #7] and [sp, #15].
//
// NOTE(review): this copy of the patch appears to have lost its original line
// breaks, and some angle-bracketed text looks stripped (for example
// "SmallVectorImpl &Results" and "fsub nnan ninf nsz <2 x double> , %src").
// Confirm against the original patch before applying it.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -173,6 +173,7 @@ SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; + SDValue ExpandFNEG(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, SmallVectorImpl &Results); @@ -1568,6 +1569,25 @@ return modifySignAsInt(MagAsInt, DL, CopiedSign); } +SDValue SelectionDAGLegalize::ExpandFNEG(SDNode *Node) const { + // Expand using integer operations. (We used to expand to FSUB, but that's + // not correct in cases involving NaNs.) + SDLoc DL(Node); + + // Get the sign bit as an integer. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Node->getOperand(0)); + EVT IntVT = SignAsInt.IntValue.getValueType(); + + // Flip the sign. + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignFlip = DAG.getNode(ISD::XOR, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // Convert back to float. + return modifySignAsInt(SignAsInt, DL, SignFlip); +} + SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { SDLoc DL(Node); SDValue Value = Node->getOperand(0); @@ -3218,12 +3238,7 @@ Results.push_back(ExpandFCOPYSIGN(Node)); break; case ISD::FNEG: - // Expand Y = FNEG(X) -> Y = SUB -0.0, X - Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); - // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. 
- Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, - Node->getOperand(0)); - Results.push_back(Tmp1); + Results.push_back(ExpandFNEG(Node)); break; case ISD::FABS: Results.push_back(ExpandFABS(Node)); @@ -3904,10 +3919,14 @@ return true; break; case ISD::STRICT_FSUB: { - if (TLI.getStrictFPOperationAction(Node->getOpcode(), + if (TLI.getStrictFPOperationAction(ISD::STRICT_FSUB, Node->getValueType(0)) == TargetLowering::Legal) return true; + if (TLI.getStrictFPOperationAction(ISD::STRICT_FADD, + Node->getValueType(0)) + != TargetLowering::Legal) + break; EVT VT = Node->getValueType(0); const SDNodeFlags Flags = Node->getFlags(); diff --git a/llvm/test/CodeGen/AArch64/arm64-fp128.ll b/llvm/test/CodeGen/AArch64/arm64-fp128.ll --- a/llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -262,19 +262,17 @@ } define fp128 @test_neg(fp128 %in) { -; CHECK: [[$MINUS0:.LCPI[0-9]+_0]]: -; Make sure the weird hex constant below *is* -0.0 -; CHECK-NEXT: fp128 -0 - ; CHECK-LABEL: test_neg: - ; Could in principle be optimized to fneg which we can't select, this makes - ; sure that doesn't happen. +;; We convert this to fneg, and target-independent code expands it with +;; integer operations. %ret = fsub fp128 0xL00000000000000008000000000000000, %in -; CHECK: mov v1.16b, v0.16b -; CHECK: ldr q0, [{{x[0-9]+}}, :lo12:[[$MINUS0]]] -; CHECK: bl __subtf3 - ret fp128 %ret -; CHECK: ret + +; CHECK: str q0, [sp, #-16]! 
+; CHECK-NEXT: ldrb w8, [sp, #15] +; CHECK-NEXT: eor w8, w8, #0x80 +; CHECK-NEXT: strb w8, [sp, #15] +; CHECK-NEXT: ldr q0, [sp], #16 +; CHECK-NEXT: ret } diff --git a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll --- a/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -67,31 +67,20 @@ define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) { ; CHECK-LABEL: fneg_float64_t: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r7, lr} -; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vldr d0, .LCPI2_0 -; CHECK-NEXT: vmov r2, r3, d9 -; CHECK-NEXT: vmov r4, r5, d0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dsub -; CHECK-NEXT: vmov r2, r3, d8 -; CHECK-NEXT: vmov d9, r0, r1 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r1, r5 -; CHECK-NEXT: bl __aeabi_dsub -; CHECK-NEXT: vmov d8, r0, r1 -; CHECK-NEXT: vmov q0, q4 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: pop {r4, r5, r7, pc} -; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI2_0: -; CHECK-NEXT: .long 0 @ double -0 -; CHECK-NEXT: .long 2147483648 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: vstr d1, [sp] +; CHECK-NEXT: ldrb.w r0, [sp, #7] +; CHECK-NEXT: vstr d0, [sp, #8] +; CHECK-NEXT: ldrb.w r1, [sp, #15] +; CHECK-NEXT: eor r0, r0, #128 +; CHECK-NEXT: strb.w r0, [sp, #7] +; CHECK-NEXT: vldr d1, [sp] +; CHECK-NEXT: eor r0, r1, #128 +; CHECK-NEXT: strb.w r0, [sp, #15] +; CHECK-NEXT: vldr d0, [sp, #8] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: bx lr entry: %0 = fsub nnan ninf nsz <2 x double> , %src ret <2 x double> %0