diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1127,6 +1127,19 @@
            getTruncStoreAction(ValVT, MemVT) == Custom);
   }
 
+  LegalizeAction getCopySignAction(EVT ValVT, EVT SignVT) const {
+    if (ValVT.isExtended() || SignVT.isExtended()) return Expand;
+    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
+    unsigned SignI = (unsigned) SignVT.getSimpleVT().SimpleTy;
+    assert(ValI < MVT::LAST_VALUETYPE && SignI < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    return CopySignActions[ValI][SignI];
+  }
+
+  bool isCopySignLegal(EVT ValVT, EVT SignVT) const {
+    return getCopySignAction(ValVT, SignVT) == Legal;
+  }
+
   /// Return how the indexed load should be treated: either it is legal, needs
   /// to be promoted to a larger size, needs to be expanded to some other code
   /// sequence, or the target has a custom expander for it.
@@ -2045,6 +2058,14 @@
     TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
   }
 
+  void setCopySignAction(MVT ValVT, MVT SignVT,
+                         LegalizeAction Action) {
+    assert(ValVT.isValid() && SignVT.isValid() && "Table isn't big enough!");
+    unsigned ValI = (unsigned) ValVT.SimpleTy;
+    unsigned SignI = (unsigned) SignVT.SimpleTy;
+    CopySignActions[ValI][SignI] = Action;
+  }
+
   /// Indicate that the specified indexed load does or does not work with the
   /// specified type and indicate what to do about it.
   ///
@@ -2793,6 +2814,10 @@
   /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
   uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
 
+  /// For each magnitude value type and each sign value type, keep a
+  /// LegalizeAction that indicates how FCOPYSIGN is handled.
+  LegalizeAction CopySignActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
   ValueTypeActionImpl ValueTypeActions;
 
 private:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12666,17 +12666,16 @@
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
 /// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(const TargetLowering &TLI,
+                                                    SDNode *N) {
   SDValue N1 = N->getOperand(1);
   if ((N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)) {
-    // Do not optimize out type conversion of f128 type yet.
-    // For some targets like x86_64, configuration is changed to keep one f128
-    // value in one SSE register, but instruction selection cannot handle
-    // FCOPYSIGN on SSE registers yet.
+    SDValue N0 = N->getOperand(0);
+    EVT N0VT = N0->getValueType(0);
     EVT N1VT = N1->getValueType(0);
     EVT N1Op0VT = N1->getOperand(0).getValueType();
-    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+    return (N1VT == N1Op0VT || TLI.isCopySignLegal(N0VT, N1Op0VT));
   }
   return false;
 }
 
@@ -12722,7 +12721,7 @@
 
   // copysign(x, fp_extend(y)) -> copysign(x, y)
   // copysign(x, fp_round(y)) -> copysign(x, y)
-  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+  if (CanCombineFCOPYSIGN_EXTEND_ROUND(TLI, N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
 
   return SDValue();
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -626,6 +626,14 @@
       setIndexedStoreAction(IM, VT, Expand);
     }
 
+    // FCOPYSIGN is considered legal for any sign type by default. f128 is the
+    // historical exception: some targets (e.g. x86_64) keep f128 values in
+    // SSE registers but cannot select FCOPYSIGN on them yet.
+    for (MVT SignTy : MVT::all_valuetypes()) {
+      LegalizeAction Action = SignTy != MVT::f128 ? Legal : Expand;
+      setCopySignAction(VT, SignTy, Action);
+    }
+
     // Most backends expect to see the node which just returns the value loaded.
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -149,6 +149,13 @@
       ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
       ISD::FP16_TO_FP, ISD::FP_TO_FP16};
 
+  if (Subtarget.hasStdExtF() || Subtarget.hasStdExtD()) {
+    // fsgnj.s/fsgnj.d require the magnitude and sign operands to have the
+    // same type, so keep the cast rather than forming a mixed FCOPYSIGN.
+    setCopySignAction(MVT::f32, MVT::f64, Expand);
+    setCopySignAction(MVT::f64, MVT::f32, Promote);
+  }
+
   if (Subtarget.hasStdExtF()) {
     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f \
+; RUN:   -target-abi ilp32f < %s | FileCheck %s -check-prefix=RV32IF
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32IFD
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64IFD
+
+; Check that DAGCombiner only folds casts into the sign argument of copysign
+; when appropriate, i.e. when the resulting mixed-precision copysign will be
+; expanded into integer bit operations that handle both types directly.
+ +declare double @llvm.copysign.f64(double, double) +declare float @llvm.copysign.f32(float, float) + +define double @fold_promote(double %a, float %b) nounwind { +; RV32I-LABEL: fold_promote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a3, 524288 +; RV32I-NEXT: and a2, a2, a3 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: and a1, a1, a3 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_promote: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, 1 +; RV64I-NEXT: slli a2, a2, 31 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_promote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) +; RV32IF-NEXT: sw s0, 8(sp) +; RV32IF-NEXT: sw s1, 4(sp) +; RV32IF-NEXT: mv s0, a1 +; RV32IF-NEXT: mv s1, a0 +; RV32IF-NEXT: call __extendsfdf2 +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: and a1, a1, a0 +; RV32IF-NEXT: addi a0, a0, -1 +; RV32IF-NEXT: and a0, s0, a0 +; RV32IF-NEXT: or a1, a0, a1 +; RV32IF-NEXT: mv a0, s1 +; RV32IF-NEXT: lw s1, 4(sp) +; RV32IF-NEXT: lw s0, 8(sp) +; RV32IF-NEXT: lw ra, 12(sp) +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_promote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.d.s ft0, fa1 +; RV32IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_promote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.d.s ft0, fa1 +; RV64IFD-NEXT: fsgnj.d fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fpext float %b to double + %t = call double @llvm.copysign.f64(double %a, double %c) + ret double %t +} + +define float @fold_demote(float %a, double %b) nounwind { +; RV32I-LABEL: fold_demote: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 524288 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: fold_demote: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: addi a2, zero, -1 +; RV64I-NEXT: slli a2, a2, 63 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32IF-LABEL: fold_demote: +; RV32IF: # %bb.0: +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: sw ra, 12(sp) +; RV32IF-NEXT: fsw fs0, 8(sp) +; RV32IF-NEXT: fmv.s fs0, fa0 +; RV32IF-NEXT: call __truncdfsf2 +; RV32IF-NEXT: fsgnj.s fa0, fs0, fa0 +; RV32IF-NEXT: flw fs0, 8(sp) +; RV32IF-NEXT: lw ra, 12(sp) +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; +; RV32IFD-LABEL: fold_demote: +; RV32IFD: # %bb.0: +; RV32IFD-NEXT: fcvt.s.d ft0, fa1 +; RV32IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV32IFD-NEXT: ret +; +; RV64IFD-LABEL: fold_demote: +; RV64IFD: # %bb.0: +; RV64IFD-NEXT: fcvt.s.d ft0, fa1 +; RV64IFD-NEXT: fsgnj.s fa0, fa0, ft0 +; RV64IFD-NEXT: ret + %c = fptrunc double %b to float + %t = call float @llvm.copysign.f32(float %a, float %c) + ret float %t +}
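
For readers cross-checking the RV32I/RV64I output above: the point of the fold on soft-float targets is that the sign bit can be spliced across precisions without ever materializing the cast. A reference sketch of what the generated `fold_promote` code computes (plain C++, not part of the patch):

```cpp
#include <cstdint>
#include <cstring>

// What the soft-float RV32I/RV64I output for fold_promote computes: the sign
// bit is taken straight from the f32 representation of b, so b is never
// extended to f64 (no __extendsfdf2 call).
double fold_promote_ref(double a, float b) {
  uint64_t ABits;
  uint32_t BBits;
  std::memcpy(&ABits, &a, sizeof(ABits));
  std::memcpy(&BBits, &b, sizeof(BBits));
  ABits &= ~(UINT64_C(1) << 63);                          // clear sign of a
  ABits |= uint64_t(BBits & UINT32_C(0x80000000)) << 32;  // splice sign of b
  std::memcpy(&a, &ABits, sizeof(a));
  return a;
}
```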
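Out-of-tree targets would opt out of the fold the same way the RISCVISelLowering.cpp hunk does. A minimal sketch, assuming a hypothetical backend: `MyTargetLowering`, `MySubtarget` and `hasSignInjection()` are illustrative names, not part of this patch, and since only `Legal` permits the combine, the `Expand`/`Promote` distinction is currently documentary:

```cpp
// Sketch only: populate the new CopySignActions table from a backend
// constructor, mirroring the RISC-V change above.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                   const MySubtarget &Subtarget)
    : TargetLowering(TM) {
  if (Subtarget.hasSignInjection()) {
    // The native sign-injection instruction requires matching magnitude and
    // sign types, so keep the fp_extend/fp_round of the sign operand and let
    // instruction selection see a same-type FCOPYSIGN plus a conversion.
    setCopySignAction(MVT::f32, MVT::f64, Expand);   // sign must be rounded
    setCopySignAction(MVT::f64, MVT::f32, Promote);  // sign must be extended
  }
  // Soft-float configurations keep the defaults (Legal for everything except
  // f128), so the fold still fires and the copysign expands to integer ops.
}
```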
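And the query side, as a hypothetical sanity check (not part of the patch): for a `TargetLowering` instance built for riscv32 with +f and +d, the table should read back as follows.

```cpp
#include <cassert>
#include "llvm/CodeGen/TargetLowering.h"

void checkRISCVCopySignActions(const llvm::TargetLowering &TLI) {
  using llvm::MVT;
  // Same-type copysign is unaffected by this patch.
  assert(TLI.isCopySignLegal(MVT::f32, MVT::f32));
  assert(TLI.isCopySignLegal(MVT::f64, MVT::f64));
  // The mixed-precision cases were set to Expand/Promote above; only Legal
  // permits the DAGCombiner fold, so both read back as not legal.
  assert(!TLI.isCopySignLegal(MVT::f32, MVT::f64));
  assert(!TLI.isCopySignLegal(MVT::f64, MVT::f32));
  // The f128 default from TargetLoweringBase::initActions is preserved.
  assert(!TLI.isCopySignLegal(MVT::f64, MVT::f128));
}
```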