diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2585,6 +2585,11 @@
   // same blocks of its users.
   virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
 
+  // Return true if FCOPYSIGN can be lowered with the given FP types.
+  virtual bool canCopySign(EVT ValueTy, EVT SignTy) const {
+    return isOperationExpand(ISD::FCOPYSIGN, ValueTy) || ValueTy == SignTy;
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12581,17 +12581,14 @@
 
 /// copysign(x, fp_extend(y)) -> copysign(x, y)
 /// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(const TargetLowering &TLI,
+                                                    SDNode *N) {
   SDValue N1 = N->getOperand(1);
   if ((N1.getOpcode() == ISD::FP_EXTEND ||
        N1.getOpcode() == ISD::FP_ROUND)) {
-    // Do not optimize out type conversion of f128 type yet.
-    // For some targets like x86_64, configuration is changed to keep one f128
-    // value in one SSE register, but instruction selection cannot handle
-    // FCOPYSIGN on SSE registers yet.
     EVT N1VT = N1->getValueType(0);
     EVT N1Op0VT = N1->getOperand(0).getValueType();
-    return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+    return TLI.canCopySign(N1VT, N1Op0VT);
   }
   return false;
 }
@@ -12637,7 +12634,7 @@
 
   // copysign(x, fp_extend(y)) -> copysign(x, y)
   // copysign(x, fp_round(y)) -> copysign(x, y)
-  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
+  if (CanCombineFCOPYSIGN_EXTEND_ROUND(TLI, N))
     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
 
   return SDValue();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -749,6 +749,10 @@
   bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;
 
   void finalizeLowering(MachineFunction &MF) const override;
+
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return true;
+  }
 };
 
 namespace AArch64 {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -323,6 +323,10 @@
     return MVT::i32;
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
   AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
 
   bool SelectFlatOffset(bool IsSigned, SelectionDAG &DAG, SDNode *N,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -805,6 +805,10 @@
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     bool isUnsupportedFloatingType(EVT VT) const;
 
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -276,6 +276,10 @@
     return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
   // Intrinsics
   SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -687,6 +687,10 @@
       return true;
     }
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     /// Emit a sign-extension using sll/sra, seb, or seh appropriately.
     MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr &MI,
                                                 MachineBasicBlock *BB,
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -946,6 +946,10 @@
                                              unsigned JTI,
                                              MCContext &Ctx) const override;
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
   private:
     struct ReuseLoadInfo {
       SDValue Ptr;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -524,6 +524,10 @@
     return true;
   }
 
+  bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+    return SignTy != MVT::f128;
+  }
+
 private:
   const SystemZSubtarget &Subtarget;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -66,12 +66,6 @@
 defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
 defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
 
-// DAGCombine oddly folds casts into the rhs of copysign. Unfold them.
-def : Pat<(fcopysign F64:$lhs, F32:$rhs),
-          (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>;
-def : Pat<(fcopysign F32:$lhs, F64:$rhs),
-          (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>;
-
 // WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
 def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
 def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1135,6 +1135,10 @@
     /// math ops).
     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
 
+    bool canCopySign(EVT ValueTy, EVT SignTy) const override {
+      return SignTy != MVT::f128;
+    }
+
     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                       unsigned AddrSpace) const override {
       // If we can replace more than 2 scalar stores, there will be a reduction
diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I-ILP32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I-LP64
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi ilp32d < %s | FileCheck %s -check-prefix=RV32I-ILP32D
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mattr=+f -mattr=+d \
+; RUN:   -target-abi lp64d < %s | FileCheck %s -check-prefix=RV64I-LP64D
+
+; Check that DAGCombiner only folds casts into the sign argument of copysign
+; when the resulting FCOPYSIGN would be expanded anyway (i.e. when the target
+; cannot lower mixed-precision magnitude and sign arguments directly).
+
+declare double @llvm.copysign.f64(double, double)
+declare float @llvm.copysign.f32(float, float)
+
+define double @fold_promote(double %a, float %b) nounwind {
+; RV32I-ILP32-LABEL: fold_promote:
+; RV32I-ILP32:       # %bb.0:
+; RV32I-ILP32-NEXT:    lui a3, 524288
+; RV32I-ILP32-NEXT:    and a2, a2, a3
+; RV32I-ILP32-NEXT:    addi a3, a3, -1
+; RV32I-ILP32-NEXT:    and a1, a1, a3
+; RV32I-ILP32-NEXT:    or a1, a1, a2
+; RV32I-ILP32-NEXT:    ret
+;
+; RV64I-LP64-LABEL: fold_promote:
+; RV64I-LP64:       # %bb.0:
+; RV64I-LP64-NEXT:    addi a2, zero, -1
+; RV64I-LP64-NEXT:    slli a2, a2, 63
+; RV64I-LP64-NEXT:    addi a2, a2, -1
+; RV64I-LP64-NEXT:    and a0, a0, a2
+; RV64I-LP64-NEXT:    addi a2, zero, 1
+; RV64I-LP64-NEXT:    slli a2, a2, 31
+; RV64I-LP64-NEXT:    and a1, a1, a2
+; RV64I-LP64-NEXT:    slli a1, a1, 32
+; RV64I-LP64-NEXT:    or a0, a0, a1
+; RV64I-LP64-NEXT:    ret
+;
+; RV32I-ILP32D-LABEL: fold_promote:
+; RV32I-ILP32D:       # %bb.0:
+; RV32I-ILP32D-NEXT:    fcvt.d.s ft0, fa1
+; RV32I-ILP32D-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV32I-ILP32D-NEXT:    ret
+;
+; RV64I-LP64D-LABEL: fold_promote:
+; RV64I-LP64D:       # %bb.0:
+; RV64I-LP64D-NEXT:    fcvt.d.s ft0, fa1
+; RV64I-LP64D-NEXT:    fsgnj.d fa0, fa0, ft0
+; RV64I-LP64D-NEXT:    ret
+  %c = fpext float %b to double
+  %t = call double @llvm.copysign.f64(double %a, double %c)
+  ret double %t
+}
+
+define float @fold_demote(float %a, double %b) nounwind {
+; RV32I-ILP32-LABEL: fold_demote:
+; RV32I-ILP32:       # %bb.0:
+; RV32I-ILP32-NEXT:    lui a1, 524288
+; RV32I-ILP32-NEXT:    and a2, a2, a1
+; RV32I-ILP32-NEXT:    addi a1, a1, -1
+; RV32I-ILP32-NEXT:    and a0, a0, a1
+; RV32I-ILP32-NEXT:    or a0, a0, a2
+; RV32I-ILP32-NEXT:    ret
+;
+; RV64I-LP64-LABEL: fold_demote:
+; RV64I-LP64:       # %bb.0:
+; RV64I-LP64-NEXT:    lui a2, 524288
+; RV64I-LP64-NEXT:    addiw a2, a2, -1
+; RV64I-LP64-NEXT:    and a0, a0, a2
+; RV64I-LP64-NEXT:    addi a2, zero, -1
+; RV64I-LP64-NEXT:    slli a2, a2, 63
+; RV64I-LP64-NEXT:    and a1, a1, a2
+; RV64I-LP64-NEXT:    srli a1, a1, 32
+; RV64I-LP64-NEXT:    or a0, a0, a1
+; RV64I-LP64-NEXT:    ret
+;
+; RV32I-ILP32D-LABEL: fold_demote:
+; RV32I-ILP32D:       # %bb.0:
+; RV32I-ILP32D-NEXT:    fcvt.s.d ft0, fa1
+; RV32I-ILP32D-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV32I-ILP32D-NEXT:    ret
+;
+; RV64I-LP64D-LABEL: fold_demote:
+; RV64I-LP64D:       # %bb.0:
+; RV64I-LP64D-NEXT:    fcvt.s.d ft0, fa1
+; RV64I-LP64D-NEXT:    fsgnj.s fa0, fa0, ft0
+; RV64I-LP64D-NEXT:    ret
+  %c = fptrunc double %b to float
+  %t = call float @llvm.copysign.f32(float %a, float %c)
+  ret float %t
+}
diff --git a/llvm/test/CodeGen/WebAssembly/copysign-casts.ll b/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
--- a/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
+++ b/llvm/test/CodeGen/WebAssembly/copysign-casts.ll
@@ -1,7 +1,6 @@
 ; RUN: llc < %s -asm-verbose=false -wasm-keep-registers | FileCheck %s
 
-; DAGCombiner oddly folds casts into the rhs of copysign. Test that they get
-; unfolded.
+; Check that DAGCombiner does not fold casts into the sign argument of copysign.
 
 target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
 target triple = "wasm32-unknown-unknown"