diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9076,6 +9076,10 @@
       return CN;
   }
 
+  if (N.getOpcode() == ISD::SPLAT_VECTOR)
+    if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0)))
+      return CN;
+
   return nullptr;
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -95,6 +95,7 @@
 
   // Predicated instructions with the result of inactive lanes provided by the
   // last operand.
+  FNEG_MERGE_PASSTHRU,
   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -135,6 +135,7 @@
   default:
     return false;
   case AArch64ISD::DUP_MERGE_PASSTHRU:
+  case AArch64ISD::FNEG_MERGE_PASSTHRU:
   case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
   case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
     return true;
@@ -969,6 +970,7 @@
       setOperationAction(ISD::FDIV, VT, Custom);
       setOperationAction(ISD::FMA, VT, Custom);
       setOperationAction(ISD::FMUL, VT, Custom);
+      setOperationAction(ISD::FNEG, VT, Custom);
       setOperationAction(ISD::FSUB, VT, Custom);
     }
   }
@@ -1462,6 +1464,7 @@
     MAKE_CASE(AArch64ISD::UDIV_PRED)
     MAKE_CASE(AArch64ISD::UMAX_PRED)
     MAKE_CASE(AArch64ISD::UMIN_PRED)
+    MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
@@ -3322,6 +3325,9 @@
   case Intrinsic::aarch64_sve_convert_from_svbool:
     return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
                        Op.getOperand(1));
+  case Intrinsic::aarch64_sve_fneg:
+    return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_convert_to_svbool: {
     EVT OutVT = Op.getValueType();
     EVT InVT = Op.getOperand(1).getValueType();
@@ -3616,6 +3622,8 @@
     if (Op.getValueType() == MVT::f128)
       return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
+  case ISD::FNEG:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
   case ISD::FP_ROUND:
   case ISD::STRICT_FP_ROUND:
     return LowerFP_ROUND(Op, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -199,8 +199,9 @@
 ]>;
 
 // Predicated operations with the result of inactive lanes provided by the last operand.
-def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
-def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64sxt_mt : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64uxt_mt : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
 
 def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
 def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
@@ -349,8 +350,8 @@
   defm CNOT_ZPmZ : sve_int_un_pred_arit_1< 0b011, "cnot", int_aarch64_sve_cnot>;
   defm NOT_ZPmZ  : sve_int_un_pred_arit_1< 0b110, "not",  int_aarch64_sve_not>;
-  defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
-  defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
+  defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs, null_frag>;
+  defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", null_frag, AArch64fneg_mt>;
 
   defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
   defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -312,6 +312,11 @@
 : Pat<(vtd (op vt1:$Op1)),
       (inst $Op1)>;
 
+class SVE_1_Op_Passthru_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+                            ValueType vts, Instruction inst>
+: Pat<(vtd (op pg:$Op1, vts:$Op2, vtd:$Op3)),
+      (inst $Op3, $Op1, $Op2)>;
+
 class SVE_1_Op_Imm_OptLsl_Reverse_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
                                       ValueType it, ComplexPattern cpx, Instruction inst>
   : Pat<(vt (op (vt (AArch64dup (it (cpx i32:$imm, i32:$shift)))), (vt zprty:$Op1))),
@@ -3755,15 +3760,24 @@
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
+// TODO: Remove int_op once its last use is converted to ir_op.
 multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm,
-                                     SDPatternOperator op> {
+                                     SDPatternOperator int_op,
+                                     SDPatternOperator ir_op> {
   def _H : sve_int_un_pred_arit<0b01, { opc, 0b1 }, asm, ZPR16>;
   def _S : sve_int_un_pred_arit<0b10, { opc, 0b1 }, asm, ZPR32>;
   def _D : sve_int_un_pred_arit<0b11, { opc, 0b1 }, asm, ZPR64>;
 
-  def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_3_Op_Pat<nxv8f16, int_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4f32, int_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2f64, int_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_1_Op_Passthru_Pat<nxv8f16, ir_op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv4f16, ir_op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f16, ir_op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv4f32, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f32, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -323,6 +323,78 @@
   ret %r
 }
 
+define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: fneg_nxv8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 8 x half> undef, half -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 8 x half> %minus.one, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
+  %neg = fmul <vscale x 8 x half> %a, %minus.one.vec
+  ret <vscale x 8 x half> %neg
+}
+
+define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: fneg_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 4 x half> undef, half -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 4 x half> %minus.one, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
+  %neg = fmul <vscale x 4 x half> %a, %minus.one.vec
+  ret <vscale x 4 x half> %neg
+}
+
+define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: fneg_nxv2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 2 x half> undef, half -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 2 x half> %minus.one, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
+  %neg = fmul <vscale x 2 x half> %a, %minus.one.vec
+  ret <vscale x 2 x half> %neg
+}
+
+define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: fneg_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 4 x float> undef, float -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 4 x float> %minus.one, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
+  %neg = fmul <vscale x 4 x float> %a, %minus.one.vec
+  ret <vscale x 4 x float> %neg
+}
+
+define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: fneg_nxv2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 2 x float> undef, float -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 2 x float> %minus.one, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
+  %neg = fmul <vscale x 2 x float> %a, %minus.one.vec
+  ret <vscale x 2 x float> %neg
+}
+
+define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: fneg_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %minus.one = insertelement <vscale x 2 x double> undef, double -1.0, i64 0
+  %minus.one.vec = shufflevector <vscale x 2 x double> %minus.one, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
+  %neg = fmul <vscale x 2 x double> %a, %minus.one.vec
+  ret <vscale x 2 x double> %neg
+}
+
 define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
 ; CHECK-LABEL: frecps_h:
 ; CHECK: // %bb.0:
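
Note (not part of the patch): the new Intrinsic::aarch64_sve_fneg case reorders the intrinsic's (passthru, predicate, source) operands into the (predicate, source, passthru) order expected by FNEG_MERGE_PASSTHRU, which SVE_1_Op_Passthru_Pat then selects as (passthru, pg, source) for FNEG_ZPmZ. A minimal IR sketch of that merging-intrinsic form is below; the function and value names are illustrative only, assuming the standard llvm.aarch64.sve.fneg signature where inactive lanes take their value from the first operand.

; Hypothetical example: merging fneg via the SVE intrinsic, operand order (passthru, pg, src).
declare <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 2 x double>)

define <vscale x 2 x double> @fneg_merge(<vscale x 2 x double> %passthru, <vscale x 2 x i1> %pg, <vscale x 2 x double> %a) {
  ; Active lanes hold -%a; inactive lanes hold the corresponding lanes of %passthru.
  %r = call <vscale x 2 x double> @llvm.aarch64.sve.fneg.nxv2f64(<vscale x 2 x double> %passthru, <vscale x 2 x i1> %pg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %r
}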