diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1272,6 +1272,7 @@
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::FADD, VT, Custom);
+      setOperationAction(ISD::FCOPYSIGN, VT, Custom);
       setOperationAction(ISD::FDIV, VT, Custom);
       setOperationAction(ISD::FMA, VT, Custom);
       setOperationAction(ISD::FMAXIMUM, VT, Custom);
@@ -6985,6 +6986,30 @@
   SDValue In2 = Op.getOperand(1);
   EVT SrcVT = In2.getValueType();
 
+  if (VT.isScalableVector()) {
+    if (VT != SrcVT)
+      return SDValue();
+
+    // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
+    //
+    // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
+    // maybe useful for copysign operations with mismatched VTs.
+    //
+    // IntVT here is chosen so it's a legal type with the same element width
+    // as the input.
+    EVT IntVT =
+        getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
+    unsigned NumBits = VT.getScalarSizeInBits();
+    SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
+    SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
+    SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
+                               getSVESafeBitCast(IntVT, In2, DAG));
+    SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
+                                    getSVESafeBitCast(IntVT, In1, DAG));
+    SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
+    return getSVESafeBitCast(VT, IntResult, DAG);
+  }
+
   if (SrcVT.bitsLT(VT))
     In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
   else if (SrcVT.bitsGT(VT))
diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple aarch64-eabi -mattr=+sve -o - | FileCheck --check-prefixes=CHECK %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+;============ v2f32
+
+define <vscale x 2 x float> @test_copysign_v2f32_v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x float> @test_copysign_v2f32_v2f64(<vscale x 2 x float> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f32_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 2 x double> %b to <vscale x 2 x float>
+  %r = call <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %tmp0)
+  ret <vscale x 2 x float> %r
+}
+
+declare <vscale x 2 x float> @llvm.copysign.v2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #0
+
+;============ v4f32
+
+define <vscale x 4 x float> @test_copysign_v4f32_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+; SplitVecOp #1
+define <vscale x 4 x float> @test_copysign_v4f32_v4f64(<vscale x 4 x float> %a, <vscale x 4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f32_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z2.s, p0/m, z2.d
+; CHECK-NEXT:    fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT:    and z1.s, z1.s, #0x80000000
+; CHECK-NEXT:    and z0.s, z0.s, #0x7fffffff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x float>
+  %r = call <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %tmp0)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.copysign.v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0
+
+;============ v2f64
+
+define <vscale x 2 x double> @test_copysign_v2f64_v2f32(<vscale x 2 x double> %a, <vscale x 2 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z1.d, p0/m, z1.s
+; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fpext <vscale x 2 x float> %b to <vscale x 2 x double>
+  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %tmp0)
+  ret <vscale x 2 x double> %r
+}
+
+define <vscale x 2 x double> @test_copysign_v2f64_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v2f64_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT:    and z1.d, z1.d, #0x8000000000000000
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %r
+}
+
+declare <vscale x 2 x double> @llvm.copysign.v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0
+
+;============ v4f64
+
+; SplitVecRes mismatched
+define <vscale x 4 x double> @test_copysign_v4f64_v4f32(<vscale x 4 x double> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpkhi z3.d, z2.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpklo z2.d, z2.s
+; CHECK-NEXT:    fcvt z3.d, p0/m, z3.s
+; CHECK-NEXT:    fcvt z2.d, p0/m, z2.s
+; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
+; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
+; CHECK-NEXT:    orr z0.d, z2.d, z0.d
+; CHECK-NEXT:    orr z1.d, z3.d, z1.d
+; CHECK-NEXT:    ret
+  %tmp0 = fpext <vscale x 4 x float> %b to <vscale x 4 x double>
+  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %tmp0)
+  ret <vscale x 4 x double> %r
+}
+
+; SplitVecRes same
+define <vscale x 4 x double> @test_copysign_v4f64_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f64_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT:    and z2.d, z2.d, #0x8000000000000000
+; CHECK-NEXT:    and z1.d, z1.d, #0x7fffffffffffffff
+; CHECK-NEXT:    and z3.d, z3.d, #0x8000000000000000
+; CHECK-NEXT:    orr z0.d, z2.d, z0.d
+; CHECK-NEXT:    orr z1.d, z3.d, z1.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
+  ret <vscale x 4 x double> %r
+}
+
+declare <vscale x 4 x double> @llvm.copysign.v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) #0
+
+;============ v4f16
+
+define <vscale x 4 x half> @test_copysign_v4f16_v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f16_v4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @test_copysign_v4f16_v4f32(<vscale x 4 x half> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f16_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 4 x float> %b to <vscale x 4 x half>
+  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 4 x half> @test_copysign_v4f16_v4f64(<vscale x 4 x half> %a, <vscale x 4 x double> %b) #0 {
+; CHECK-LABEL: test_copysign_v4f16_v4f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fcvt z2.h, p0/m, z2.d
+; CHECK-NEXT:    fcvt z1.h, p0/m, z1.d
+; CHECK-NEXT:    uzp1 z1.s, z1.s, z2.s
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 4 x double> %b to <vscale x 4 x half>
+  %r = call <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %tmp0)
+  ret <vscale x 4 x half> %r
+}
+
+declare <vscale x 4 x half> @llvm.copysign.v4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #0
+
+;============ v8f16
+
+define <vscale x 8 x half> @test_copysign_v8f16_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: test_copysign_v8f16_v8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @test_copysign_v8f16_v8f32(<vscale x 8 x half> %a, <vscale x 8 x float> %b) #0 {
+; CHECK-LABEL: test_copysign_v8f16_v8f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcvt z2.h, p0/m, z2.s
+; CHECK-NEXT:    fcvt z1.h, p0/m, z1.s
+; CHECK-NEXT:    uzp1 z1.h, z1.h, z2.h
+; CHECK-NEXT:    and z1.h, z1.h, #0x8000
+; CHECK-NEXT:    and z0.h, z0.h, #0x7fff
+; CHECK-NEXT:    orr z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %tmp0 = fptrunc <vscale x 8 x float> %b to <vscale x 8 x half>
+  %r = call <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %tmp0)
+  ret <vscale x 8 x half> %r
+}
+
+declare <vscale x 8 x half> @llvm.copysign.v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0
+
+attributes #0 = { nounwind }
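For reference, the identity the new scalable-vector path implements, copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK), can be demonstrated with a minimal scalar C++20 sketch. This is not part of the patch; the helper name copysign_bits is illustrative only, and the scalar bitcasts stand in for the patch's getSVESafeBitCast calls:

#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

// Scalar model of the per-lane SVE lowering: move both operands into the
// integer domain, split sign from magnitude with masks, and recombine.
static double copysign_bits(double Mag, double Sgn) {
  constexpr std::uint64_t SignMask = 1ULL << 63; // APInt::getSignMask(64)
  std::uint64_t MagBits = std::bit_cast<std::uint64_t>(Mag); // In1
  std::uint64_t SgnBits = std::bit_cast<std::uint64_t>(Sgn); // In2
  // (In2 & SignMask) | (In1 & ~SignMask), matching Sign/Magnitude/IntResult.
  return std::bit_cast<double>((SgnBits & SignMask) | (MagBits & ~SignMask));
}

int main() {
  assert(copysign_bits(1.5, -0.0) == std::copysign(1.5, -0.0));
  assert(copysign_bits(-2.0, 3.0) == std::copysign(-2.0, 3.0));
}

Per lane this is exactly the two and instructions (logical immediates 0x8000... and 0x7fff...) plus one orr that the CHECK lines above assert. Staying in the integer domain performs no floating-point arithmetic, so the result is bit-exact even for NaN and infinity inputs.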