diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h @@ -57,6 +57,7 @@ LegalizerHelper &Helper) const; bool legalizeCTTZ(MachineInstr &MI, LegalizerHelper &Helper) const; bool legalizeMemOps(MachineInstr &MI, LegalizerHelper &Helper) const; + bool legalizeFCopySign(MachineInstr &MI, LegalizerHelper &Helper) const; const AArch64Subtarget *ST; }; } // End llvm namespace. diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -814,6 +814,11 @@ getActionDefinitionsBuilder({G_LROUND, G_LLROUND}) .legalFor({{s64, s32}, {s64, s64}}); + // TODO: Custom legalization for vector types. + // TODO: Custom legalization for mismatched types. + // TODO: s16 support. 
+  getActionDefinitionsBuilder(G_FCOPYSIGN).customFor({{s32, s32}, {s64, s64}});
+
   getLegacyLegalizerInfo().computeTables();
   verify(*ST.getInstrInfo());
 }
@@ -856,6 +861,8 @@
   case TargetOpcode::G_MEMMOVE:
   case TargetOpcode::G_MEMSET:
     return legalizeMemOps(MI, Helper);
+  case TargetOpcode::G_FCOPYSIGN:
+    return legalizeFCopySign(MI, Helper);
   }
 
   llvm_unreachable("expected switch to return");
@@ -1438,3 +1445,76 @@
 
   return false;
 }
+
+/// Custom legalization for scalar G_FCOPYSIGN.
+///
+/// Only s32 and s64 destinations whose sign operand has the same type are
+/// handled; anything else is rejected by assertion. Both source operands are
+/// inserted into lane 0 of an otherwise-undef 128-bit vector, a vector mask
+/// with the high bit set is materialized, and the three vectors are combined
+/// with AArch64::G_BIT. The result is unmerged so that lane 0 defines the
+/// original destination register, after which \p MI is erased.
+///
+/// \returns true in all cases that get past the assertions.
+bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
+                                             LegalizerHelper &Helper) const {
+  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
+  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
+  Register Dst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(Dst);
+  assert(DstTy.isScalar() && "Only expected scalars right now!");
+  const unsigned DstSize = DstTy.getSizeInBits();
+  assert((DstSize == 32 || DstSize == 64) && "Unexpected dst type!");
+  assert(MRI.getType(MI.getOperand(2).getReg()) == DstTy &&
+         "Expected homogeneous types!");
+
+  // We want to materialize a mask with the high bit set.
+  uint64_t EltMask;
+  LLT VecTy;
+
+  // TODO: s16 support.
+  switch (DstSize) {
+  default:
+    llvm_unreachable("Unexpected type for G_FCOPYSIGN!");
+  case 64: {
+    // AdvSIMD immediate moves cannot materialize our mask in a single
+    // instruction for 64-bit elements. Instead, materialize zero and then
+    // negate it.
+    EltMask = 0;
+    VecTy = LLT::fixed_vector(2, DstTy);
+    break;
+  }
+  case 32:
+    EltMask = 0x80000000ULL;
+    VecTy = LLT::fixed_vector(4, DstTy);
+    break;
+  }
+
+  // Widen In1 and In2 to 128 bits. We want these to eventually become
+  // INSERT_SUBREGs.
+  auto Undef = MIRBuilder.buildUndef(VecTy);
+  auto Zero = MIRBuilder.buildConstant(DstTy, 0);
+  auto Ins1 = MIRBuilder.buildInsertVectorElement(
+      VecTy, Undef, MI.getOperand(1).getReg(), Zero);
+  auto Ins2 = MIRBuilder.buildInsertVectorElement(
+      VecTy, Undef, MI.getOperand(2).getReg(), Zero);
+
+  // Construct the mask.
+  auto Mask = MIRBuilder.buildConstant(VecTy, EltMask);
+  if (DstSize == 64)
+    Mask = MIRBuilder.buildFNeg(VecTy, Mask);
+
+  auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
+
+  // Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
+  // want this to eventually become an EXTRACT_SUBREG.
+  // Spell out the element type rather than leaning on class template argument
+  // deduction. VecTy has at most four lanes here, so four inline slots cover
+  // every destination register.
+  SmallVector<Register, 4> DstRegs(1, Dst);
+  for (unsigned I = 1, E = VecTy.getNumElements(); I < E; ++I)
+    DstRegs.push_back(MRI.createGenericVirtualRegister(DstTy));
+  MIRBuilder.buildUnmerge(DstRegs, Sel);
+  MI.eraseFromParent();
+  return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
@@ -0,0 +1,56 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: legalize_s32
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $s0, $s1
+    ; CHECK-LABEL: name: legalize_s32
+    ; CHECK: liveins: $s0, $s1
+    ; CHECK: %val:_(s32) = COPY $s0
+    ; CHECK: %sign:_(s32) = COPY $s1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
+    ; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+    ; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
+    ; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
+    ; CHECK: $s0 = COPY %fcopysign(s32)
+    ; CHECK: RET_ReallyLR implicit $s0
+    %val:_(s32) = COPY $s0
+    %sign:_(s32) = COPY $s1
+    %fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
+    $s0 = COPY %fcopysign(s32)
+    RET_ReallyLR implicit $s0
+
+...
+---
+name: legalize_s64
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $d0, $d1
+    ; CHECK-LABEL: name: legalize_s64
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK: %val:_(s64) = COPY $d0
+    ; CHECK: %sign:_(s64) = COPY $d1
+    ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
+    ; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+    ; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
+    ; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
+    ; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
+    ; CHECK: $d0 = COPY %fcopysign(s64)
+    ; CHECK: RET_ReallyLR implicit $d0
+    %val:_(s64) = COPY $d0
+    %sign:_(s64) = COPY $d1
+    %fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
+    $d0 = COPY %fcopysign(s64)
+    RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -487,8 +487,8 @@
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: G_FCOPYSIGN (opcode {{[0-9]+}}): 2 type indices
-# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. the first uncovered type index: 2, OK
+# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
 # DEBUG-NEXT: G_IS_FPCLASS (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined