Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -38,6 +38,9 @@ bool legalizeAddrSpaceCast(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const; + bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &MIRBuilder, bool Signed) const; + }; } // End llvm namespace. #endif Index: lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -280,6 +280,7 @@ getActionDefinitionsBuilder({G_SITOFP, G_UITOFP}) .legalFor({{S32, S32}, {S64, S32}}) .lowerFor({{S32, S64}}) + .customFor({{S64, S64}}) .scalarize(0); getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI}) @@ -672,6 +673,10 @@ switch (MI.getOpcode()) { case TargetOpcode::G_ADDRSPACE_CAST: return legalizeAddrSpaceCast(MI, MRI, MIRBuilder); + case TargetOpcode::G_SITOFP: + return legalizeITOFP(MI, MRI, MIRBuilder, true); + case TargetOpcode::G_UITOFP: + return legalizeITOFP(MI, MRI, MIRBuilder, false); default: return false; } @@ -828,3 +833,35 @@ MI.eraseFromParent(); return true; } + +bool AMDGPULegalizerInfo::legalizeITOFP( + MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B, bool Signed) const { + B.setInstr(MI); + + unsigned Dst = MI.getOperand(0).getReg(); + unsigned Src = MI.getOperand(1).getReg(); + + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64); + + auto Unmerge = B.buildUnmerge({S32, S32}, Src); + + auto CvtHi = Signed ? + B.buildSITOFP(S64, Unmerge.getReg(1)) : + B.buildUITOFP(S64, Unmerge.getReg(1)); + + auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0)); + + auto ThirtyTwo = B.buildConstant(S32, 32); + auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false) + .addUse(CvtHi.getReg(0)) + .addUse(ThirtyTwo.getReg(0)); + + // TODO: Should this propagate fast-math-flags? + B.buildFAdd(Dst, LdExp, CvtLo); + MI.eraseFromParent(); + return true; +} Index: test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -122,3 +122,23 @@ %1:_(s32) = G_SITOFP %0 $vgpr0 = COPY %1 ... + +--- +name: test_sitofp_s64_to_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sitofp_s64_to_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[SITOFP:%[0-9]+]]:_(s64) = G_SITOFP [[UV1]](s32) + ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[SITOFP]](s64), [[C]](s32) + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_SITOFP %0 + $vgpr0_vgpr1 = COPY %1 +... Index: test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir =================================================================== --- test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -90,3 +90,23 @@ %1:_(s32) = G_UITOFP %0 $vgpr0 = COPY %1 ... + +--- +name: test_uitofp_s64_to_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_uitofp_s64_to_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[UITOFP:%[0-9]+]]:_(s64) = G_UITOFP [[UV1]](s32) + ; CHECK: [[UITOFP1:%[0-9]+]]:_(s64) = G_UITOFP [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.ldexp), [[UITOFP]](s64), [[C]](s32) + ; CHECK: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[INT]], [[UITOFP1]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FADD]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_UITOFP %0 + $vgpr0_vgpr1 = COPY %1 +...