Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -644,7 +644,7 @@ .legalFor({{S32, S32}, {S64, S32}, {S16, S32}}) .lowerFor({{S32, S64}}) .lowerIf(typeIs(1, S1)) - .customFor({{S64, S64}}); + .customFor({{S64, S64}, {S16, S64}}); if (ST.has16BitInsts()) IToFP.legalFor({{S16, S16}}); IToFP.clampScalar(1, S32, S64) @@ -1815,7 +1815,16 @@ const LLT S64 = LLT::scalar(64); const LLT S32 = LLT::scalar(32); + const LLT S16 = LLT::scalar(16); + + // i64 -> f16 + if (MRI.getType(Src) == S64 && MRI.getType(Dst)==S16) { + B.buildFPTrunc(Dst, B.buildInstr(MI.getOpcode(), {S32}, {Src})); + MI.eraseFromParent(); + return true; + } + // i64 -> f64 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64); auto Unmerge = B.buildUnmerge({S32, S32}, Src); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sitofp.mir @@ -535,3 +535,99 @@ %2:_(s32) = G_SITOFP %1 $vgpr0 = COPY %2 ... + +--- +name: test_sitofp_s64_to_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: test_sitofp_s64_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX6: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX6: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX6: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX6: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX6: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX6: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX6: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX6: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX6: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX6: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_sitofp_s64_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX8: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ASHR]](s64) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV2]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV3]], [[UADDO1]] + ; GFX8: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32) + ; GFX8: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[MV]], [[ASHR]] + ; GFX8: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[XOR]](s64) + ; GFX8: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[XOR]](s64), [[C2]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C1]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[XOR]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C5]] + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C6]](s32) + ; GFX8: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C7]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C8]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C8]] + ; GFX8: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C9]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C1]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C9]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[ADD]] + ; GFX8: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[ASHR]](s64), [[C2]] + ; GFX8: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[FNEG]], [[ADD]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[SELECT3]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_SITOFP %0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uitofp.mir @@ -479,3 +479,77 @@ %2:_(s32) = G_UITOFP %1 $vgpr0 = COPY %2 ... + +--- +name: test_uitofp_s64_to_s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GFX6-LABEL: name: test_uitofp_s64_to_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX6: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX6: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX6: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX6: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX6: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX6: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX6: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX6: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX6: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX6: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX6: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX6: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX6: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX6: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX6: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX6: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX6: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX6: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX6: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX6: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX6: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX6: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX6: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX6: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX6: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: test_uitofp_s64_to_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; GFX8: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX8: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; GFX8: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 190 + ; GFX8: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ_ZERO_UNDEF]] + ; GFX8: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s64), [[C1]] + ; GFX8: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[SUB]], [[C]] + ; GFX8: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 9223372036854775807 + ; GFX8: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[CTLZ_ZERO_UNDEF]](s32) + ; GFX8: [[AND:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C3]] + ; GFX8: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1099511627775 + ; GFX8: [[AND1:%[0-9]+]]:_(s64) = G_AND [[AND]], [[C4]] + ; GFX8: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 40 + ; GFX8: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[AND]], [[C5]](s32) + ; GFX8: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 23 + ; GFX8: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[SELECT]], [[C6]](s32) + ; GFX8: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[LSHR]](s64) + ; GFX8: [[OR:%[0-9]+]]:_(s32) = G_OR [[SHL1]], [[TRUNC]] + ; GFX8: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 549755813888 + ; GFX8: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ugt), [[AND1]](s64), [[C7]] + ; GFX8: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND1]](s64), [[C7]] + ; GFX8: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; GFX8: [[AND2:%[0-9]+]]:_(s32) = G_AND [[OR]], [[C8]] + ; GFX8: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP2]](s1), [[AND2]], [[C]] + ; GFX8: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C8]], [[SELECT1]] + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[OR]], [[SELECT2]] + ; GFX8: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[ADD]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s16) = G_UITOFP %0 + %2:_(s32) = G_ANYEXT %1 + $vgpr0 = COPY %2 +...