diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu --- a/clang/test/CodeGenCUDA/bf16.cu +++ b/clang/test/CodeGenCUDA/bf16.cu @@ -2,7 +2,7 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "nvptx64-nvidia-cuda" \ -// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -S -o - %s | FileCheck %s +// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -O1 -S -o - %s | FileCheck %s #include "Inputs/cuda.h" @@ -11,10 +11,11 @@ // CHECK: .param .b16 _Z8test_argPDF16bDF16b_param_1 // __device__ void test_arg(__bf16 *out, __bf16 in) { -// CHECK: ld.param.b16 %{{h.*}}, [_Z8test_argPDF16bDF16b_param_1]; +// CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [_Z8test_argPDF16bDF16b_param_0]; +// CHECK-DAG: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_argPDF16bDF16b_param_1]; __bf16 bf16 = in; *out = bf16; -// CHECK: st.b16 +// CHECK: st.b16 [%[[A]]], %[[R]] // CHECK: ret; } @@ -22,25 +23,27 @@ // CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z8test_retDF16b( // CHECK: .param .b16 _Z8test_retDF16b_param_0 __device__ __bf16 test_ret( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z8test_retDF16b_param_0]; +// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0]; return in; -// CHECK: st.param.b16 [func_retval0+0], %h +// CHECK: st.param.b16 [func_retval0+0], %[[R]] // CHECK: ret; } +__device__ __bf16 external_func( __bf16 in); + // CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z9test_callDF16b( // CHECK: .param .b16 _Z9test_callDF16b_param_0 __device__ __bf16 test_call( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z9test_callDF16b_param_0]; -// CHECK: st.param.b16 [param0+0], %h2; +// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0]; +// CHECK: st.param.b16 [param0+0], %[[R]]; // CHECK: .param .b32 retval0; // CHECK: call.uni (retval0), -// CHECK-NEXT: _Z8test_retDF16b, +// CHECK-NEXT: _Z13external_funcDF16b, // CHECK-NEXT: ( // CHECK-NEXT: param0 // CHECK-NEXT ); -// CHECK: ld.param.b16 %h{{.*}}, [retval0+0]; - return test_ret(in); -// CHECK: st.param.b16 [func_retval0+0], %h +// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0]; + return external_func(in); +// CHECK: st.param.b16 [func_retval0+0], %[[RET]] // CHECK: ret; } diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -309,10 +309,6 @@ Ret = (5 << 28); } else if (RC == &NVPTX::Float64RegsRegClass) { Ret = (6 << 28); - } else if (RC == &NVPTX::Float16RegsRegClass) { - Ret = (7 << 28); - } else if (RC == &NVPTX::Float16x2RegsRegClass) { - Ret = (8 << 28); } else { report_fatal_error("Bad register class"); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -410,10 +410,10 @@ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); - addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); - addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass); - addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass); + addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass); // Conversion to/from FP16/FP16x2 is always legal. setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -51,11 +51,6 @@ } else if (DestRC == &NVPTX::Int64RegsRegClass) { Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr : NVPTX::BITCONVERT_64_F2I); - } else if (DestRC == &NVPTX::Float16RegsRegClass) { - Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr - : NVPTX::BITCONVERT_16_I2F); - } else if (DestRC == &NVPTX::Float16x2RegsRegClass) { - Op = NVPTX::IMOV32rr; } else if (DestRC == &NVPTX::Float32RegsRegClass) { Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr : NVPTX::BITCONVERT_32_I2F); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,10 +164,10 @@ !eq(name, "i16"): Int16Regs, !eq(name, "i32"): Int32Regs, !eq(name, "i64"): Int64Regs, - !eq(name, "f16"): Float16Regs, - !eq(name, "v2f16"): Float16x2Regs, - !eq(name, "bf16"): Float16Regs, - !eq(name, "v2bf16"): Float16x2Regs, + !eq(name, "f16"): Int16Regs, + !eq(name, "v2f16"): Int32Regs, + !eq(name, "bf16"): Int16Regs, + !eq(name, "v2bf16"): Int32Regs, !eq(name, "f32"): Float32Regs, !eq(name, "f64"): Float64Regs, !eq(name, "ai32"): Int32ArgRegs, @@ -280,29 +280,29 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math]>; def f16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, doF32FTZ]>; def f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math]>; } @@ -354,29 +354,29 @@ Requires<[allowFMA]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA]>; def f16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set (v2f16 Float16x2Regs:$dst), (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, allowFMA]>; // These have strange names so we don't perturb existing mir tests. @@ -417,28 +417,28 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[noFMA]>; def _rnf16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA]>; def _rnf16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, noFMA]>; } @@ -513,7 +513,7 @@ FromName, ".u64 \t$dst, $src;"), []>; def _f16 : NVPTXInst<(outs RC:$dst), - (ins Float16Regs:$src, CvtMode:$mode), + (ins Int16Regs:$src, CvtMode:$mode), !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", FromName, ".f16 \t$dst, $src;"), []>; def _f32 : @@ -537,7 +537,7 @@ defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; - defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>; + defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; @@ -576,7 +576,7 @@ Requires<[hasPTX<70>, hasSM<80>]>; } - defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Float16x2Regs>; + defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>; defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>; } @@ -640,21 +640,21 @@ defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>; defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; -defm SELP_f16 : SELP_PATTERN<"b16", f16, Float16Regs, f16imm, fpimm>; +defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>; defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>; defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; // This does not work as tablegen fails to infer the type of 'imm'. // def v2f16imm : Operand; -// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Float16x2Regs, v2f16imm, imm>; +// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; def SELP_f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), "selp.b32 \t$dst, $a, $b, $p;", - [(set Float16x2Regs:$dst, - (select Int1Regs:$p, (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>; + [(set Int32Regs:$dst, + (select Int1Regs:$p, (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>; //----------------------------------- // Test Instructions @@ -783,26 +783,26 @@ def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; // Matchers for signed, unsigned mul.wide ISD nodes. -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, +def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)), + (MULWIDES32 i16:$a, i16:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), (MULWIDES32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), +def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), +def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), (MULWIDES64Imm Int32Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), +def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)), (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), @@ -1003,7 +1003,7 @@ // fp16 immediate values in .f16 instructions. Instead we have to load // the constant into a register using mov.b16. def LOAD_CONST_F16 : - NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a), + NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a), "mov.b16 \t$dst, $a;", []>; defm FADD : F3_fma_component<"add", fadd>; @@ -1028,10 +1028,10 @@ !strconcat(OpcStr, " \t$dst, $src;"), [(set RC:$dst, (fneg (T RC:$src)))]>, Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; -def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Float16Regs, doF32FTZ>; -def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Float16Regs, True>; -def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; -def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Float16x2Regs, True>; +def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>; +def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>; +def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>; +def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>; // // F64 division @@ -1211,10 +1211,10 @@ Requires<[useFP16Math, Pred]>; } -defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Float16Regs, doF32FTZ>; -defm FMA16 : FMA_F16<"fma.rn.f16", f16, Float16Regs, True>; -defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; -defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Float16x2Regs, True>; +defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>; +defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>; +defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>; +defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>; defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>; defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>; @@ -1651,13 +1651,13 @@ defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; def SETP_f16rr : NVPTXInst<(outs Int1Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp), + (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;", []>, Requires<[useFP16Math]>; def SETP_f16x2rr : NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), - (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp), + (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;", []>, Requires<[useFP16Math]>; @@ -1690,7 +1690,7 @@ defm SET_b64 : SET<"b64", Int64Regs, i64imm>; defm SET_s64 : SET<"s64", Int64Regs, i64imm>; defm SET_u64 : SET<"u64", Int64Regs, i64imm>; -defm SET_f16 : SET<"f16", Float16Regs, f16imm>; +defm SET_f16 : SET<"f16", Int16Regs, f16imm>; defm SET_f32 : SET<"f32", Float32Regs, f32imm>; defm SET_f64 : SET<"f64", Float64Regs, f64imm>; @@ -1760,7 +1760,7 @@ def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), "mov.u64 \t$dst, $sss;", []>; - def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src), + def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), // We have to use .b16 here as there's no mov.f16. "mov.b16 \t$dst, $src;", []>; def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), @@ -1824,14 +1824,14 @@ Instruction set_64ri, Instruction set_64ir> { // i16 -> pred - def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i1 (OpNode i16:$a, i16:$b)), (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), (setp_16ri Int16Regs:$a, imm:$b, Mode)>; def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), (setp_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> pred - def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), + def : Pat<(i1 (OpNode i32:$a, i32:$b)), (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), (setp_32ri Int32Regs:$a, imm:$b, Mode)>; @@ -1846,14 +1846,14 @@ (setp_64ir imm:$a, Int64Regs:$b, Mode)>; // i16 -> i32 - def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i32 (OpNode i16:$a, i16:$b)), (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), (set_16ri Int16Regs:$a, imm:$b, Mode)>; def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), (set_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> i32 - def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), + def : Pat<(i32 (OpNode i32:$a, i32:$b)), (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), (set_32ri Int32Regs:$a, imm:$b, Mode)>; @@ -1926,23 +1926,23 @@ multiclass FSET_FORMAT { // f16 -> pred - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> pred @@ -1971,23 +1971,23 @@ (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; // f16 -> i32 - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> i32 @@ -2276,16 +2276,16 @@ def LoadParamMemV4I32 : LoadParamV4MemInst; def LoadParamMemV4I16 : LoadParamV4MemInst; def LoadParamMemV4I8 : LoadParamV4MemInst; -def LoadParamMemF16 : LoadParamMemInst; -def LoadParamMemF16x2 : LoadParamMemInst; +def LoadParamMemF16 : LoadParamMemInst; +def LoadParamMemF16x2 : LoadParamMemInst; def LoadParamMemF32 : LoadParamMemInst; def LoadParamMemF64 : LoadParamMemInst; -def LoadParamMemV2F16 : LoadParamV2MemInst; -def LoadParamMemV2F16x2: LoadParamV2MemInst; +def LoadParamMemV2F16 : LoadParamV2MemInst; +def LoadParamMemV2F16x2: LoadParamV2MemInst; def LoadParamMemV2F32 : LoadParamV2MemInst; def LoadParamMemV2F64 : LoadParamV2MemInst; -def LoadParamMemV4F16 : LoadParamV4MemInst; -def LoadParamMemV4F16x2: LoadParamV4MemInst; +def LoadParamMemV4F16 : LoadParamV4MemInst; +def LoadParamMemV4F16x2: LoadParamV4MemInst; def LoadParamMemV4F32 : LoadParamV4MemInst; def StoreParamI64 : StoreParamInst; @@ -2302,16 +2302,16 @@ def StoreParamV4I16 : StoreParamV4Inst; def StoreParamV4I8 : StoreParamV4Inst; -def StoreParamF16 : StoreParamInst; -def StoreParamF16x2 : StoreParamInst; +def StoreParamF16 : StoreParamInst; +def StoreParamF16x2 : StoreParamInst; def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; -def StoreParamV2F16 : StoreParamV2Inst; -def StoreParamV2F16x2 : StoreParamV2Inst; +def StoreParamV2F16 : StoreParamV2Inst; +def StoreParamV2F16x2 : StoreParamV2Inst; def StoreParamV2F32 : StoreParamV2Inst; def StoreParamV2F64 : StoreParamV2Inst; -def StoreParamV4F16 : StoreParamV4Inst; -def StoreParamV4F16x2 : StoreParamV4Inst; +def StoreParamV4F16 : StoreParamV4Inst; +def StoreParamV4F16x2 : StoreParamV4Inst; def StoreParamV4F32 : StoreParamV4Inst; def StoreRetvalI64 : StoreRetvalInst; @@ -2328,15 +2328,15 @@ def StoreRetvalF64 : StoreRetvalInst; def StoreRetvalF32 : StoreRetvalInst; -def StoreRetvalF16 : StoreRetvalInst; -def StoreRetvalF16x2 : StoreRetvalInst; +def StoreRetvalF16 : StoreRetvalInst; +def StoreRetvalF16x2 : StoreRetvalInst; def StoreRetvalV2F64 : StoreRetvalV2Inst; def StoreRetvalV2F32 : StoreRetvalV2Inst; -def StoreRetvalV2F16 : StoreRetvalV2Inst; -def StoreRetvalV2F16x2: StoreRetvalV2Inst; +def StoreRetvalV2F16 : StoreRetvalV2Inst; +def StoreRetvalV2F16x2: StoreRetvalV2Inst; def StoreRetvalV4F32 : StoreRetvalV4Inst; -def StoreRetvalV4F16 : StoreRetvalV4Inst; -def StoreRetvalV4F16x2: StoreRetvalV4Inst; +def StoreRetvalV4F16 : StoreRetvalV4Inst; +def StoreRetvalV4F16x2: StoreRetvalV4Inst; def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; @@ -2347,19 +2347,30 @@ NVPTXInst<(outs), (ins regclass:$a), "$a, ", [(CallArg (i32 0), regclass:$a)]>; +class CallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), vt:$a)]>; + class LastCallArgInst : NVPTXInst<(outs), (ins regclass:$a), "$a", [(LastCallArg (i32 0), regclass:$a)]>; +class LastCallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), vt:$a)]>; def CallArgI64 : CallArgInst; -def CallArgI32 : CallArgInst; -def CallArgI16 : CallArgInst; +def CallArgI32 : CallArgInstVT; +def CallArgI16 : CallArgInstVT; +def CallArgF16 : CallArgInstVT; +def CallArgBF16 : CallArgInstVT; def CallArgF64 : CallArgInst; def CallArgF32 : CallArgInst; def LastCallArgI64 : LastCallArgInst; -def LastCallArgI32 : LastCallArgInst; -def LastCallArgI16 : LastCallArgInst; +def LastCallArgI32 : LastCallArgInstVT; +def LastCallArgI16 : LastCallArgInstVT; +def LastCallArgF16 : LastCallArgInstVT; +def LastCallArgBF16 : LastCallArgInstVT; def LastCallArgF64 : LastCallArgInst; def LastCallArgF32 : LastCallArgInst; @@ -2376,7 +2387,7 @@ def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", [(CallVoid (Wrapper tglobaladdr:$addr))]>; def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", - [(CallVoid Int32Regs:$addr)]>; + [(CallVoid i32:$addr)]>; def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", [(CallVoid Int64Regs:$addr)]>; def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", @@ -2413,36 +2424,36 @@ !strconcat("mov", asmstr, " \t$dst, $src;"), [(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>; -class MoveParamSymbolInst : NVPTXInst<(outs regclass:$dst), (ins srcty:$src), !strconcat("mov", asmstr, " \t$dst, $src;"), - [(set regclass:$dst, (MoveParam texternalsym:$src))]>; + [(set vt:$dst, (MoveParam texternalsym:$src))]>; def MoveParamI64 : MoveParamInst; def MoveParamI32 : MoveParamInst; -def MoveParamSymbolI64 : MoveParamSymbolInst; -def MoveParamSymbolI32 : MoveParamSymbolInst; +def MoveParamSymbolI64 : MoveParamSymbolInst; +def MoveParamSymbolI32 : MoveParamSymbolInst; def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.u16.u32 \t$dst, $src;", - [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; + "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ? + [(set i16:$dst, (MoveParam i16:$src))]>; def MoveParamF64 : MoveParamInst; def MoveParamF32 : MoveParamInst; -def MoveParamF16 : MoveParamInst; +def MoveParamF16 : MoveParamInst; -class PseudoUseParamInst : +class PseudoUseParamInst : NVPTXInst<(outs), (ins regclass:$src), "// Pseudo use of $src", - [(PseudoUseParam regclass:$src)]>; + [(PseudoUseParam vt:$src)]>; -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; class ProxyRegInst : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), @@ -2454,12 +2465,12 @@ def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>; def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>; - def ProxyRegF16 : ProxyRegInst<"b16", f16, Float16Regs>; - def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Float16Regs>; + def ProxyRegF16 : ProxyRegInst<"b16", f16, Int16Regs>; + def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Int16Regs>; def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>; def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>; - def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Float16x2Regs>; - def ProxyRegBF16x2 : ProxyRegInst<"b32", v2bf16, Float16x2Regs>; + def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Int32Regs>; + def ProxyRegBF16x2 : ProxyRegInst<"b32", v2bf16, Int32Regs>; } // @@ -2509,8 +2520,8 @@ defm LD_i16 : LD; defm LD_i32 : LD; defm LD_i64 : LD; - defm LD_f16 : LD; - defm LD_f16x2 : LD; + defm LD_f16 : LD; + defm LD_f16x2 : LD; defm LD_f32 : LD; defm LD_f64 : LD; } @@ -2559,8 +2570,8 @@ defm ST_i16 : ST; defm ST_i32 : ST; defm ST_i64 : ST; - defm ST_f16 : ST; - defm ST_f16x2 : ST; + defm ST_f16 : ST; + defm ST_f16x2 : ST; defm ST_f32 : ST; defm ST_f64 : ST; } @@ -2647,8 +2658,8 @@ defm LDV_i16 : LD_VEC; defm LDV_i32 : LD_VEC; defm LDV_i64 : LD_VEC; - defm LDV_f16 : LD_VEC; - defm LDV_f16x2 : LD_VEC; + defm LDV_f16 : LD_VEC; + defm LDV_f16x2 : LD_VEC; defm LDV_f32 : LD_VEC; defm LDV_f64 : LD_VEC; } @@ -2742,8 +2753,8 @@ defm STV_i16 : ST_VEC; defm STV_i32 : ST_VEC; defm STV_i64 : ST_VEC; - defm STV_f16 : ST_VEC; - defm STV_f16x2 : ST_VEC; + defm STV_f16 : ST_VEC; + defm STV_f16x2 : ST_VEC; defm STV_f32 : ST_VEC; defm STV_f64 : ST_VEC; } @@ -2840,24 +2851,24 @@ // f16 -> sint -def : Pat<(i1 (fp_to_sint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s16_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i32 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s32_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i64 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))), + (SETP_b16ri (BITCONVERT_16_F2I Int16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s64_f16 Int16Regs:$a, CvtRZI)>; // f16 -> uint -def : Pat<(i1 (fp_to_uint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u16_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u32_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))), + (SETP_b16ri (BITCONVERT_16_F2I Int16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u16_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u32_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u64_f16 Int16Regs:$a, CvtRZI)>; // f32 -> sint def : Pat<(i1 (fp_to_sint Float32Regs:$a)), @@ -2994,17 +3005,17 @@ // Select instructions with 32-bit predicates -def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), +def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b), (SELP_b16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), +def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), (SELP_b64rr Int64Regs:$a, Int64Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, (f16 Float16Regs:$a), (f16 Float16Regs:$b)), - (SELP_f16rr Float16Regs:$a, Float16Regs:$b, +def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)), + (SELP_f16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), (SELP_f32rr Float32Regs:$a, Float32Regs:$b, @@ -3070,39 +3081,39 @@ // Extract element of f16x2 register. PTX does not provide any way // to access elements of f16x2 vector directly, so we need to // extract it using a temporary register. - def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), + def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst), + (ins Int32Regs:$src), "{{ .reg .b16 \t%tmp_hi;\n\t" " mov.b32 \t{$dst, %tmp_hi}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 0))]>; - def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), + [(set Int16Regs:$dst, + (extractelt (v2f16 Int32Regs:$src), 0))]>; + def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst), + (ins Int32Regs:$src), "{{ .reg .b16 \t%tmp_lo;\n\t" " mov.b32 \t{%tmp_lo, $dst}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 1))]>; + [(set Int16Regs:$dst, + (extractelt (v2f16 Int32Regs:$src), 1))]>; // Coalesce two f16 registers into f16x2 - def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + def BuildF16x2 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), "mov.b32 \t$dst, {{$a, $b}};", - [(set (v2f16 Float16x2Regs:$dst), - (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>; + [(set (v2f16 Int32Regs:$dst), + (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>; // Directly initializing underlying the b32 register is one less SASS // instruction than than vector-packing move. - def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src), + def BuildF16x2i : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), "mov.b32 \t$dst, $src;", []>; // Split f16x2 into two f16 registers. - def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), - (ins Float16x2Regs:$src), + def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), + (ins Int32Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; // Split an i32 into two f16 - def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), (ins Int32Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; @@ -3186,14 +3197,14 @@ (CVT_f32_f64 Float64Regs:$a, CvtRN)>; // fpextend f16 -> f32 -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; // fpextend f16 -> f64 -def : Pat<(f64 (fpextend (f16 Float16Regs:$a))), - (CVT_f64_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f64 (fpextend (f16 Int16Regs:$a))), + (CVT_f64_f16 Int16Regs:$a, CvtNONE)>; // fpextend f32 -> f64 def : Pat<(f64 (fpextend Float32Regs:$a)), @@ -3207,8 +3218,8 @@ // fceil, ffloor, froundeven, ftrunc. multiclass CVT_ROUND { - def : Pat<(OpNode (f16 Float16Regs:$a)), - (CVT_f16_f16 Float16Regs:$a, Mode)>; + def : Pat<(OpNode (f16 Int16Regs:$a)), + (CVT_f16_f16 Int16Regs:$a, Mode)>; def : Pat<(OpNode Float32Regs:$a), (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>; def : Pat<(OpNode Float32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -686,46 +686,46 @@ multiclass MIN_MAX { foreach P = [ MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, - int_nvvm_fmax_f16), Float16Regs>, + int_nvvm_fmax_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, - int_nvvm_fmax_ftz_f16), Float16Regs>, + int_nvvm_fmax_ftz_f16), Int16Regs>, MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, - int_nvvm_fmax_nan_f16), Float16Regs>, + int_nvvm_fmax_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>, + int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16, - int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX<72>, hasSM<86>]>, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, - int_nvvm_fmax_f16x2), Float16x2Regs>, + int_nvvm_fmax_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>, + int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>, + int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>, + int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2, int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>, MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, @@ -933,9 +933,9 @@ def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", - Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; + Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", - Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; + Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; @@ -986,30 +986,30 @@ FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, - FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs, + FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, - Float16x2Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs, + Int32Regs, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, - Float16x2Regs, [hasPTX<70>, hasSM<80>]>, + Int32Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs, @@ -2159,8 +2159,8 @@ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; -defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>; -defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>; +defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Int16Regs>; +defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; @@ -2212,9 +2212,9 @@ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDU_G_v2f16_ELE - : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2f16x2_ELE - : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; + : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; defm INT_PTX_LDU_G_v2i64_ELE @@ -2231,10 +2231,10 @@ Int32Regs>; defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", - Float16Regs>; + Int16Regs>; defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", - Float16x2Regs>; + Int32Regs>; defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; @@ -2275,9 +2275,9 @@ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>; defm INT_PTX_LDG_GLOBAL_f16 - : LDG_G<"b16 \t$result, [$src];", Float16Regs>; + : LDG_G<"b16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDG_GLOBAL_f16x2 - : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>; + : LDG_G<"b32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDG_GLOBAL_f64 @@ -2334,9 +2334,9 @@ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v2f16_ELE - : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v2f16x2_ELE - : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; + : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; defm INT_PTX_LDG_G_v2i64_ELE @@ -2350,9 +2350,9 @@ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v4f16_ELE - : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v4f16x2_ELE - : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; + : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; @@ -6305,7 +6305,7 @@ : WMMA_REGS { // NVPTX register types used to carry fragment data. NVPTXRegClass regclass = !cond( - !eq(ptx_elt_type, "f16") : Float16x2Regs, + !eq(ptx_elt_type, "f16") : Int32Regs, !eq(ptx_elt_type, "f32") : Float32Regs, !eq(ptx_elt_type, "f64") : Float64Regs, !eq(ptx_elt_type, "bf16") : Int32Regs, diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -29,14 +29,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return ".f32"; - if (RC == &NVPTX::Float16RegsRegClass) - // Ideally fp16 registers should be .f16, but this syntax is only - // supported on sm_53+. On the other hand, .b16 registers are - // accepted for all supported fp16 instructions on all GPU - // variants, so we can use them instead. - return ".b16"; - if (RC == &NVPTX::Float16x2RegsRegClass) - return ".b32"; if (RC == &NVPTX::Float64RegsRegClass) return ".f64"; if (RC == &NVPTX::Int64RegsRegClass) @@ -73,10 +65,6 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return "%f"; - if (RC == &NVPTX::Float16RegsRegClass) - return "%h"; - if (RC == &NVPTX::Float16x2RegsRegClass) - return "%hh"; if (RC == &NVPTX::Float64RegsRegClass) return "%fd"; if (RC == &NVPTX::Int64RegsRegClass) diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -57,11 +57,11 @@ // Register classes //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; -def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>; -def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; +def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; +def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16], 32, + (add (sequence "R%u", 0, 4), + VRFrame32, VRFrameLocal32)>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; -def Float16Regs : NVPTXRegClass<[f16,bf16], 16, (add (sequence "H%u", 0, 4))>; -def Float16x2Regs : NVPTXRegClass<[v2f16,v2bf16], 32, (add (sequence "HH%u", 0, 4))>; def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>; def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>; diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll --- a/llvm/test/CodeGen/NVPTX/bf16.ll +++ b/llvm/test/CodeGen/NVPTX/bf16.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_bfloat -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,8 +14,8 @@ %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2+0], %h1; -; CHECK: st.param.b16 [param2+2], %h2; +; CHECK: st.param.b16 [param2+0], %rs1; +; CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE, @@ -37,8 +37,8 @@ define void @boom() { %fp = call ptr @usefp(ptr @callee) ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0+0], %h1; - ; CHECK: st.param.b16 [param0+2], %h2; + ; CHECK: st.param.b16 [param0+0], %rs1; + ; CHECK: st.param.b16 [param0+2], %rs2; ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -43,7 +43,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( -; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00; +; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_ret_const() #0 { @@ -51,14 +51,14 @@ } ; CHECK-LABEL: test_fadd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd(half %a, half %b) #0 { @@ -67,14 +67,14 @@ } ; CHECK-LABEL: test_fadd_v1f16( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 { @@ -84,14 +84,14 @@ ; Check that we can lower fadd with immediate arguments. ; CHECK-LABEL: test_fadd_imm_0( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_0(half %b) #0 { @@ -100,14 +100,14 @@ } ; CHECK-LABEL: test_fadd_imm_1( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_1(half %a) #0 { @@ -116,14 +116,14 @@ } ; CHECK-LABEL: test_fsub( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1]; -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fsub_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fsub_param_1]; +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fsub(half %a, half %b) #0 { @@ -132,15 +132,15 @@ } ; CHECK-LABEL: test_fneg( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0]; -; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; -; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fneg_param_0]; +; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; +; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fneg(half %a) #0 { @@ -149,14 +149,14 @@ } ; CHECK-LABEL: test_fmul( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1]; -; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmul_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmul_param_1]; +; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fmul(half %a, half %b) #0 { @@ -165,15 +165,15 @@ } ; CHECK-LABEL: test_fdiv( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fdiv_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fdiv_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fdiv(half %a, half %b) #0 { @@ -182,8 +182,8 @@ } ; CHECK-LABEL: test_frem( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_frem_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_frem_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]]; @@ -198,7 +198,7 @@ ; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]]; ; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]]; ; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RESULT]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_frem(half %a, half %b) #0 { @@ -207,7 +207,7 @@ } ; CHECK-LABEL: test_store( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_store_param_0]; ; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1]; ; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]]; ; CHECK-NEXT: ret; @@ -218,7 +218,7 @@ ; CHECK-LABEL: test_load( ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0]; -; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]]; +; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_load(ptr %a) #0 { @@ -243,8 +243,8 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK-LABEL: test_call( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -254,7 +254,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -264,8 +264,8 @@ } ; CHECK-LABEL: test_call_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -275,7 +275,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -285,8 +285,8 @@ } ; CHECK-LABEL: test_tailcall_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -296,7 +296,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -306,10 +306,10 @@ } ; CHECK-LABEL: test_select( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1]; ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select(half %a, half %b, i1 zeroext %c) #0 { @@ -318,15 +318,15 @@ } ; CHECK-LABEL: test_select_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]] -; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { @@ -338,8 +338,8 @@ ; CHECK-LABEL: test_select_cc_f32_f16( ; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0]; ; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; @@ -355,13 +355,13 @@ } ; CHECK-LABEL: test_select_cc_f16_f32( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2]; ; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3]; ; CHECK-NOFTZ-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1]; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { @@ -371,8 +371,8 @@ } ; CHECK-LABEL: test_fcmp_une( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_une_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -387,8 +387,8 @@ } ; CHECK-LABEL: test_fcmp_ueq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16-NOFTZ: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.equ.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -403,8 +403,8 @@ } ; CHECK-LABEL: test_fcmp_ugt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16-NOFTZ: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gtu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -419,8 +419,8 @@ } ; CHECK-LABEL: test_fcmp_uge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16-NOFTZ: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.geu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -435,8 +435,8 @@ } ; CHECK-LABEL: test_fcmp_ult( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ult_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16-NOFTZ: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ltu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -451,8 +451,8 @@ } ; CHECK-LABEL: test_fcmp_ule( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ule_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16-NOFTZ: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.leu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -468,8 +468,8 @@ ; CHECK-LABEL: test_fcmp_uno( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uno_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16-NOFTZ: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.nan.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -484,8 +484,8 @@ } ; CHECK-LABEL: test_fcmp_one( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_one_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16-NOFTZ: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ne.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -500,8 +500,8 @@ } ; CHECK-LABEL: test_fcmp_oeq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16-NOFTZ: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.eq.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -516,8 +516,8 @@ } ; CHECK-LABEL: test_fcmp_ogt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16-NOFTZ: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -532,8 +532,8 @@ } ; CHECK-LABEL: test_fcmp_oge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16-NOFTZ: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ge.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -548,8 +548,8 @@ } ; XCHECK-LABEL: test_fcmp_olt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_olt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -564,8 +564,8 @@ } ; XCHECK-LABEL: test_fcmp_ole( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ole_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16-NOFTZ: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.le.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -580,8 +580,8 @@ } ; CHECK-LABEL: test_fcmp_ord( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ord_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16-NOFTZ: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.num.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -596,8 +596,8 @@ } ; CHECK-LABEL: test_br_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_br_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_br_cc_param_1]; ; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2]; ; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] @@ -623,10 +623,10 @@ ; CHECK-LABEL: test_phi( ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0]; -; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]]; +; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]]; ; CHECK: [[LOOP:\$L__BB[0-9_]+]]: -; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]]; -; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]]; +; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; +; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { ; CHECK: st.param.b64 [param0+0], %[[P1]]; ; CHECK: call.uni (retval0), @@ -651,7 +651,7 @@ declare i1 @test_dummy(ptr %p1) #0 ; CHECK-LABEL: test_fptosi_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0]; ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -661,7 +661,7 @@ } ; CHECK-LABEL: test_fptosi_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0]; ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -671,7 +671,7 @@ } ; CHECK-LABEL: test_fptoui_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0]; ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -681,7 +681,7 @@ } ; CHECK-LABEL: test_fptoui_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0]; ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -692,7 +692,7 @@ ; CHECK-LABEL: test_uitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0]; -; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32(i32 %a) #0 { @@ -702,7 +702,7 @@ ; CHECK-LABEL: test_uitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0]; -; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i64(i64 %a) #0 { @@ -712,7 +712,7 @@ ; CHECK-LABEL: test_sitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0]; -; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32(i32 %a) #0 { @@ -722,7 +722,7 @@ ; CHECK-LABEL: test_sitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0]; -; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i64(i64 %a) #0 { @@ -732,14 +732,14 @@ ; CHECK-LABEL: test_uitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { @@ -750,14 +750,14 @@ ; CHECK-LABEL: test_sitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { @@ -768,7 +768,7 @@ ; CHECK-LABEL: test_fptrunc_float( ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_float(float %a) #0 { @@ -778,7 +778,7 @@ ; CHECK-LABEL: test_fptrunc_double( ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0]; -; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_double(double %a) #0 { @@ -787,7 +787,7 @@ } ; CHECK-LABEL: test_fpext_float( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK: st.param.f32 [func_retval0+0], [[R]]; @@ -798,7 +798,7 @@ } ; CHECK-LABEL: test_fpext_double( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0]; ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]]; ; CHECK: st.param.f64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -809,7 +809,7 @@ ; CHECK-LABEL: test_bitcast_halftoi16( -; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0]; +; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0]; ; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]] ; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]] ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -821,7 +821,7 @@ ; CHECK-LABEL: test_bitcast_i16tohalf( ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0]; -; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]] +; CHECK: mov.b16 [[AH:%rs[0-9]+]], [[AS]] ; CHECK: st.param.b16 [func_retval0+0], [[AH]]; ; CHECK: ret; define half @test_bitcast_i16tohalf(i16 %a) #0 { @@ -855,12 +855,12 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 ; CHECK-LABEL: test_sqrt( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sqrt_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sqrt(half %a) #0 { @@ -876,11 +876,11 @@ ;} ; CHECK-LABEL: test_sin( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sin(half %a) #0 #1 { @@ -889,11 +889,11 @@ } ; CHECK-LABEL: test_cos( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_cos(half %a) #0 #1 { @@ -944,16 +944,16 @@ ;} ; CHECK-LABEL: test_fma( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fma_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fma_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret define half @test_fma(half %a, half %b, half %c) #0 { @@ -962,12 +962,12 @@ } ; CHECK-LABEL: test_fabs( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fabs_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: abs.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fabs(half %a) #0 { @@ -976,15 +976,15 @@ } ; CHECK-LABEL: test_minnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_minnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_minnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_minnum(half %a, half %b) #0 { @@ -993,15 +993,15 @@ } ; CHECK-LABEL: test_maxnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_maxnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_maxnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_maxnum(half %a, half %b) #0 { @@ -1010,14 +1010,14 @@ } ; CHECK-LABEL: test_copysign( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_param_1]; ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign(half %a, half %b) #0 { @@ -1026,7 +1026,7 @@ } ; CHECK-LABEL: test_copysign_f32( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1]; ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]]; @@ -1034,7 +1034,7 @@ ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648; ; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign_f32(half %a, float %b) #0 { @@ -1044,7 +1044,7 @@ } ; CHECK-LABEL: test_copysign_f64( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1]; ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]]; @@ -1053,7 +1053,7 @@ ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48; ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign_f64(half %a, double %b) #0 { @@ -1063,14 +1063,14 @@ } ; CHECK-LABEL: test_copysign_extended( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_extended_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_extended_param_1]; ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[R]]; ; CHECK: st.param.f32 [func_retval0+0], [[XR]]; @@ -1082,8 +1082,8 @@ } ; CHECK-LABEL: test_floor( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0]; -; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0]; +; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_floor(half %a) #0 { @@ -1092,8 +1092,8 @@ } ; CHECK-LABEL: test_ceil( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0]; -; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0]; +; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_ceil(half %a) #0 { @@ -1102,8 +1102,8 @@ } ; CHECK-LABEL: test_trunc( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0]; -; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0]; +; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_trunc(half %a) #0 { @@ -1112,8 +1112,8 @@ } ; CHECK-LABEL: test_rint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_rint(half %a) #0 { @@ -1122,8 +1122,8 @@ } ; CHECK-LABEL: test_nearbyint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_nearbyint(half %a) #0 { @@ -1132,8 +1132,8 @@ } ; CHECK-LABEL: test_roundeven( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_roundeven_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_roundeven(half %a) #0 { @@ -1154,16 +1154,16 @@ } ; CHECK-LABEL: test_fmuladd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmuladd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmuladd_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fmuladd(half %a, half %b, half %c) #0 { diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -32,7 +32,7 @@ ; CHECK-LABEL: test_ret_const( ; CHECK: mov.u32 [[T:%r[0-9+]]], 1073757184; -; CHECK: mov.b32 [[R:%hh[0-9+]]], [[T]]; +; CHECK: mov.b32 [[R:%r[0-9+]]], [[T]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_ret_const() #0 { @@ -40,8 +40,8 @@ } ; CHECK-LABEL: test_extract_0( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0]; -; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]]; +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_0_param_0]; +; CHECK: mov.b32 {[[R:%rs[0-9]+]], %tmp_hi}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_0(<2 x half> %a) #0 { @@ -50,8 +50,8 @@ } ; CHECK-LABEL: test_extract_1( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0]; -; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]]; +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_1_param_0]; +; CHECK: mov.b32 {%tmp_lo, [[R:%rs[0-9]+]]}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_1(<2 x half> %a) #0 { @@ -60,11 +60,11 @@ } ; CHECK-LABEL: test_extract_i( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_extract_i_param_0]; ; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1]; ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]]; -; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { @@ -73,22 +73,22 @@ } ; CHECK-LABEL: test_fadd( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_param_1]; ; -; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-F16-NEXT: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -99,20 +99,20 @@ ; Check that we can lower fadd with immediate arguments. ; CHECK-LABEL: test_fadd_imm_0( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_imm_0_param_0]; ; ; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; -; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; -; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]]; +; CHECK-F16: mov.b32 [[IHH:%r[0-9+]]], [[I]]; +; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[IHH]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -122,20 +122,20 @@ } ; CHECK-LABEL: test_fadd_imm_1( -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_imm_1_param_0]; ; ; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; -; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; -; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]]; +; CHECK-F16: mov.b32 [[IHH:%r[0-9+]]], [[I]]; +; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[IHH]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -145,22 +145,22 @@ } ; CHECK-LABEL: test_fsub( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fsub_param_0]; ; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1]; -; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fsub_param_1]; +; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -170,21 +170,21 @@ } ; CHECK-LABEL: test_fneg( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fneg_param_0]; ; ; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0; -; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]]; -; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]]; +; CHECK-F16: mov.b32 [[IHH0:%r[0-9+]]], [[I0]]; +; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[IHH0]], [[A]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -194,21 +194,21 @@ } ; CHECK-LABEL: test_fmul( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1]; -; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmul_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmul_param_1]; +; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -218,19 +218,19 @@ } ; CHECK-LABEL: test_fdiv( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fdiv_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fdiv_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]; -; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]; +; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { @@ -240,11 +240,11 @@ ; CHECK-LABEL: test_frem( ; -- Load two 16x2 inputs and split them into f16 elements -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_frem_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_frem_param_1]; ; -- Split into elements -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; -- promote to f32. ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; @@ -265,10 +265,10 @@ ; CHECK-DAG: testp.infinite.f32 [[ISB1INF:%p[0-9]+]], [[FB1]]; ; CHECK-DAG: selp.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RFNINF1]], [[ISB1INF]]; ; -- convert back to f16. -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; -- merge into f16x2 and return it. -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { @@ -279,8 +279,8 @@ ; CHECK-LABEL: .func test_ldst_v2f16( ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; -; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] -; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; +; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]] +; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]]; ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; ; CHECK: ret; define void @test_ldst_v2f16(ptr %a, ptr %b) { @@ -309,7 +309,7 @@ ; CHECK-LABEL: .func test_ldst_v4f16( ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; -; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; +; CHECK-DAG: ld.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]]; ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: ret; define void @test_ldst_v4f16(ptr %a, ptr %b) { @@ -333,8 +333,8 @@ declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 ; CHECK-LABEL: test_call( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -344,7 +344,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -354,8 +354,8 @@ } ; CHECK-LABEL: test_call_flipped( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_flipped_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -365,7 +365,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -375,8 +375,8 @@ } ; CHECK-LABEL: test_tailcall_flipped( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_tailcall_flipped_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_tailcall_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -386,7 +386,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -396,11 +396,11 @@ } ; CHECK-LABEL: test_select( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_param_1]; ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; -; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { @@ -409,15 +409,15 @@ } ; CHECK-LABEL: test_select_cc( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2]; -; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_param_2]; +; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_param_3]; ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] ; -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -425,11 +425,11 @@ ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] ; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { @@ -441,12 +441,12 @@ ; CHECK-LABEL: test_select_cc_f32_f16( ; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2]; -; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_f32_f16_param_2]; +; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_f32_f16_param_3]; ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -466,17 +466,17 @@ } ; CHECK-LABEL: test_select_cc_f16_f32( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_f16_f32_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_f16_f32_param_1]; ; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2]; ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, @@ -487,11 +487,11 @@ } ; CHECK-LABEL: test_fcmp_une( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_une_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -509,11 +509,11 @@ } ; CHECK-LABEL: test_fcmp_ueq( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ueq_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -531,11 +531,11 @@ } ; CHECK-LABEL: test_fcmp_ugt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ugt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -553,11 +553,11 @@ } ; CHECK-LABEL: test_fcmp_uge( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uge_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -575,11 +575,11 @@ } ; CHECK-LABEL: test_fcmp_ult( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ult_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -597,11 +597,11 @@ } ; CHECK-LABEL: test_fcmp_ule( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ule_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -620,11 +620,11 @@ ; CHECK-LABEL: test_fcmp_uno( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uno_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -642,11 +642,11 @@ } ; CHECK-LABEL: test_fcmp_one( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_one_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -664,11 +664,11 @@ } ; CHECK-LABEL: test_fcmp_oeq( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oeq_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -686,11 +686,11 @@ } ; CHECK-LABEL: test_fcmp_ogt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ogt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -708,11 +708,11 @@ } ; CHECK-LABEL: test_fcmp_oge( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oge_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -730,11 +730,11 @@ } ; CHECK-LABEL: test_fcmp_olt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_olt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -752,11 +752,11 @@ } ; XCHECK-LABEL: test_fcmp_ole( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ole_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -774,11 +774,11 @@ } ; CHECK-LABEL: test_fcmp_ord( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ord_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -796,8 +796,8 @@ } ; CHECK-LABEL: test_fptosi_i32( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i32_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -808,8 +808,8 @@ } ; CHECK-LABEL: test_fptosi_i64( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i64_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -820,8 +820,8 @@ } ; CHECK-LABEL: test_fptoui_2xi32( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi32_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -832,8 +832,8 @@ } ; CHECK-LABEL: test_fptoui_2xi64( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi64_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -845,9 +845,9 @@ ; CHECK-LABEL: test_uitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.u32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { @@ -857,9 +857,9 @@ ; CHECK-LABEL: test_uitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.u64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { @@ -869,9 +869,9 @@ ; CHECK-LABEL: test_sitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.s32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { @@ -881,9 +881,9 @@ ; CHECK-LABEL: test_sitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.s64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { @@ -893,23 +893,23 @@ ; CHECK-LABEL: test_uitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; +; CHECK-DAG: cvt.rn.f16.u32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C1:%rs[0-9]+]], [[A1]]; -; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} -; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; +; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]} +; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -921,23 +921,23 @@ ; CHECK-LABEL: test_sitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; +; CHECK-DAG: cvt.rn.f16.s32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C1:%rs[0-9]+]], [[A1]]; ; -; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} -; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; +; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]} +; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -949,9 +949,9 @@ ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { @@ -961,9 +961,9 @@ ; CHECK-LABEL: test_fptrunc_2xdouble( ; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0]; -; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { @@ -972,8 +972,8 @@ } ; CHECK-LABEL: test_fpext_2xfloat( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xfloat_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; @@ -984,8 +984,8 @@ } ; CHECK-LABEL: test_fpext_2xdouble( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xdouble_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; @@ -1013,7 +1013,7 @@ ; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]]; ; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16; ; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], [[R1H0L]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { @@ -1023,7 +1023,7 @@ ; CHECK-LABEL: test_bitcast_float_to_2xhalf( ; CHECK: ld.param.f32 [[AF1:%f[0-9]+]], [test_bitcast_float_to_2xhalf_param_0]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], [[AF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], [[AF1]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 { @@ -1066,15 +1066,15 @@ declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 ; CHECK-LABEL: test_sqrt( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sqrt_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sqrt(<2 x half> %a) #0 { @@ -1090,15 +1090,15 @@ ;} ; CHECK-LABEL: test_sin( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sin(<2 x half> %a) #0 #1 { @@ -1107,15 +1107,15 @@ } ; CHECK-LABEL: test_cos( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_cos(<2 x half> %a) #0 #1 { @@ -1166,15 +1166,15 @@ ;} ; CHECK-LABEL: test_fma( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fma_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fma_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fma_param_2]; ; -; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1183,9 +1183,9 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret @@ -1195,15 +1195,15 @@ } ; CHECK-LABEL: test_fabs( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fabs(<2 x half> %a) #0 { @@ -1212,19 +1212,19 @@ } ; CHECK-LABEL: test_minnum( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_minnum_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_minnum_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { @@ -1233,19 +1233,19 @@ } ; CHECK-LABEL: test_maxnum( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_maxnum_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_maxnum_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { @@ -1254,10 +1254,10 @@ } ; CHECK-LABEL: test_copysign( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; @@ -1268,9 +1268,9 @@ ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { @@ -1279,9 +1279,9 @@ } ; CHECK-LABEL: test_copysign_f32( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; @@ -1294,9 +1294,9 @@ ; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; } ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { @@ -1306,9 +1306,9 @@ } ; CHECK-LABEL: test_copysign_f64( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; @@ -1323,9 +1323,9 @@ ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { @@ -1335,10 +1335,10 @@ } ; CHECK-LABEL: test_copysign_extended( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; @@ -1349,10 +1349,10 @@ ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} -; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; @@ -1364,11 +1364,11 @@ } ; CHECK-LABEL: test_floor( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_floor_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_floor(<2 x half> %a) #0 { @@ -1377,11 +1377,11 @@ } ; CHECK-LABEL: test_ceil( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_ceil_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_ceil(<2 x half> %a) #0 { @@ -1390,11 +1390,11 @@ } ; CHECK-LABEL: test_trunc( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_trunc_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_trunc(<2 x half> %a) #0 { @@ -1403,11 +1403,11 @@ } ; CHECK-LABEL: test_rint( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_rint_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_rint(<2 x half> %a) #0 { @@ -1416,11 +1416,11 @@ } ; CHECK-LABEL: test_nearbyint( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_nearbyint_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_nearbyint(<2 x half> %a) #0 { @@ -1429,11 +1429,11 @@ } ; CHECK-LABEL: test_roundeven( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_roundeven_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_roundeven_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_roundeven(<2 x half> %a) #0 { @@ -1456,15 +1456,15 @@ } ; CHECK-LABEL: test_fmuladd( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmuladd_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmuladd_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fmuladd_param_2]; ; -; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1473,9 +1473,9 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1485,16 +1485,16 @@ } ; CHECK-LABEL: test_shufflevector( -; CHECK: mov.b32 {%h1, %h2}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs1, %rs2}, %r1; +; CHECK: mov.b32 %r2, {%rs2, %rs1}; define <2 x half> @test_shufflevector(<2 x half> %a) #0 { %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> ret <2 x half> %s } ; CHECK-LABEL: test_insertelement( -; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs2, %tmp_hi}, %r1; +; CHECK: mov.b32 %r2, {%rs2, %rs1}; define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { %i = insertelement <2 x half> %a, half %x, i64 1 ret <2 x half> %i diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll --- a/llvm/test/CodeGen/NVPTX/half.ll +++ b/llvm/test/CodeGen/NVPTX/half.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in store half %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_half -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in %val_int = bitcast half %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py --- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py +++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py @@ -28,8 +28,8 @@ "i16": "r", "i32": "r", "i64": "rd", - "half": "h", - "<2 x half>": "hh", + "half": "rs", + "<2 x half>": "r", "float": "f", "double": "fd", } diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -384,13 +384,13 @@ ; CHECK: .func (.param .b32 func_retval0) ; CHECK-LABEL: test_f16( ; CHECK-NEXT: .param .b32 test_f16_param_0 -; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0]; +; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0]; ; CHECK: .param .b32 param0; ; CHECK: st.param.b16 [param0+0], [[E]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]] ; CHECK-NEXT: ret; define half @test_f16(half %a) { @@ -401,13 +401,13 @@ ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v2f16( ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4] -; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0]; +; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0+0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v2f16, -; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK: st.param.b32 [func_retval0+0], [[R]] ; CHECK-NEXT: ret; define <2 x half> @test_v2f16(<2 x half> %a) { @@ -418,17 +418,17 @@ ; CHECK:.func (.param .align 8 .b8 func_retval0[8]) ; CHECK-LABEL: test_v3f16( ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8] -; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4]; +; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v3f16, -; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4]; +; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; ; CHECK: ret; @@ -441,14 +441,14 @@ ; CHECK-LABEL: test_v4f16( ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8] ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0]; -; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; -; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; +; CHECK-DAG: mov.b32 [[HH01:%r[0-9]+]], [[R01]]; +; CHECK-DAG: mov.b32 [[HH23:%r[0-9]+]], [[R23]]; ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]}; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v4f16, -; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]}; ; CHECK: ret; define <4 x half> @test_v4f16(<4 x half> %a) { @@ -459,17 +459,17 @@ ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v5f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; ; CHECK: ret; @@ -482,16 +482,16 @@ ; CHECK-LABEL: test_v8f16( ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16] ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0]; -; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; -; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; -; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]]; -; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]]; +; CHECK-DAG: mov.b32 [[HH01:%r[0-9]+]], [[R01]]; +; CHECK-DAG: mov.b32 [[HH23:%r[0-9]+]], [[R23]]; +; CHECK-DAG: mov.b32 [[HH45:%r[0-9]+]], [[R45]]; +; CHECK-DAG: mov.b32 [[HH67:%r[0-9]+]], [[R67]]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v8f16, -; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0]; ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; ; CHECK: ret; define <8 x half> @test_v8f16(<8 x half> %a) { @@ -502,9 +502,9 @@ ; CHECK:.func (.param .align 32 .b8 func_retval0[32]) ; CHECK-LABEL: test_v9f16( ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0]; -; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8]; -; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0]; +; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8]; +; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16]; ; CHECK: .param .align 32 .b8 param0[32]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.v4.b16 [param0+8], @@ -512,9 +512,9 @@ ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK: test_v9f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8]; -; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; +; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; @@ -915,13 +915,13 @@ ; CHECK: .func (.param .align 2 .b8 func_retval0[2]) ; CHECK-LABEL: test_s_f16( ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0+0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni ; CHECK-NEXT: test_s_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define %s_f16 @test_s_f16(%s_f16 %a) { diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -92,10 +92,10 @@ define half @check_f16() { ; PTX-LABEL: check_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0+0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%rs[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]]; ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]]; @@ -155,10 +155,10 @@ define <2 x half> @check_vec_f16() { ; PTX-LABEL: check_vec_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; diff --git a/llvm/test/CodeGen/NVPTX/wmma.py b/llvm/test/CodeGen/NVPTX/wmma.py --- a/llvm/test/CodeGen/NVPTX/wmma.py +++ b/llvm/test/CodeGen/NVPTX/wmma.py @@ -118,7 +118,7 @@ }[ptx_type] self.ptx_reg_pattern = { - "f16": "%hh[0-9]+", + "f16": "%r[0-9]+", "f32": "%f[0-9]+", "f64": "%fd[0-9]+", }.get(ptx_type, "%r[0-9]+")