diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -309,8 +309,6 @@ Ret = (5 << 28); } else if (RC == &NVPTX::Float64RegsRegClass) { Ret = (6 << 28); - } else if (RC == &NVPTX::Float16RegsRegClass) { - Ret = (7 << 28); } else if (RC == &NVPTX::Float16x2RegsRegClass) { Ret = (8 << 28); } else { diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -410,9 +410,9 @@ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); + addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass); addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); - addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass); + addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass); addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass); // Conversion to/from FP16/FP16x2 is always legal. diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -51,9 +51,6 @@ } else if (DestRC == &NVPTX::Int64RegsRegClass) { Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr : NVPTX::BITCONVERT_64_F2I); - } else if (DestRC == &NVPTX::Float16RegsRegClass) { - Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr - : NVPTX::BITCONVERT_16_I2F); } else if (DestRC == &NVPTX::Float16x2RegsRegClass) { Op = NVPTX::IMOV32rr; } else if (DestRC == &NVPTX::Float32RegsRegClass) { diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,9 +164,9 @@ !eq(name, "i16"): Int16Regs, !eq(name, "i32"): Int32Regs, !eq(name, "i64"): Int64Regs, - !eq(name, "f16"): Float16Regs, + !eq(name, "f16"): Int16Regs, !eq(name, "v2f16"): Float16x2Regs, - !eq(name, "bf16"): Float16Regs, + !eq(name, "bf16"): Int16Regs, !eq(name, "v2bf16"): Float16x2Regs, !eq(name, "f32"): Float32Regs, !eq(name, "f64"): Float64Regs, @@ -280,16 +280,16 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math]>; def f16x2rr_ftz : @@ -354,16 +354,16 @@ Requires<[allowFMA]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA]>; def f16x2rr_ftz : @@ -417,16 +417,16 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[noFMA]>; def _rnf16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA]>; def _rnf16x2rr_ftz : NVPTXInst<(outs Float16x2Regs:$dst), @@ -513,7 +513,7 @@ FromName, ".u64 \t$dst, $src;"), []>; def _f16 : NVPTXInst<(outs RC:$dst), - (ins Float16Regs:$src, CvtMode:$mode), + (ins Int16Regs:$src, CvtMode:$mode), !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", FromName, ".f16 \t$dst, $src;"), []>; def _f32 : @@ -537,7 +537,7 @@ defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; - defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>; + defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; @@ -640,7 +640,7 @@ defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>; defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; -defm SELP_f16 : SELP_PATTERN<"b16", f16, Float16Regs, f16imm, fpimm>; +defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>; defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>; defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; @@ -783,13 +783,13 @@ def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; // Matchers for signed, unsigned mul.wide ISD nodes. -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, +def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)), + (MULWIDES32 i16:$a, i16:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), (MULWIDES32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), +def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), @@ -1003,7 +1003,7 @@ // fp16 immediate values in .f16 instructions. Instead we have to load // the constant into a register using mov.b16. def LOAD_CONST_F16 : - NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a), + NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a), "mov.b16 \t$dst, $a;", []>; defm FADD : F3_fma_component<"add", fadd>; @@ -1028,8 +1028,8 @@ !strconcat(OpcStr, " \t$dst, $src;"), [(set RC:$dst, (fneg (T RC:$src)))]>, Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; -def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Float16Regs, doF32FTZ>; -def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Float16Regs, True>; +def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>; +def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>; def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Float16x2Regs, True>; @@ -1211,8 +1211,8 @@ Requires<[useFP16Math, Pred]>; } -defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Float16Regs, doF32FTZ>; -defm FMA16 : FMA_F16<"fma.rn.f16", f16, Float16Regs, True>; +defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>; +defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>; defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Float16x2Regs, True>; defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; @@ -1651,7 +1651,7 @@ defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; def SETP_f16rr : NVPTXInst<(outs Int1Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp), + (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;", []>, Requires<[useFP16Math]>; @@ -1690,7 +1690,7 @@ defm SET_b64 : SET<"b64", Int64Regs, i64imm>; defm SET_s64 : SET<"s64", Int64Regs, i64imm>; defm SET_u64 : SET<"u64", Int64Regs, i64imm>; -defm SET_f16 : SET<"f16", Float16Regs, f16imm>; +defm SET_f16 : SET<"f16", Int16Regs, f16imm>; defm SET_f32 : SET<"f32", Float32Regs, f32imm>; defm SET_f64 : SET<"f64", Float64Regs, f64imm>; @@ -1760,7 +1760,7 @@ def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), "mov.u64 \t$dst, $sss;", []>; - def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src), + def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), // We have to use .b16 here as there's no mov.f16. "mov.b16 \t$dst, $src;", []>; def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), @@ -1824,7 +1824,7 @@ Instruction set_64ri, Instruction set_64ir> { // i16 -> pred - def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i1 (OpNode i16:$a, i16:$b)), (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), (setp_16ri Int16Regs:$a, imm:$b, Mode)>; @@ -1846,7 +1846,7 @@ (setp_64ir imm:$a, Int64Regs:$b, Mode)>; // i16 -> i32 - def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i32 (OpNode i16:$a, i16:$b)), (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), (set_16ri Int16Regs:$a, imm:$b, Mode)>; @@ -1926,23 +1926,23 @@ multiclass FSET_FORMAT { // f16 -> pred - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> pred @@ -1971,23 +1971,23 @@ (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; // f16 -> i32 - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> i32 @@ -2276,15 +2276,15 @@ def LoadParamMemV4I32 : LoadParamV4MemInst; def LoadParamMemV4I16 : LoadParamV4MemInst; def LoadParamMemV4I8 : LoadParamV4MemInst; -def LoadParamMemF16 : LoadParamMemInst; +def LoadParamMemF16 : LoadParamMemInst; def LoadParamMemF16x2 : LoadParamMemInst; def LoadParamMemF32 : LoadParamMemInst; def LoadParamMemF64 : LoadParamMemInst; -def LoadParamMemV2F16 : LoadParamV2MemInst; +def LoadParamMemV2F16 : LoadParamV2MemInst; def LoadParamMemV2F16x2: LoadParamV2MemInst; def LoadParamMemV2F32 : LoadParamV2MemInst; def LoadParamMemV2F64 : LoadParamV2MemInst; -def LoadParamMemV4F16 : LoadParamV4MemInst; +def LoadParamMemV4F16 : LoadParamV4MemInst; def LoadParamMemV4F16x2: LoadParamV4MemInst; def LoadParamMemV4F32 : LoadParamV4MemInst; @@ -2302,15 +2302,15 @@ def StoreParamV4I16 : StoreParamV4Inst; def StoreParamV4I8 : StoreParamV4Inst; -def StoreParamF16 : StoreParamInst; +def StoreParamF16 : StoreParamInst; def StoreParamF16x2 : StoreParamInst; def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; -def StoreParamV2F16 : StoreParamV2Inst; +def StoreParamV2F16 : StoreParamV2Inst; def StoreParamV2F16x2 : StoreParamV2Inst; def StoreParamV2F32 : StoreParamV2Inst; def StoreParamV2F64 : StoreParamV2Inst; -def StoreParamV4F16 : StoreParamV4Inst; +def StoreParamV4F16 : StoreParamV4Inst; def StoreParamV4F16x2 : StoreParamV4Inst; def StoreParamV4F32 : StoreParamV4Inst; @@ -2328,14 +2328,14 @@ def StoreRetvalF64 : StoreRetvalInst; def StoreRetvalF32 : StoreRetvalInst; -def StoreRetvalF16 : StoreRetvalInst; +def StoreRetvalF16 : StoreRetvalInst; def StoreRetvalF16x2 : StoreRetvalInst; def StoreRetvalV2F64 : StoreRetvalV2Inst; def StoreRetvalV2F32 : StoreRetvalV2Inst; -def StoreRetvalV2F16 : StoreRetvalV2Inst; +def StoreRetvalV2F16 : StoreRetvalV2Inst; def StoreRetvalV2F16x2: StoreRetvalV2Inst; def StoreRetvalV4F32 : StoreRetvalV4Inst; -def StoreRetvalV4F16 : StoreRetvalV4Inst; +def StoreRetvalV4F16 : StoreRetvalV4Inst; def StoreRetvalV4F16x2: StoreRetvalV4Inst; def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; @@ -2347,19 +2347,30 @@ NVPTXInst<(outs), (ins regclass:$a), "$a, ", [(CallArg (i32 0), regclass:$a)]>; +class CallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), vt:$a)]>; + class LastCallArgInst : NVPTXInst<(outs), (ins regclass:$a), "$a", [(LastCallArg (i32 0), regclass:$a)]>; +class LastCallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), vt:$a)]>; def CallArgI64 : CallArgInst; def CallArgI32 : CallArgInst; -def CallArgI16 : CallArgInst; +def CallArgI16 : CallArgInstVT; +def CallArgF16 : CallArgInstVT; +def CallArgBF16 : CallArgInstVT; def CallArgF64 : CallArgInst; def CallArgF32 : CallArgInst; def LastCallArgI64 : LastCallArgInst; def LastCallArgI32 : LastCallArgInst; -def LastCallArgI16 : LastCallArgInst; +def LastCallArgI16 : LastCallArgInstVT; +def LastCallArgF16 : LastCallArgInstVT; +def LastCallArgBF16 : LastCallArgInstVT; def LastCallArgF64 : LastCallArgInst; def LastCallArgF32 : LastCallArgInst; @@ -2427,22 +2438,22 @@ def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.u16.u32 \t$dst, $src;", - [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; + "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ? + [(set i16:$dst, (MoveParam i16:$src))]>; def MoveParamF64 : MoveParamInst; def MoveParamF32 : MoveParamInst; -def MoveParamF16 : MoveParamInst; +def MoveParamF16 : MoveParamInst; -class PseudoUseParamInst : +class PseudoUseParamInst : NVPTXInst<(outs), (ins regclass:$src), "// Pseudo use of $src", - [(PseudoUseParam regclass:$src)]>; + [(PseudoUseParam vt:$src)]>; -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; class ProxyRegInst : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), @@ -2454,8 +2465,8 @@ def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>; def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>; - def ProxyRegF16 : ProxyRegInst<"b16", f16, Float16Regs>; - def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Float16Regs>; + def ProxyRegF16 : ProxyRegInst<"b16", f16, Int16Regs>; + def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Int16Regs>; def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>; def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>; def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Float16x2Regs>; @@ -2509,7 +2520,7 @@ defm LD_i16 : LD; defm LD_i32 : LD; defm LD_i64 : LD; - defm LD_f16 : LD; + defm LD_f16 : LD; defm LD_f16x2 : LD; defm LD_f32 : LD; defm LD_f64 : LD; @@ -2559,7 +2570,7 @@ defm ST_i16 : ST; defm ST_i32 : ST; defm ST_i64 : ST; - defm ST_f16 : ST; + defm ST_f16 : ST; defm ST_f16x2 : ST; defm ST_f32 : ST; defm ST_f64 : ST; @@ -2647,7 +2658,7 @@ defm LDV_i16 : LD_VEC; defm LDV_i32 : LD_VEC; defm LDV_i64 : LD_VEC; - defm LDV_f16 : LD_VEC; + defm LDV_f16 : LD_VEC; defm LDV_f16x2 : LD_VEC; defm LDV_f32 : LD_VEC; defm LDV_f64 : LD_VEC; @@ -2742,7 +2753,7 @@ defm STV_i16 : ST_VEC; defm STV_i32 : ST_VEC; defm STV_i64 : ST_VEC; - defm STV_f16 : ST_VEC; + defm STV_f16 : ST_VEC; defm STV_f16x2 : ST_VEC; defm STV_f32 : ST_VEC; defm STV_f64 : ST_VEC; @@ -2840,24 +2851,24 @@ // f16 -> sint -def : Pat<(i1 (fp_to_sint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s16_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i32 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s32_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i64 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))), + (SETP_b16ri (BITCONVERT_16_F2I Int16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s64_f16 Int16Regs:$a, CvtRZI)>; // f16 -> uint -def : Pat<(i1 (fp_to_uint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u16_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u32_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))), + (SETP_b16ri (BITCONVERT_16_F2I Int16Regs:$a), 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u16_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u32_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u64_f16 Int16Regs:$a, CvtRZI)>; // f32 -> sint def : Pat<(i1 (fp_to_sint Float32Regs:$a)), @@ -2994,7 +3005,7 @@ // Select instructions with 32-bit predicates -def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), +def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b), (SELP_b16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), @@ -3003,8 +3014,8 @@ def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), (SELP_b64rr Int64Regs:$a, Int64Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, (f16 Float16Regs:$a), (f16 Float16Regs:$b)), - (SELP_f16rr Float16Regs:$a, Float16Regs:$b, +def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)), + (SELP_f16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), (SELP_f32rr Float32Regs:$a, Float32Regs:$b, @@ -3070,25 +3081,25 @@ // Extract element of f16x2 register. PTX does not provide any way // to access elements of f16x2 vector directly, so we need to // extract it using a temporary register. - def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst), + def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst), (ins Float16x2Regs:$src), "{{ .reg .b16 \t%tmp_hi;\n\t" " mov.b32 \t{$dst, %tmp_hi}, $src; }}", - [(set Float16Regs:$dst, + [(set Int16Regs:$dst, (extractelt (v2f16 Float16x2Regs:$src), 0))]>; - def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst), + def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst), (ins Float16x2Regs:$src), "{{ .reg .b16 \t%tmp_lo;\n\t" " mov.b32 \t{%tmp_lo, $dst}, $src; }}", - [(set Float16Regs:$dst, + [(set Int16Regs:$dst, (extractelt (v2f16 Float16x2Regs:$src), 1))]>; // Coalesce two f16 registers into f16x2 def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + (ins Int16Regs:$a, Int16Regs:$b), "mov.b32 \t$dst, {{$a, $b}};", [(set (v2f16 Float16x2Regs:$dst), - (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>; + (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>; // Directly initializing underlying the b32 register is one less SASS // instruction than than vector-packing move. @@ -3097,12 +3108,12 @@ []>; // Split f16x2 into two f16 registers. - def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), (ins Float16x2Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; // Split an i32 into two f16 - def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), (ins Int32Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; @@ -3186,14 +3197,14 @@ (CVT_f32_f64 Float64Regs:$a, CvtRN)>; // fpextend f16 -> f32 -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; // fpextend f16 -> f64 -def : Pat<(f64 (fpextend (f16 Float16Regs:$a))), - (CVT_f64_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f64 (fpextend (f16 Int16Regs:$a))), + (CVT_f64_f16 Int16Regs:$a, CvtNONE)>; // fpextend f32 -> f64 def : Pat<(f64 (fpextend Float32Regs:$a)), @@ -3207,8 +3218,8 @@ // fceil, ffloor, froundeven, ftrunc. multiclass CVT_ROUND { - def : Pat<(OpNode (f16 Float16Regs:$a)), - (CVT_f16_f16 Float16Regs:$a, Mode)>; + def : Pat<(OpNode (f16 Int16Regs:$a)), + (CVT_f16_f16 Int16Regs:$a, Mode)>; def : Pat<(OpNode Float32Regs:$a), (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>; def : Pat<(OpNode Float32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -686,25 +686,25 @@ multiclass MIN_MAX { foreach P = [ MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, - int_nvvm_fmax_f16), Float16Regs>, + int_nvvm_fmax_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, - int_nvvm_fmax_ftz_f16), Float16Regs>, + int_nvvm_fmax_ftz_f16), Int16Regs>, MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, - int_nvvm_fmax_nan_f16), Float16Regs>, + int_nvvm_fmax_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>, + int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16, - int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX<72>, hasSM<86>]>, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2), Float16x2Regs>, MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), @@ -933,7 +933,7 @@ def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", - Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; + Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; @@ -986,16 +986,16 @@ FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, - FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs, + FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, @@ -2159,7 +2159,7 @@ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; -defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>; +defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>; defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; @@ -2212,7 +2212,7 @@ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDU_G_v2f16_ELE - : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2f16x2_ELE : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; defm INT_PTX_LDU_G_v2f32_ELE @@ -2231,7 +2231,7 @@ Int32Regs>; defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", - Float16Regs>; + Int16Regs>; defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; @@ -2275,7 +2275,7 @@ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>; defm INT_PTX_LDG_GLOBAL_f16 - : LDG_G<"b16 \t$result, [$src];", Float16Regs>; + : LDG_G<"b16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDG_GLOBAL_f16x2 : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>; defm INT_PTX_LDG_GLOBAL_f32 @@ -2334,7 +2334,7 @@ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v2f16_ELE - : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v2f16x2_ELE : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; defm INT_PTX_LDG_G_v2f32_ELE @@ -2350,7 +2350,7 @@ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v4f16_ELE - : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v4f16x2_ELE : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; defm INT_PTX_LDG_G_v4f32_ELE diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -29,12 +29,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return ".f32"; - if (RC == &NVPTX::Float16RegsRegClass) - // Ideally fp16 registers should be .f16, but this syntax is only - // supported on sm_53+. On the other hand, .b16 registers are - // accepted for all supported fp16 instructions on all GPU - // variants, so we can use them instead. - return ".b16"; if (RC == &NVPTX::Float16x2RegsRegClass) return ".b32"; if (RC == &NVPTX::Float64RegsRegClass) @@ -73,8 +67,6 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return "%f"; - if (RC == &NVPTX::Float16RegsRegClass) - return "%h"; if (RC == &NVPTX::Float16x2RegsRegClass) return "%hh"; if (RC == &NVPTX::Float64RegsRegClass) diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -57,10 +57,9 @@ // Register classes //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; -def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>; +def Int16Regs : NVPTXRegClass<[i16,f16,bf16], 16, (add (sequence "RS%u", 0, 4))>; def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; -def Float16Regs : NVPTXRegClass<[f16,bf16], 16, (add (sequence "H%u", 0, 4))>; def Float16x2Regs : NVPTXRegClass<[v2f16,v2bf16], 32, (add (sequence "HH%u", 0, 4))>; def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>; diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll --- a/llvm/test/CodeGen/NVPTX/bf16.ll +++ b/llvm/test/CodeGen/NVPTX/bf16.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_bfloat -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,8 +14,8 @@ %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2+0], %h1; -; CHECK: st.param.b16 [param2+2], %h2; +; CHECK: st.param.b16 [param2+0], %rs1; +; CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE, @@ -37,8 +37,8 @@ define void @boom() { %fp = call ptr @usefp(ptr @callee) ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0+0], %h1; - ; CHECK: st.param.b16 [param0+2], %h2; + ; CHECK: st.param.b16 [param0+0], %rs1; + ; CHECK: st.param.b16 [param0+2], %rs2; ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -43,7 +43,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( -; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00; +; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_ret_const() #0 { @@ -51,14 +51,14 @@ } ; CHECK-LABEL: test_fadd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd(half %a, half %b) #0 { @@ -67,14 +67,14 @@ } ; CHECK-LABEL: test_fadd_v1f16( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 { @@ -84,14 +84,14 @@ ; Check that we can lower fadd with immediate arguments. ; CHECK-LABEL: test_fadd_imm_0( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_0(half %b) #0 { @@ -100,14 +100,14 @@ } ; CHECK-LABEL: test_fadd_imm_1( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_1(half %a) #0 { @@ -116,14 +116,14 @@ } ; CHECK-LABEL: test_fsub( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1]; -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fsub_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fsub_param_1]; +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fsub(half %a, half %b) #0 { @@ -132,15 +132,15 @@ } ; CHECK-LABEL: test_fneg( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0]; -; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; -; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fneg_param_0]; +; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; +; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fneg(half %a) #0 { @@ -149,14 +149,14 @@ } ; CHECK-LABEL: test_fmul( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1]; -; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmul_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmul_param_1]; +; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fmul(half %a, half %b) #0 { @@ -165,15 +165,15 @@ } ; CHECK-LABEL: test_fdiv( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fdiv_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fdiv_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fdiv(half %a, half %b) #0 { @@ -182,8 +182,8 @@ } ; CHECK-LABEL: test_frem( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_frem_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_frem_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]]; @@ -198,7 +198,7 @@ ; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]]; ; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]]; ; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RESULT]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_frem(half %a, half %b) #0 { @@ -207,7 +207,7 @@ } ; CHECK-LABEL: test_store( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_store_param_0]; ; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1]; ; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]]; ; CHECK-NEXT: ret; @@ -218,7 +218,7 @@ ; CHECK-LABEL: test_load( ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0]; -; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]]; +; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_load(ptr %a) #0 { @@ -243,8 +243,8 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK-LABEL: test_call( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -254,7 +254,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -264,8 +264,8 @@ } ; CHECK-LABEL: test_call_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -275,7 +275,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -285,8 +285,8 @@ } ; CHECK-LABEL: test_tailcall_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -296,7 +296,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -306,10 +306,10 @@ } ; CHECK-LABEL: test_select( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1]; ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select(half %a, half %b, i1 zeroext %c) #0 { @@ -318,15 +318,15 @@ } ; CHECK-LABEL: test_select_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]] -; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { @@ -338,8 +338,8 @@ ; CHECK-LABEL: test_select_cc_f32_f16( ; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0]; ; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; @@ -355,13 +355,13 @@ } ; CHECK-LABEL: test_select_cc_f16_f32( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2]; ; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3]; ; CHECK-NOFTZ-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1]; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { @@ -371,8 +371,8 @@ } ; CHECK-LABEL: test_fcmp_une( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_une_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -387,8 +387,8 @@ } ; CHECK-LABEL: test_fcmp_ueq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16-NOFTZ: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.equ.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -403,8 +403,8 @@ } ; CHECK-LABEL: test_fcmp_ugt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16-NOFTZ: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gtu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -419,8 +419,8 @@ } ; CHECK-LABEL: test_fcmp_uge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16-NOFTZ: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.geu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -435,8 +435,8 @@ } ; CHECK-LABEL: test_fcmp_ult( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ult_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16-NOFTZ: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ltu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -451,8 +451,8 @@ } ; CHECK-LABEL: test_fcmp_ule( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ule_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16-NOFTZ: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.leu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -468,8 +468,8 @@ ; CHECK-LABEL: test_fcmp_uno( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uno_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16-NOFTZ: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.nan.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -484,8 +484,8 @@ } ; CHECK-LABEL: test_fcmp_one( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_one_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16-NOFTZ: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ne.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -500,8 +500,8 @@ } ; CHECK-LABEL: test_fcmp_oeq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16-NOFTZ: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.eq.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -516,8 +516,8 @@ } ; CHECK-LABEL: test_fcmp_ogt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16-NOFTZ: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -532,8 +532,8 @@ } ; CHECK-LABEL: test_fcmp_oge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16-NOFTZ: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ge.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -548,8 +548,8 @@ } ; XCHECK-LABEL: test_fcmp_olt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_olt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -564,8 +564,8 @@ } ; XCHECK-LABEL: test_fcmp_ole( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ole_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16-NOFTZ: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.le.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -580,8 +580,8 @@ } ; CHECK-LABEL: test_fcmp_ord( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ord_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16-NOFTZ: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.num.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -596,8 +596,8 @@ } ; CHECK-LABEL: test_br_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_br_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_br_cc_param_1]; ; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2]; ; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] @@ -623,10 +623,10 @@ ; CHECK-LABEL: test_phi( ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0]; -; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]]; +; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]]; ; CHECK: [[LOOP:\$L__BB[0-9_]+]]: -; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]]; -; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]]; +; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; +; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { ; CHECK: st.param.b64 [param0+0], %[[P1]]; ; CHECK: call.uni (retval0), @@ -651,7 +651,7 @@ declare i1 @test_dummy(ptr %p1) #0 ; CHECK-LABEL: test_fptosi_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0]; ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -661,7 +661,7 @@ } ; CHECK-LABEL: test_fptosi_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0]; ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -671,7 +671,7 @@ } ; CHECK-LABEL: test_fptoui_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0]; ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -681,7 +681,7 @@ } ; CHECK-LABEL: test_fptoui_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0]; ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -692,7 +692,7 @@ ; CHECK-LABEL: test_uitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0]; -; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32(i32 %a) #0 { @@ -702,7 +702,7 @@ ; CHECK-LABEL: test_uitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0]; -; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i64(i64 %a) #0 { @@ -712,7 +712,7 @@ ; CHECK-LABEL: test_sitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0]; -; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32(i32 %a) #0 { @@ -722,7 +722,7 @@ ; CHECK-LABEL: test_sitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0]; -; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i64(i64 %a) #0 { @@ -732,14 +732,14 @@ ; CHECK-LABEL: test_uitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { @@ -750,14 +750,14 @@ ; CHECK-LABEL: test_sitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { @@ -768,7 +768,7 @@ ; CHECK-LABEL: test_fptrunc_float( ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_float(float %a) #0 { @@ -778,7 +778,7 @@ ; CHECK-LABEL: test_fptrunc_double( ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0]; -; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_double(double %a) #0 { @@ -787,7 +787,7 @@ } ; CHECK-LABEL: test_fpext_float( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK: st.param.f32 [func_retval0+0], [[R]]; @@ -798,7 +798,7 @@ } ; CHECK-LABEL: test_fpext_double( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0]; ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]]; ; CHECK: st.param.f64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -809,7 +809,7 @@ ; CHECK-LABEL: test_bitcast_halftoi16( -; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0]; +; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0]; ; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]] ; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]] ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -821,7 +821,7 @@ ; CHECK-LABEL: test_bitcast_i16tohalf( ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0]; -; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]] +; CHECK: mov.b16 [[AH:%rs[0-9]+]], [[AS]] ; CHECK: st.param.b16 [func_retval0+0], [[AH]]; ; CHECK: ret; define half @test_bitcast_i16tohalf(i16 %a) #0 { @@ -855,12 +855,12 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 ; CHECK-LABEL: test_sqrt( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sqrt_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sqrt(half %a) #0 { @@ -876,11 +876,11 @@ ;} ; CHECK-LABEL: test_sin( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sin(half %a) #0 #1 { @@ -889,11 +889,11 @@ } ; CHECK-LABEL: test_cos( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_cos(half %a) #0 #1 { @@ -944,16 +944,16 @@ ;} ; CHECK-LABEL: test_fma( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fma_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fma_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret define half @test_fma(half %a, half %b, half %c) #0 { @@ -962,12 +962,12 @@ } ; CHECK-LABEL: test_fabs( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fabs_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: abs.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fabs(half %a) #0 { @@ -976,15 +976,15 @@ } ; CHECK-LABEL: test_minnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_minnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_minnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_minnum(half %a, half %b) #0 { @@ -993,15 +993,15 @@ } ; CHECK-LABEL: test_maxnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_maxnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_maxnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_maxnum(half %a, half %b) #0 { @@ -1010,14 +1010,14 @@ } ; CHECK-LABEL: test_copysign( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_param_1]; ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign(half %a, half %b) #0 { @@ -1026,7 +1026,7 @@ } ; CHECK-LABEL: test_copysign_f32( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1]; ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]]; @@ -1034,7 +1034,7 @@ ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648; ; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign_f32(half %a, float %b) #0 { @@ -1044,7 +1044,7 @@ } ; CHECK-LABEL: test_copysign_f64( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1]; ; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]]; @@ -1053,7 +1053,7 @@ ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48; ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_copysign_f64(half %a, double %b) #0 { @@ -1063,14 +1063,14 @@ } ; CHECK-LABEL: test_copysign_extended( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_extended_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_extended_param_1]; ; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; ; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; ; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; +; CHECK: mov.b16 [[R:%rs[0-9]+]], [[RX]]; ; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[R]]; ; CHECK: st.param.f32 [func_retval0+0], [[XR]]; @@ -1082,8 +1082,8 @@ } ; CHECK-LABEL: test_floor( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0]; -; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0]; +; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_floor(half %a) #0 { @@ -1092,8 +1092,8 @@ } ; CHECK-LABEL: test_ceil( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0]; -; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0]; +; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_ceil(half %a) #0 { @@ -1102,8 +1102,8 @@ } ; CHECK-LABEL: test_trunc( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0]; -; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0]; +; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_trunc(half %a) #0 { @@ -1112,8 +1112,8 @@ } ; CHECK-LABEL: test_rint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_rint(half %a) #0 { @@ -1122,8 +1122,8 @@ } ; CHECK-LABEL: test_nearbyint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_nearbyint(half %a) #0 { @@ -1132,8 +1132,8 @@ } ; CHECK-LABEL: test_roundeven( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_roundeven_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_roundeven(half %a) #0 { @@ -1154,16 +1154,16 @@ } ; CHECK-LABEL: test_fmuladd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmuladd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmuladd_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fmuladd(half %a, half %b, half %c) #0 { diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -41,7 +41,7 @@ ; CHECK-LABEL: test_extract_0( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0]; -; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]]; +; CHECK: mov.b32 {[[R:%rs[0-9]+]], %tmp_hi}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_0(<2 x half> %a) #0 { @@ -51,7 +51,7 @@ ; CHECK-LABEL: test_extract_1( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0]; -; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]]; +; CHECK: mov.b32 {%tmp_lo, [[R:%rs[0-9]+]]}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_1(<2 x half> %a) #0 { @@ -63,8 +63,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0]; ; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1]; ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]]; -; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { @@ -78,16 +78,16 @@ ; ; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -105,13 +105,13 @@ ; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -128,13 +128,13 @@ ; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; ; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -150,16 +150,16 @@ ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1]; ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -176,14 +176,14 @@ ; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]]; ; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -198,16 +198,16 @@ ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1]; ; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -220,16 +220,16 @@ ; CHECK-LABEL: test_fdiv( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]; ; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -243,8 +243,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1]; ; -- Split into elements -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; -- promote to f32. ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; @@ -265,8 +265,8 @@ ; CHECK-DAG: testp.infinite.f32 [[ISB1INF:%p[0-9]+]], [[FB1]]; ; CHECK-DAG: selp.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RFNINF1]], [[ISB1INF]]; ; -- convert back to f16. -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; -- merge into f16x2 and return it. ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; @@ -280,7 +280,7 @@ ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; ; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] -; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; +; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]]; ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; ; CHECK: ret; define void @test_ldst_v2f16(ptr %a, ptr %b) { @@ -309,7 +309,7 @@ ; CHECK-LABEL: .func test_ldst_v4f16( ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; -; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; +; CHECK-DAG: ld.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]]; ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: ret; define void @test_ldst_v4f16(ptr %a, ptr %b) { @@ -416,8 +416,8 @@ ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] ; -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -425,10 +425,10 @@ ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] ; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -445,8 +445,8 @@ ; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -472,10 +472,10 @@ ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -490,8 +490,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -512,8 +512,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -534,8 +534,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -556,8 +556,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -578,8 +578,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -600,8 +600,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -623,8 +623,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -645,8 +645,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -667,8 +667,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -689,8 +689,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -711,8 +711,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -733,8 +733,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -755,8 +755,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -777,8 +777,8 @@ ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -797,7 +797,7 @@ ; CHECK-LABEL: test_fptosi_i32( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -809,7 +809,7 @@ ; CHECK-LABEL: test_fptosi_i64( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -821,7 +821,7 @@ ; CHECK-LABEL: test_fptoui_2xi32( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -833,7 +833,7 @@ ; CHECK-LABEL: test_fptoui_2xi64( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -845,8 +845,8 @@ ; CHECK-LABEL: test_uitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.u32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -857,8 +857,8 @@ ; CHECK-LABEL: test_uitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.u64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -869,8 +869,8 @@ ; CHECK-LABEL: test_sitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.s32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -881,8 +881,8 @@ ; CHECK-LABEL: test_sitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.s64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -894,21 +894,21 @@ ; CHECK-LABEL: test_uitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C1:%rs[0-9]+]], [[A1]]; ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -922,21 +922,21 @@ ; CHECK-LABEL: test_sitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C1:%rs[0-9]+]], [[A1]]; ; ; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} ; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -949,8 +949,8 @@ ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -961,8 +961,8 @@ ; CHECK-LABEL: test_fptrunc_2xdouble( ; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0]; -; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[A1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -973,7 +973,7 @@ ; CHECK-LABEL: test_fpext_2xfloat( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; @@ -985,7 +985,7 @@ ; CHECK-LABEL: test_fpext_2xdouble( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; @@ -1067,13 +1067,13 @@ ; CHECK-LABEL: test_sqrt( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1091,13 +1091,13 @@ ; CHECK-LABEL: test_sin( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1108,13 +1108,13 @@ ; CHECK-LABEL: test_cos( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1172,9 +1172,9 @@ ; ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1183,8 +1183,8 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -1196,13 +1196,13 @@ ; CHECK-LABEL: test_fabs( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1214,16 +1214,16 @@ ; CHECK-LABEL: test_minnum( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1235,16 +1235,16 @@ ; CHECK-LABEL: test_maxnum( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1256,8 +1256,8 @@ ; CHECK-LABEL: test_copysign( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; @@ -1268,8 +1268,8 @@ ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1281,7 +1281,7 @@ ; CHECK-LABEL: test_copysign_f32( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; @@ -1294,8 +1294,8 @@ ; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; } ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1308,7 +1308,7 @@ ; CHECK-LABEL: test_copysign_f64( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; @@ -1323,8 +1323,8 @@ ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1337,8 +1337,8 @@ ; CHECK-LABEL: test_copysign_extended( ; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; ; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; ; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; ; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; @@ -1349,10 +1349,10 @@ ; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; ; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; ; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; +; CHECK-DAG: mov.b16 [[R0:%rs[0-9]+]], [[RS0]]; +; CHECK-DAG: mov.b16 [[R1:%rs[0-9]+]], [[RS1]]; ; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} -; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] +; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; @@ -1365,9 +1365,9 @@ ; CHECK-LABEL: test_floor( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1378,9 +1378,9 @@ ; CHECK-LABEL: test_ceil( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1391,9 +1391,9 @@ ; CHECK-LABEL: test_trunc( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1404,9 +1404,9 @@ ; CHECK-LABEL: test_rint( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1417,9 +1417,9 @@ ; CHECK-LABEL: test_nearbyint( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1430,9 +1430,9 @@ ; CHECK-LABEL: test_roundeven( ; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_roundeven_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; ; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1462,9 +1462,9 @@ ; ; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1473,8 +1473,8 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] ; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; @@ -1485,16 +1485,16 @@ } ; CHECK-LABEL: test_shufflevector( -; CHECK: mov.b32 {%h1, %h2}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs1, %rs2}, %hh1; +; CHECK: mov.b32 %hh2, {%rs2, %rs1}; define <2 x half> @test_shufflevector(<2 x half> %a) #0 { %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> ret <2 x half> %s } ; CHECK-LABEL: test_insertelement( -; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs2, %tmp_hi}, %hh1; +; CHECK: mov.b32 %hh2, {%rs2, %rs1}; define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { %i = insertelement <2 x half> %a, half %x, i64 1 ret <2 x half> %i diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll --- a/llvm/test/CodeGen/NVPTX/half.ll +++ b/llvm/test/CodeGen/NVPTX/half.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in store half %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_half -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in %val_int = bitcast half %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py --- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py +++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py @@ -28,7 +28,7 @@ "i16": "r", "i32": "r", "i64": "rd", - "half": "h", + "half": "rs", "<2 x half>": "hh", "float": "f", "double": "fd", diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -384,13 +384,13 @@ ; CHECK: .func (.param .b32 func_retval0) ; CHECK-LABEL: test_f16( ; CHECK-NEXT: .param .b32 test_f16_param_0 -; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0]; +; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0]; ; CHECK: .param .b32 param0; ; CHECK: st.param.b16 [param0+0], [[E]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]] ; CHECK-NEXT: ret; define half @test_f16(half %a) { @@ -419,16 +419,16 @@ ; CHECK-LABEL: test_v3f16( ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8] ; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v3f16, -; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4]; +; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; ; CHECK: ret; @@ -459,17 +459,17 @@ ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v5f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; ; CHECK: ret; @@ -502,9 +502,9 @@ ; CHECK:.func (.param .align 32 .b8 func_retval0[32]) ; CHECK-LABEL: test_v9f16( ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0]; -; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8]; -; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0]; +; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8]; +; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16]; ; CHECK: .param .align 32 .b8 param0[32]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.v4.b16 [param0+8], @@ -512,9 +512,9 @@ ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK: test_v9f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8]; -; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; +; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; @@ -915,13 +915,13 @@ ; CHECK: .func (.param .align 2 .b8 func_retval0[2]) ; CHECK-LABEL: test_s_f16( ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0+0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni ; CHECK-NEXT: test_s_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define %s_f16 @test_s_f16(%s_f16 %a) { diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -92,10 +92,10 @@ define half @check_f16() { ; PTX-LABEL: check_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0+0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%rs[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]]; ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]];