diff --git a/clang/test/CodeGenCUDA/bf16.cu b/clang/test/CodeGenCUDA/bf16.cu --- a/clang/test/CodeGenCUDA/bf16.cu +++ b/clang/test/CodeGenCUDA/bf16.cu @@ -2,7 +2,7 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "nvptx64-nvidia-cuda" \ -// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -S -o - %s | FileCheck %s +// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -O1 -S -o - %s | FileCheck %s #include "Inputs/cuda.h" @@ -11,10 +11,11 @@ // CHECK: .param .b16 _Z8test_argPDF16bDF16b_param_1 // __device__ void test_arg(__bf16 *out, __bf16 in) { -// CHECK: ld.param.b16 %{{h.*}}, [_Z8test_argPDF16bDF16b_param_1]; +// CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [_Z8test_argPDF16bDF16b_param_0]; +// CHECK-DAG: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_argPDF16bDF16b_param_1]; __bf16 bf16 = in; *out = bf16; -// CHECK: st.b16 +// CHECK: st.b16 [%[[A]]], %[[R]] // CHECK: ret; } @@ -22,25 +23,27 @@ // CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z8test_retDF16b( // CHECK: .param .b16 _Z8test_retDF16b_param_0 __device__ __bf16 test_ret( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z8test_retDF16b_param_0]; +// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z8test_retDF16b_param_0]; return in; -// CHECK: st.param.b16 [func_retval0+0], %h +// CHECK: st.param.b16 [func_retval0+0], %[[R]] // CHECK: ret; } +__device__ __bf16 external_func( __bf16 in); + // CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z9test_callDF16b( // CHECK: .param .b16 _Z9test_callDF16b_param_0 __device__ __bf16 test_call( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z9test_callDF16b_param_0]; -// CHECK: st.param.b16 [param0+0], %h2; +// CHECK: ld.param.b16 %[[R:rs[0-9]+]], [_Z9test_callDF16b_param_0]; +// CHECK: st.param.b16 [param0+0], %[[R]]; // CHECK: .param .b32 retval0; // CHECK: call.uni (retval0), -// CHECK-NEXT: _Z8test_retDF16b, +// CHECK-NEXT: _Z13external_funcDF16b, // CHECK-NEXT: ( // CHECK-NEXT: param0 // CHECK-NEXT ); -// CHECK: ld.param.b16 %h{{.*}}, [retval0+0]; - return test_ret(in); -// CHECK: st.param.b16 [func_retval0+0], %h +// CHECK: ld.param.b16 %[[RET:rs[0-9]+]], [retval0+0]; + return external_func(in); +// CHECK: st.param.b16 [func_retval0+0], %[[RET]] // CHECK: ret; } diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -309,10 +309,6 @@ Ret = (5 << 28); } else if (RC == &NVPTX::Float64RegsRegClass) { Ret = (6 << 28); - } else if (RC == &NVPTX::Float16RegsRegClass) { - Ret = (7 << 28); - } else if (RC == &NVPTX::Float16x2RegsRegClass) { - Ret = (8 << 28); } else { report_fatal_error("Bad register class"); } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -816,8 +816,7 @@ static std::optional pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16, unsigned Opcode_i32, - std::optional Opcode_i64, unsigned Opcode_f16, - unsigned Opcode_f16x2, unsigned Opcode_f32, + std::optional Opcode_i64, unsigned Opcode_f32, std::optional Opcode_f64) { switch (VT) { case MVT::i1: @@ -831,10 +830,10 @@ return Opcode_i64; case MVT::f16: case MVT::bf16: - return Opcode_f16; + return Opcode_i16; case MVT::v2f16: case MVT::v2bf16: - return Opcode_f16x2; + return Opcode_i32; case MVT::f32: return Opcode_f32; case MVT::f64: @@ -935,10 +934,9 @@ MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy; if (SelectDirectAddr(N1, Addr)) { - Opcode = pickOpcodeForVT( - TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar, - NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar, - NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); + Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, + NVPTX::LD_i32_avar, NVPTX::LD_i64_avar, + NVPTX::LD_f32_avar, NVPTX::LD_f64_avar); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), @@ -948,9 +946,8 @@ } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset) : SelectADDRsi(N1.getNode(), N1, Base, Offset)) { Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi, - NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, - NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi, - NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); + NVPTX::LD_i32_asi, NVPTX::LD_i64_asi, + NVPTX::LD_f32_asi, NVPTX::LD_f64_asi); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), @@ -960,15 +957,14 @@ } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset) : SelectADDRri(N1.getNode(), N1, Base, Offset)) { if (PointerSize == 64) - Opcode = pickOpcodeForVT( - TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, - NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64, - NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); + Opcode = + pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64, + NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, + NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64); else - Opcode = pickOpcodeForVT( - TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari, - NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari, - NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); + Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, + NVPTX::LD_i32_ari, NVPTX::LD_i64_ari, + NVPTX::LD_f32_ari, NVPTX::LD_f64_ari); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), @@ -977,16 +973,14 @@ NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops); } else { if (PointerSize == 64) - Opcode = pickOpcodeForVT( - TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64, - NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64, - NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64, - NVPTX::LD_f64_areg_64); + Opcode = + pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64, + NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, + NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64); else - Opcode = pickOpcodeForVT( - TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg, - NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg, - NVPTX::LD_f32_areg, NVPTX::LD_f64_areg); + Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, + NVPTX::LD_i32_areg, NVPTX::LD_i64_areg, + NVPTX::LD_f32_areg, NVPTX::LD_f64_areg); if (!Opcode) return false; SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl), @@ -1090,15 +1084,13 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar, NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar, - NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar, NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar); break; case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar, - NVPTX::LDV_i32_v4_avar, std::nullopt, - NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar, - NVPTX::LDV_f32_v4_avar, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar, + NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, + std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt); break; } if (!Opcode) @@ -1117,15 +1109,13 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi, NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi, - NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi, NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi); break; case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi, - NVPTX::LDV_i32_v4_asi, std::nullopt, - NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi, - NVPTX::LDV_f32_v4_asi, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi, + NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, + std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt); break; } if (!Opcode) @@ -1142,18 +1132,16 @@ default: return false; case NVPTXISD::LoadV2: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64, - NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64, - NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64, - NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64, - NVPTX::LDV_f64_v2_ari_64); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64, + NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64, + NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64, NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt, - NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64, NVPTX::LDV_f32_v4_ari_64, std::nullopt); break; } @@ -1165,15 +1153,13 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari, NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari, - NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari, NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari); break; case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari, - NVPTX::LDV_i32_v4_ari, std::nullopt, - NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari, - NVPTX::LDV_f32_v4_ari, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari, + NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, + std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt); break; } } @@ -1193,15 +1179,13 @@ Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64, NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64, - NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64, - NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64, + NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64, NVPTX::LDV_f64_v2_areg_64); break; case NVPTXISD::LoadV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64, NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt, - NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64, NVPTX::LDV_f32_v4_areg_64, std::nullopt); break; } @@ -1213,16 +1197,14 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg, NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg, - NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg, - NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg, + NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg, NVPTX::LDV_f64_v2_areg); break; case NVPTXISD::LoadV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, - NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt, - NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg, - NVPTX::LDV_f32_v4_areg, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg, + NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, + std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt); break; } } @@ -1310,47 +1292,39 @@ case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8avar, - NVPTX::INT_PTX_LDG_GLOBAL_i16avar, - NVPTX::INT_PTX_LDG_GLOBAL_i32avar, - NVPTX::INT_PTX_LDG_GLOBAL_i64avar, - NVPTX::INT_PTX_LDG_GLOBAL_f16avar, - NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar, - NVPTX::INT_PTX_LDG_GLOBAL_f32avar, - NVPTX::INT_PTX_LDG_GLOBAL_f64avar); + NVPTX::INT_PTX_LDG_GLOBAL_i8avar, + NVPTX::INT_PTX_LDG_GLOBAL_i16avar, + NVPTX::INT_PTX_LDG_GLOBAL_i32avar, + NVPTX::INT_PTX_LDG_GLOBAL_i64avar, + NVPTX::INT_PTX_LDG_GLOBAL_f32avar, + NVPTX::INT_PTX_LDG_GLOBAL_f64avar); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8avar, - NVPTX::INT_PTX_LDU_GLOBAL_i16avar, - NVPTX::INT_PTX_LDU_GLOBAL_i32avar, - NVPTX::INT_PTX_LDU_GLOBAL_i64avar, - NVPTX::INT_PTX_LDU_GLOBAL_f16avar, - NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar, - NVPTX::INT_PTX_LDU_GLOBAL_f32avar, - NVPTX::INT_PTX_LDU_GLOBAL_f64avar); + NVPTX::INT_PTX_LDU_GLOBAL_i8avar, + NVPTX::INT_PTX_LDU_GLOBAL_i16avar, + NVPTX::INT_PTX_LDU_GLOBAL_i32avar, + NVPTX::INT_PTX_LDU_GLOBAL_i64avar, + NVPTX::INT_PTX_LDU_GLOBAL_f32avar, + NVPTX::INT_PTX_LDU_GLOBAL_f64avar); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar); + NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar, + NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar, + NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar, + NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar, + NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar, + NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); + NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar, + NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar, + NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar, + NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar, + NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar, + NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: @@ -1358,8 +1332,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar, NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar, NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt); break; case NVPTXISD::LDUV4: @@ -1367,8 +1339,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar, NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar, NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar, NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt); break; } @@ -1390,8 +1360,6 @@ NVPTX::INT_PTX_LDG_GLOBAL_i16ari64, NVPTX::INT_PTX_LDG_GLOBAL_i32ari64, NVPTX::INT_PTX_LDG_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f16ari64, - NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64, NVPTX::INT_PTX_LDG_GLOBAL_f32ari64, NVPTX::INT_PTX_LDG_GLOBAL_f64ari64); else @@ -1400,8 +1368,6 @@ NVPTX::INT_PTX_LDU_GLOBAL_i16ari64, NVPTX::INT_PTX_LDU_GLOBAL_i32ari64, NVPTX::INT_PTX_LDU_GLOBAL_i64ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f16ari64, - NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64, NVPTX::INT_PTX_LDU_GLOBAL_f32ari64, NVPTX::INT_PTX_LDU_GLOBAL_f64ari64); break; @@ -1412,8 +1378,6 @@ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64); break; @@ -1423,8 +1387,6 @@ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64); break; @@ -1434,8 +1396,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64, NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt); break; case NVPTXISD::LDUV4: @@ -1443,8 +1403,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64, NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt); break; } @@ -1456,47 +1414,39 @@ case ISD::INTRINSIC_W_CHAIN: if (IsLDG) Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_GLOBAL_i8ari, - NVPTX::INT_PTX_LDG_GLOBAL_i16ari, - NVPTX::INT_PTX_LDG_GLOBAL_i32ari, - NVPTX::INT_PTX_LDG_GLOBAL_i64ari, - NVPTX::INT_PTX_LDG_GLOBAL_f16ari, - NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari, - NVPTX::INT_PTX_LDG_GLOBAL_f32ari, - NVPTX::INT_PTX_LDG_GLOBAL_f64ari); + NVPTX::INT_PTX_LDG_GLOBAL_i8ari, + NVPTX::INT_PTX_LDG_GLOBAL_i16ari, + NVPTX::INT_PTX_LDG_GLOBAL_i32ari, + NVPTX::INT_PTX_LDG_GLOBAL_i64ari, + NVPTX::INT_PTX_LDG_GLOBAL_f32ari, + NVPTX::INT_PTX_LDG_GLOBAL_f64ari); else Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_GLOBAL_i8ari, - NVPTX::INT_PTX_LDU_GLOBAL_i16ari, - NVPTX::INT_PTX_LDU_GLOBAL_i32ari, - NVPTX::INT_PTX_LDU_GLOBAL_i64ari, - NVPTX::INT_PTX_LDU_GLOBAL_f16ari, - NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari, - NVPTX::INT_PTX_LDU_GLOBAL_f32ari, - NVPTX::INT_PTX_LDU_GLOBAL_f64ari); + NVPTX::INT_PTX_LDU_GLOBAL_i8ari, + NVPTX::INT_PTX_LDU_GLOBAL_i16ari, + NVPTX::INT_PTX_LDU_GLOBAL_i32ari, + NVPTX::INT_PTX_LDU_GLOBAL_i64ari, + NVPTX::INT_PTX_LDU_GLOBAL_f32ari, + NVPTX::INT_PTX_LDU_GLOBAL_f64ari); break; case NVPTXISD::LoadV2: case NVPTXISD::LDGV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); + NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32, + NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32); break; case NVPTXISD::LDUV2: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); + NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32, + NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32); break; case NVPTXISD::LoadV4: case NVPTXISD::LDGV4: @@ -1504,8 +1454,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32, NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt); break; case NVPTXISD::LDUV4: @@ -1513,8 +1461,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32, NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt); break; } @@ -1536,8 +1482,6 @@ NVPTX::INT_PTX_LDG_GLOBAL_i16areg64, NVPTX::INT_PTX_LDG_GLOBAL_i32areg64, NVPTX::INT_PTX_LDG_GLOBAL_i64areg64, - NVPTX::INT_PTX_LDG_GLOBAL_f16areg64, - NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64, NVPTX::INT_PTX_LDG_GLOBAL_f32areg64, NVPTX::INT_PTX_LDG_GLOBAL_f64areg64); else @@ -1546,8 +1490,6 @@ NVPTX::INT_PTX_LDU_GLOBAL_i16areg64, NVPTX::INT_PTX_LDU_GLOBAL_i32areg64, NVPTX::INT_PTX_LDU_GLOBAL_i64areg64, - NVPTX::INT_PTX_LDU_GLOBAL_f16areg64, - NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64, NVPTX::INT_PTX_LDU_GLOBAL_f32areg64, NVPTX::INT_PTX_LDU_GLOBAL_f64areg64); break; @@ -1558,8 +1500,6 @@ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64); break; @@ -1569,8 +1509,6 @@ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64); break; @@ -1580,8 +1518,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64, NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt); break; case NVPTXISD::LDUV4: @@ -1589,8 +1525,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64, NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt); break; } @@ -1606,8 +1540,6 @@ NVPTX::INT_PTX_LDG_GLOBAL_i16areg, NVPTX::INT_PTX_LDG_GLOBAL_i32areg, NVPTX::INT_PTX_LDG_GLOBAL_i64areg, - NVPTX::INT_PTX_LDG_GLOBAL_f16areg, - NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg, NVPTX::INT_PTX_LDG_GLOBAL_f32areg, NVPTX::INT_PTX_LDG_GLOBAL_f64areg); else @@ -1616,8 +1548,6 @@ NVPTX::INT_PTX_LDU_GLOBAL_i16areg, NVPTX::INT_PTX_LDU_GLOBAL_i32areg, NVPTX::INT_PTX_LDU_GLOBAL_i64areg, - NVPTX::INT_PTX_LDU_GLOBAL_f16areg, - NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg, NVPTX::INT_PTX_LDU_GLOBAL_f32areg, NVPTX::INT_PTX_LDU_GLOBAL_f64areg); break; @@ -1628,8 +1558,6 @@ NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32); break; @@ -1639,8 +1567,6 @@ NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32, NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32); break; @@ -1650,8 +1576,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt, - NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32, - NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32, NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt); break; case NVPTXISD::LDUV4: @@ -1659,8 +1583,6 @@ EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt, - NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32, - NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32, NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt); break; } @@ -1781,7 +1703,6 @@ if (SelectDirectAddr(BasePtr, Addr)) { Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar, NVPTX::ST_i32_avar, NVPTX::ST_i64_avar, - NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar, NVPTX::ST_f32_avar, NVPTX::ST_f64_avar); if (!Opcode) return false; @@ -1799,7 +1720,6 @@ : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) { Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi, NVPTX::ST_i32_asi, NVPTX::ST_i64_asi, - NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi, NVPTX::ST_f32_asi, NVPTX::ST_f64_asi); if (!Opcode) return false; @@ -1817,14 +1737,13 @@ ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset) : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) { if (PointerSize == 64) - Opcode = pickOpcodeForVT( - SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, - NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64, - NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); + Opcode = + pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64, + NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, + NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64); else Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari, NVPTX::ST_i32_ari, NVPTX::ST_i64_ari, - NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari, NVPTX::ST_f32_ari, NVPTX::ST_f64_ari); if (!Opcode) return false; @@ -1844,12 +1763,10 @@ Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64, NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64, - NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64, NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64); else Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg, NVPTX::ST_i32_areg, NVPTX::ST_i64_areg, - NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg, NVPTX::ST_f32_areg, NVPTX::ST_f64_areg); if (!Opcode) return false; @@ -1955,14 +1872,12 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar, NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar, - NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar, NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, std::nullopt, - NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar, NVPTX::STV_f32_v4_avar, std::nullopt); break; } @@ -1976,15 +1891,13 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi, NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi, - NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi, NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi); break; case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, - NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi, - NVPTX::STV_i32_v4_asi, std::nullopt, - NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi, - NVPTX::STV_f32_v4_asi, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi, + NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, + std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt); break; } StOps.push_back(Base); @@ -1996,18 +1909,16 @@ default: return false; case NVPTXISD::StoreV2: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64, - NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64, - NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64, - NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64, - NVPTX::STV_f64_v2_ari_64); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, + NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64, + NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64, + NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64, NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt, - NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64, NVPTX::STV_f32_v4_ari_64, std::nullopt); break; } @@ -2019,14 +1930,12 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari, NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari, - NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari, NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, std::nullopt, - NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari, NVPTX::STV_f32_v4_ari, std::nullopt); break; } @@ -2042,15 +1951,13 @@ Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64, NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64, - NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64, - NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64, + NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64, NVPTX::STV_f64_v2_areg_64); break; case NVPTXISD::StoreV4: Opcode = pickOpcodeForVT( EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64, NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt, - NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64, NVPTX::STV_f32_v4_areg_64, std::nullopt); break; } @@ -2062,16 +1969,14 @@ Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg, NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg, - NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg, - NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg, + NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg, NVPTX::STV_f64_v2_areg); break; case NVPTXISD::StoreV4: - Opcode = pickOpcodeForVT( - EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, - NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt, - NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg, - NVPTX::STV_f32_v4_areg, std::nullopt); + Opcode = + pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg, + NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, + std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt); break; } } @@ -2126,23 +2031,20 @@ Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16, NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64, - NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2, NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64); break; case 2: Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8, NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32, - NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16, - NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32, + NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32, NVPTX::LoadParamMemV2F64); break; case 4: - Opcode = pickOpcodeForVT( - MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, - NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt, - NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2, - NVPTX::LoadParamMemV4F32, std::nullopt); + Opcode = + pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8, + NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, + std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt); break; } if (!Opcode) @@ -2210,21 +2112,18 @@ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16, NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64, - NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2, NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64); break; case 2: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16, NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64, - NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2, NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64); break; case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16, NVPTX::StoreRetvalV4I32, std::nullopt, - NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2, NVPTX::StoreRetvalV4F32, std::nullopt); break; } @@ -2289,21 +2188,18 @@ Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamI8, NVPTX::StoreParamI16, NVPTX::StoreParamI32, NVPTX::StoreParamI64, - NVPTX::StoreParamF16, NVPTX::StoreParamF16x2, NVPTX::StoreParamF32, NVPTX::StoreParamF64); break; case 2: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16, NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64, - NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2, NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64); break; case 4: Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy, NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16, NVPTX::StoreParamV4I32, std::nullopt, - NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2, NVPTX::StoreParamV4F32, std::nullopt); break; } diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -410,10 +410,10 @@ addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass); addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass); addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); - addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass); - addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass); - addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass); - addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass); + addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass); + addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass); + addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass); // Conversion to/from FP16/FP16x2 is always legal. setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -51,11 +51,6 @@ } else if (DestRC == &NVPTX::Int64RegsRegClass) { Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr : NVPTX::BITCONVERT_64_F2I); - } else if (DestRC == &NVPTX::Float16RegsRegClass) { - Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr - : NVPTX::BITCONVERT_16_I2F); - } else if (DestRC == &NVPTX::Float16x2RegsRegClass) { - Op = NVPTX::IMOV32rr; } else if (DestRC == &NVPTX::Float32RegsRegClass) { Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr : NVPTX::BITCONVERT_32_I2F); diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,10 +164,10 @@ !eq(name, "i16"): Int16Regs, !eq(name, "i32"): Int32Regs, !eq(name, "i64"): Int64Regs, - !eq(name, "f16"): Float16Regs, - !eq(name, "v2f16"): Float16x2Regs, - !eq(name, "bf16"): Float16Regs, - !eq(name, "v2bf16"): Float16x2Regs, + !eq(name, "f16"): Int16Regs, + !eq(name, "v2f16"): Int32Regs, + !eq(name, "bf16"): Int16Regs, + !eq(name, "v2bf16"): Int32Regs, !eq(name, "f32"): Float32Regs, !eq(name, "f64"): Float64Regs, !eq(name, "ai32"): Int32ArgRegs, @@ -280,29 +280,29 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math]>; def f16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, doF32FTZ]>; def f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math]>; } @@ -354,29 +354,29 @@ Requires<[allowFMA]>; def f16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, allowFMA]>; def f16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"), - [(set (v2f16 Float16x2Regs:$dst), (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, allowFMA, doF32FTZ]>; def f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, allowFMA]>; // These have strange names so we don't perturb existing mir tests. @@ -417,28 +417,28 @@ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>, Requires<[noFMA]>; def _rnf16rr_ftz : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16rr : - NVPTXInst<(outs Float16Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + NVPTXInst<(outs Int16Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), !strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"), - [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>, + [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>, Requires<[useFP16Math, noFMA]>; def _rnf16x2rr_ftz : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, noFMA, doF32FTZ]>; def _rnf16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b), !strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"), - [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>, + [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>, Requires<[useFP16Math, noFMA]>; } @@ -513,7 +513,7 @@ FromName, ".u64 \t$dst, $src;"), []>; def _f16 : NVPTXInst<(outs RC:$dst), - (ins Float16Regs:$src, CvtMode:$mode), + (ins Int16Regs:$src, CvtMode:$mode), !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.", FromName, ".f16 \t$dst, $src;"), []>; def _f32 : @@ -537,7 +537,7 @@ defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>; defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>; defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>; - defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>; + defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>; defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>; defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>; @@ -576,7 +576,7 @@ Requires<[hasPTX<70>, hasSM<80>]>; } - defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Float16x2Regs>; + defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>; defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>; } @@ -640,21 +640,21 @@ defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>; defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>; defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>; -defm SELP_f16 : SELP_PATTERN<"b16", f16, Float16Regs, f16imm, fpimm>; +defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>; defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>; defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>; // This does not work as tablegen fails to infer the type of 'imm'. // def v2f16imm : Operand; -// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Float16x2Regs, v2f16imm, imm>; +// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>; def SELP_f16x2rr : - NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p), + NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p), "selp.b32 \t$dst, $a, $b, $p;", - [(set Float16x2Regs:$dst, - (select Int1Regs:$p, (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>; + [(set Int32Regs:$dst, + (select Int1Regs:$p, (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>; //----------------------------------- // Test Instructions @@ -783,26 +783,26 @@ def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; // Matchers for signed, unsigned mul.wide ISD nodes. -def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, +def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)), + (MULWIDES32 i16:$a, i16:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), (MULWIDES32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), +def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)), (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), +def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), (MULWIDES64Imm Int32Regs:$a, imm:$b)>, Requires<[doMulWide]>; -def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), +def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)), (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), @@ -1003,7 +1003,7 @@ // fp16 immediate values in .f16 instructions. Instead we have to load // the constant into a register using mov.b16. def LOAD_CONST_F16 : - NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a), + NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a), "mov.b16 \t$dst, $a;", []>; defm FADD : F3_fma_component<"add", fadd>; @@ -1028,10 +1028,10 @@ !strconcat(OpcStr, " \t$dst, $src;"), [(set RC:$dst, (fneg (T RC:$src)))]>, Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>; -def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Float16Regs, doF32FTZ>; -def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Float16Regs, True>; -def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; -def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Float16x2Regs, True>; +def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>; +def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>; +def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>; +def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>; // // F64 division @@ -1211,10 +1211,10 @@ Requires<[useFP16Math, Pred]>; } -defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Float16Regs, doF32FTZ>; -defm FMA16 : FMA_F16<"fma.rn.f16", f16, Float16Regs, True>; -defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>; -defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Float16x2Regs, True>; +defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>; +defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>; +defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>; +defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>; defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>; defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>; defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>; @@ -1651,13 +1651,13 @@ defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>; def SETP_f16rr : NVPTXInst<(outs Int1Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp), + (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;", []>, Requires<[useFP16Math]>; def SETP_f16x2rr : NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q), - (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp), + (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp), "setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;", []>, Requires<[useFP16Math]>; @@ -1690,7 +1690,7 @@ defm SET_b64 : SET<"b64", Int64Regs, i64imm>; defm SET_s64 : SET<"s64", Int64Regs, i64imm>; defm SET_u64 : SET<"u64", Int64Regs, i64imm>; -defm SET_f16 : SET<"f16", Float16Regs, f16imm>; +defm SET_f16 : SET<"f16", Int16Regs, f16imm>; defm SET_f32 : SET<"f32", Float32Regs, f32imm>; defm SET_f64 : SET<"f64", Float64Regs, f64imm>; @@ -1760,7 +1760,14 @@ def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), "mov.u64 \t$dst, $sss;", []>; - def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src), + def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss), + "mov.b16 \t$dst, $sss;", []>; + def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss), + "mov.b32 \t$dst, $sss;", []>; + def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss), + "mov.b64 \t$dst, $sss;", []>; + + def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), // We have to use .b16 here as there's no mov.f16. "mov.b16 \t$dst, $src;", []>; def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), @@ -1782,6 +1789,13 @@ "mov.u64 \t$dst, $src;", [(set Int64Regs:$dst, imm:$src)]>; +def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src), + "mov.b16 \t$dst, $src;", []>; +def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), + "mov.b32 \t$dst, $src;", []>; +def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src), + "mov.b64 \t$dst, $src;", []>; + def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src), "mov.f32 \t$dst, $src;", [(set Float32Regs:$dst, fpimm:$src)]>; @@ -1824,14 +1838,14 @@ Instruction set_64ri, Instruction set_64ir> { // i16 -> pred - def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i1 (OpNode i16:$a, i16:$b)), (setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)), (setp_16ri Int16Regs:$a, imm:$b, Mode)>; def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)), (setp_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> pred - def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)), + def : Pat<(i1 (OpNode i32:$a, i32:$b)), (setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)), (setp_32ri Int32Regs:$a, imm:$b, Mode)>; @@ -1846,14 +1860,14 @@ (setp_64ir imm:$a, Int64Regs:$b, Mode)>; // i16 -> i32 - def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)), + def : Pat<(i32 (OpNode i16:$a, i16:$b)), (set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>; def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)), (set_16ri Int16Regs:$a, imm:$b, Mode)>; def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)), (set_16ir imm:$a, Int16Regs:$b, Mode)>; // i32 -> i32 - def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)), + def : Pat<(i32 (OpNode i32:$a, i32:$b)), (set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>; def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)), (set_32ri Int32Regs:$a, imm:$b, Mode)>; @@ -1926,23 +1940,23 @@ multiclass FSET_FORMAT { // f16 -> pred - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math,doF32FTZ]>; - def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> pred @@ -1971,23 +1985,23 @@ (SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>; // f16 -> i32 - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))), - (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))), + (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)), - (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, + def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)), + (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>, Requires<[useFP16Math]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>, Requires<[useFP16Math, doF32FTZ]>; - def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))), - (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>, + def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))), + (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>, Requires<[useFP16Math]>; // f32 -> i32 @@ -2276,16 +2290,10 @@ def LoadParamMemV4I32 : LoadParamV4MemInst; def LoadParamMemV4I16 : LoadParamV4MemInst; def LoadParamMemV4I8 : LoadParamV4MemInst; -def LoadParamMemF16 : LoadParamMemInst; -def LoadParamMemF16x2 : LoadParamMemInst; def LoadParamMemF32 : LoadParamMemInst; def LoadParamMemF64 : LoadParamMemInst; -def LoadParamMemV2F16 : LoadParamV2MemInst; -def LoadParamMemV2F16x2: LoadParamV2MemInst; def LoadParamMemV2F32 : LoadParamV2MemInst; def LoadParamMemV2F64 : LoadParamV2MemInst; -def LoadParamMemV4F16 : LoadParamV4MemInst; -def LoadParamMemV4F16x2: LoadParamV4MemInst; def LoadParamMemV4F32 : LoadParamV4MemInst; def StoreParamI64 : StoreParamInst; @@ -2302,16 +2310,10 @@ def StoreParamV4I16 : StoreParamV4Inst; def StoreParamV4I8 : StoreParamV4Inst; -def StoreParamF16 : StoreParamInst; -def StoreParamF16x2 : StoreParamInst; def StoreParamF32 : StoreParamInst; def StoreParamF64 : StoreParamInst; -def StoreParamV2F16 : StoreParamV2Inst; -def StoreParamV2F16x2 : StoreParamV2Inst; def StoreParamV2F32 : StoreParamV2Inst; def StoreParamV2F64 : StoreParamV2Inst; -def StoreParamV4F16 : StoreParamV4Inst; -def StoreParamV4F16x2 : StoreParamV4Inst; def StoreParamV4F32 : StoreParamV4Inst; def StoreRetvalI64 : StoreRetvalInst; @@ -2328,15 +2330,9 @@ def StoreRetvalF64 : StoreRetvalInst; def StoreRetvalF32 : StoreRetvalInst; -def StoreRetvalF16 : StoreRetvalInst; -def StoreRetvalF16x2 : StoreRetvalInst; def StoreRetvalV2F64 : StoreRetvalV2Inst; def StoreRetvalV2F32 : StoreRetvalV2Inst; -def StoreRetvalV2F16 : StoreRetvalV2Inst; -def StoreRetvalV2F16x2: StoreRetvalV2Inst; def StoreRetvalV4F32 : StoreRetvalV4Inst; -def StoreRetvalV4F16 : StoreRetvalV4Inst; -def StoreRetvalV4F16x2: StoreRetvalV4Inst; def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>; def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>; @@ -2347,19 +2343,26 @@ NVPTXInst<(outs), (ins regclass:$a), "$a, ", [(CallArg (i32 0), regclass:$a)]>; +class CallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a, ", + [(CallArg (i32 0), vt:$a)]>; + class LastCallArgInst : NVPTXInst<(outs), (ins regclass:$a), "$a", [(LastCallArg (i32 0), regclass:$a)]>; +class LastCallArgInstVT : + NVPTXInst<(outs), (ins regclass:$a), "$a", + [(LastCallArg (i32 0), vt:$a)]>; def CallArgI64 : CallArgInst; -def CallArgI32 : CallArgInst; -def CallArgI16 : CallArgInst; +def CallArgI32 : CallArgInstVT; +def CallArgI16 : CallArgInstVT; def CallArgF64 : CallArgInst; def CallArgF32 : CallArgInst; def LastCallArgI64 : LastCallArgInst; -def LastCallArgI32 : LastCallArgInst; -def LastCallArgI16 : LastCallArgInst; +def LastCallArgI32 : LastCallArgInstVT; +def LastCallArgI16 : LastCallArgInstVT; def LastCallArgF64 : LastCallArgInst; def LastCallArgF32 : LastCallArgInst; @@ -2376,7 +2379,7 @@ def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ", [(CallVoid (Wrapper tglobaladdr:$addr))]>; def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ", - [(CallVoid Int32Regs:$addr)]>; + [(CallVoid i32:$addr)]>; def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ", [(CallVoid Int64Regs:$addr)]>; def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;", @@ -2413,53 +2416,54 @@ !strconcat("mov", asmstr, " \t$dst, $src;"), [(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>; -class MoveParamSymbolInst : NVPTXInst<(outs regclass:$dst), (ins srcty:$src), !strconcat("mov", asmstr, " \t$dst, $src;"), - [(set regclass:$dst, (MoveParam texternalsym:$src))]>; + [(set vt:$dst, (MoveParam texternalsym:$src))]>; def MoveParamI64 : MoveParamInst; def MoveParamI32 : MoveParamInst; -def MoveParamSymbolI64 : MoveParamSymbolInst; -def MoveParamSymbolI32 : MoveParamSymbolInst; +def MoveParamSymbolI64 : MoveParamSymbolInst; +def MoveParamSymbolI32 : MoveParamSymbolInst; def MoveParamI16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src), - "cvt.u16.u32 \t$dst, $src;", - [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>; + "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ? + [(set i16:$dst, (MoveParam i16:$src))]>; def MoveParamF64 : MoveParamInst; def MoveParamF32 : MoveParamInst; -def MoveParamF16 : MoveParamInst; -class PseudoUseParamInst : +class PseudoUseParamInst : NVPTXInst<(outs), (ins regclass:$src), "// Pseudo use of $src", - [(PseudoUseParam regclass:$src)]>; + [(PseudoUseParam vt:$src)]>; -def PseudoUseParamI64 : PseudoUseParamInst; -def PseudoUseParamI32 : PseudoUseParamInst; -def PseudoUseParamI16 : PseudoUseParamInst; -def PseudoUseParamF64 : PseudoUseParamInst; -def PseudoUseParamF32 : PseudoUseParamInst; +def PseudoUseParamI64 : PseudoUseParamInst; +def PseudoUseParamI32 : PseudoUseParamInst; +def PseudoUseParamI16 : PseudoUseParamInst; +def PseudoUseParamF64 : PseudoUseParamInst; +def PseudoUseParamF32 : PseudoUseParamInst; class ProxyRegInst : NVPTXInst<(outs regclass:$dst), (ins regclass:$src), !strconcat("mov.", SzStr, " \t$dst, $src;"), [(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>; -let isCodeGenOnly=1, isPseudo=1 in { - def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>; - def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; - def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>; - def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>; - def ProxyRegF16 : ProxyRegInst<"b16", f16, Float16Regs>; - def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Float16Regs>; - def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>; - def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>; - def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Float16x2Regs>; - def ProxyRegBF16x2 : ProxyRegInst<"b32", v2bf16, Float16x2Regs>; +def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>; +def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>; +def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>; +def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>; +def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>; +def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>; + +foreach vt = [f16, bf16] in { + def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>; +} + +foreach vt = [v2f16, v2bf16] in { + def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>; } // @@ -2509,8 +2513,6 @@ defm LD_i16 : LD; defm LD_i32 : LD; defm LD_i64 : LD; - defm LD_f16 : LD; - defm LD_f16x2 : LD; defm LD_f32 : LD; defm LD_f64 : LD; } @@ -2559,8 +2561,6 @@ defm ST_i16 : ST; defm ST_i32 : ST; defm ST_i64 : ST; - defm ST_f16 : ST; - defm ST_f16x2 : ST; defm ST_f32 : ST; defm ST_f64 : ST; } @@ -2647,8 +2647,8 @@ defm LDV_i16 : LD_VEC; defm LDV_i32 : LD_VEC; defm LDV_i64 : LD_VEC; - defm LDV_f16 : LD_VEC; - defm LDV_f16x2 : LD_VEC; + defm LDV_f16 : LD_VEC; + defm LDV_f16x2 : LD_VEC; defm LDV_f32 : LD_VEC; defm LDV_f64 : LD_VEC; } @@ -2742,8 +2742,8 @@ defm STV_i16 : ST_VEC; defm STV_i32 : ST_VEC; defm STV_i64 : ST_VEC; - defm STV_f16 : ST_VEC; - defm STV_f16x2 : ST_VEC; + defm STV_f16 : ST_VEC; + defm STV_f16x2 : ST_VEC; defm STV_f32 : ST_VEC; defm STV_f64 : ST_VEC; } @@ -2757,23 +2757,30 @@ !strconcat("mov.b", SzStr, " \t$d, $a;"), [(set (TOut regclassOut:$d), (bitconvert (TIn regclassIn:$a)))]>; -def BITCONVERT_16_I2F : F_BITCONVERT<"16", i16, f16>; -def BITCONVERT_16_F2I : F_BITCONVERT<"16", f16, i16>; -def BITCONVERT_16_I2BF : F_BITCONVERT<"16", i16, bf16>; -def BITCONVERT_16_BF2I : F_BITCONVERT<"16", bf16, i16>; def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>; def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>; def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>; def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>; -def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", i32, v2f16>; -def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", v2f16, i32>; -def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", f32, v2f16>; -def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", v2f16, f32>; -def BITCONVERT_32_I2BF16x2 : F_BITCONVERT<"32", i32, v2bf16>; -def BITCONVERT_32_BF16x22I : F_BITCONVERT<"32", v2bf16, i32>; -def BITCONVERT_32_F2BF16x2 : F_BITCONVERT<"32", f32, v2bf16>; -def BITCONVERT_32_BF16x22F : F_BITCONVERT<"32", v2bf16, f32>; +foreach vt = [v2f16, v2bf16] in { +def: Pat<(vt (bitconvert (i32 UInt32Const:$a))), + (IMOVB32ri UInt32Const:$a)>; +def: Pat<(vt (bitconvert (i32 Int32Regs:$a))), + (ProxyRegI32 Int32Regs:$a)>; +def: Pat<(i32 (bitconvert (vt Int32Regs:$a))), + (ProxyRegI32 Int32Regs:$a)>; +def: Pat<(vt (bitconvert (f32 Float32Regs:$a))), + (BITCONVERT_32_F2I Float32Regs:$a)>; +} +foreach vt = [f16, bf16] in { +def: Pat<(vt (bitconvert (i16 UInt16Const:$a))), + (IMOVB16ri UInt16Const:$a)>; +def: Pat<(vt (bitconvert (i16 Int16Regs:$a))), + (ProxyRegI16 Int16Regs:$a)>; +def: Pat<(i16 (bitconvert (vt Int16Regs:$a))), + (ProxyRegI16 Int16Regs:$a)>; +} + // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where // we cannot specify floating-point literals in isel patterns. Therefore, we // use an integer selp to select either 1 or 0 and then cvt to floating-point. @@ -2840,24 +2847,24 @@ // f16 -> sint -def : Pat<(i1 (fp_to_sint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s16_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i32 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s32_f16 (f16 Float16Regs:$a), CvtRZI)>; -def : Pat<(i64 (fp_to_sint (f16 Float16Regs:$a))), - (CVT_s64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))), + (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; +def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>; +def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))), + (CVT_s64_f16 Int16Regs:$a, CvtRZI)>; // f16 -> uint -def : Pat<(i1 (fp_to_uint (f16 Float16Regs:$a))), - (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>; -def : Pat<(i16 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u16_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i32 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u32_f16 Float16Regs:$a, CvtRZI)>; -def : Pat<(i64 (fp_to_uint (f16 Float16Regs:$a))), - (CVT_u64_f16 Float16Regs:$a, CvtRZI)>; +def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))), + (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>; +def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u16_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u32_f16 Int16Regs:$a, CvtRZI)>; +def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))), + (CVT_u64_f16 Int16Regs:$a, CvtRZI)>; // f32 -> sint def : Pat<(i1 (fp_to_sint Float32Regs:$a)), @@ -2994,17 +3001,17 @@ // Select instructions with 32-bit predicates -def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b), +def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b), (SELP_b16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b), +def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b), (SELP_b32rr Int32Regs:$a, Int32Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b), (SELP_b64rr Int64Regs:$a, Int64Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; -def : Pat<(select Int32Regs:$pred, (f16 Float16Regs:$a), (f16 Float16Regs:$b)), - (SELP_f16rr Float16Regs:$a, Float16Regs:$b, +def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)), + (SELP_f16rr Int16Regs:$a, Int16Regs:$b, (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>; def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b), (SELP_f32rr Float32Regs:$a, Float32Regs:$b, @@ -3070,39 +3077,39 @@ // Extract element of f16x2 register. PTX does not provide any way // to access elements of f16x2 vector directly, so we need to // extract it using a temporary register. - def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), + def F16x2toF16_0 : NVPTXInst<(outs Int16Regs:$dst), + (ins Int32Regs:$src), "{{ .reg .b16 \t%tmp_hi;\n\t" " mov.b32 \t{$dst, %tmp_hi}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 0))]>; - def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst), - (ins Float16x2Regs:$src), + [(set Int16Regs:$dst, + (extractelt (v2f16 Int32Regs:$src), 0))]>; + def F16x2toF16_1 : NVPTXInst<(outs Int16Regs:$dst), + (ins Int32Regs:$src), "{{ .reg .b16 \t%tmp_lo;\n\t" " mov.b32 \t{%tmp_lo, $dst}, $src; }}", - [(set Float16Regs:$dst, - (extractelt (v2f16 Float16x2Regs:$src), 1))]>; + [(set Int16Regs:$dst, + (extractelt (v2f16 Int32Regs:$src), 1))]>; // Coalesce two f16 registers into f16x2 - def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst), - (ins Float16Regs:$a, Float16Regs:$b), + def BuildF16x2 : NVPTXInst<(outs Int32Regs:$dst), + (ins Int16Regs:$a, Int16Regs:$b), "mov.b32 \t$dst, {{$a, $b}};", - [(set (v2f16 Float16x2Regs:$dst), - (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>; + [(set (v2f16 Int32Regs:$dst), + (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>; // Directly initializing underlying the b32 register is one less SASS // instruction than than vector-packing move. - def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src), + def BuildF16x2i : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src), "mov.b32 \t$dst, $src;", []>; // Split f16x2 into two f16 registers. - def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), - (ins Float16x2Regs:$src), + def SplitF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), + (ins Int32Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; // Split an i32 into two f16 - def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi), + def SplitI32toF16x2 : NVPTXInst<(outs Int16Regs:$lo, Int16Regs:$hi), (ins Int32Regs:$src), "mov.b32 \t{{$lo, $hi}}, $src;", []>; @@ -3186,14 +3193,14 @@ (CVT_f32_f64 Float64Regs:$a, CvtRN)>; // fpextend f16 -> f32 -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; -def : Pat<(f32 (fpextend (f16 Float16Regs:$a))), - (CVT_f32_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>; +def : Pat<(f32 (fpextend (f16 Int16Regs:$a))), + (CVT_f32_f16 Int16Regs:$a, CvtNONE)>; // fpextend f16 -> f64 -def : Pat<(f64 (fpextend (f16 Float16Regs:$a))), - (CVT_f64_f16 Float16Regs:$a, CvtNONE)>; +def : Pat<(f64 (fpextend (f16 Int16Regs:$a))), + (CVT_f64_f16 Int16Regs:$a, CvtNONE)>; // fpextend f32 -> f64 def : Pat<(f64 (fpextend Float32Regs:$a)), @@ -3207,8 +3214,8 @@ // fceil, ffloor, froundeven, ftrunc. multiclass CVT_ROUND { - def : Pat<(OpNode (f16 Float16Regs:$a)), - (CVT_f16_f16 Float16Regs:$a, Mode)>; + def : Pat<(OpNode (f16 Int16Regs:$a)), + (CVT_f16_f16 Int16Regs:$a, Mode)>; def : Pat<(OpNode Float32Regs:$a), (CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>; def : Pat<(OpNode Float32Regs:$a), diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -686,46 +686,46 @@ multiclass MIN_MAX { foreach P = [ MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16, - int_nvvm_fmax_f16), Float16Regs>, + int_nvvm_fmax_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16, - int_nvvm_fmax_ftz_f16), Float16Regs>, + int_nvvm_fmax_ftz_f16), Int16Regs>, MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16, - int_nvvm_fmax_nan_f16), Float16Regs>, + int_nvvm_fmax_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>, + int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>, MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16), - Float16Regs, [hasPTX<72>, hasSM<86>]>, + Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16, - int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX<72>, hasSM<86>]>, + int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2, - int_nvvm_fmax_f16x2), Float16x2Regs>, + int_nvvm_fmax_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>, + int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>, + int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"), - int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>, + int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>, MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2, int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2), - Float16x2Regs, [hasPTX<72>, hasSM<86>]>, + Int32Regs, [hasPTX<72>, hasSM<86>]>, MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>, MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16, @@ -933,9 +933,9 @@ def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;", Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>; def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;", - Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; + Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;", - Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; + Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>; def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;", Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>; @@ -986,30 +986,30 @@ FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>, FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>, - FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs, + FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs, + FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs, [hasPTX<70>, hasSM<80>]>, - FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs, + FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs, [hasPTX<42>, hasSM<53>]>, FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2, - Float16x2Regs, [hasPTX<42>, hasSM<53>]>, - FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs, + Int32Regs, [hasPTX<42>, hasSM<53>]>, + FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2, - Float16x2Regs, [hasPTX<70>, hasSM<80>]>, + Int32Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>, FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs, @@ -1397,9 +1397,9 @@ def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a), - (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>; + (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>; def : Pat<(int_nvvm_f2h_rn Float32Regs:$a), - (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>; + (CVT_f16_f32 Float32Regs:$a, CvtRN)>; // // Bitcast @@ -2159,8 +2159,8 @@ defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; -defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>; -defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>; +defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Int16Regs>; +defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; @@ -2212,9 +2212,9 @@ defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDU_G_v2f16_ELE - : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDU_G_v2f16x2_ELE - : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; + : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; defm INT_PTX_LDU_G_v2i64_ELE @@ -2231,10 +2231,10 @@ Int32Regs>; defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", - Float16Regs>; + Int16Regs>; defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", - Float16x2Regs>; + Int32Regs>; defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; @@ -2275,9 +2275,9 @@ defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>; defm INT_PTX_LDG_GLOBAL_f16 - : LDG_G<"b16 \t$result, [$src];", Float16Regs>; + : LDG_G<"b16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDG_GLOBAL_f16x2 - : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>; + : LDG_G<"b32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDG_GLOBAL_f64 @@ -2334,9 +2334,9 @@ defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v2f16_ELE - : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v2f16x2_ELE - : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>; + : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>; defm INT_PTX_LDG_G_v2i64_ELE @@ -2350,9 +2350,9 @@ defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v4f16_ELE - : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>; + : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>; defm INT_PTX_LDG_G_v4f16x2_ELE - : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>; + : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>; defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>; @@ -6305,7 +6305,7 @@ : WMMA_REGS { // NVPTX register types used to carry fragment data. NVPTXRegClass regclass = !cond( - !eq(ptx_elt_type, "f16") : Float16x2Regs, + !eq(ptx_elt_type, "f16") : Int32Regs, !eq(ptx_elt_type, "f32") : Float32Regs, !eq(ptx_elt_type, "f64") : Float64Regs, !eq(ptx_elt_type, "bf16") : Int32Regs, diff --git a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp --- a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp @@ -73,10 +73,6 @@ case NVPTX::ProxyRegI16: case NVPTX::ProxyRegI32: case NVPTX::ProxyRegI64: - case NVPTX::ProxyRegF16: - case NVPTX::ProxyRegF16x2: - case NVPTX::ProxyRegBF16: - case NVPTX::ProxyRegBF16x2: case NVPTX::ProxyRegF32: case NVPTX::ProxyRegF64: replaceMachineInstructionUsage(MF, MI); diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp @@ -29,14 +29,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return ".f32"; - if (RC == &NVPTX::Float16RegsRegClass) - // Ideally fp16 registers should be .f16, but this syntax is only - // supported on sm_53+. On the other hand, .b16 registers are - // accepted for all supported fp16 instructions on all GPU - // variants, so we can use them instead. - return ".b16"; - if (RC == &NVPTX::Float16x2RegsRegClass) - return ".b32"; if (RC == &NVPTX::Float64RegsRegClass) return ".f64"; if (RC == &NVPTX::Int64RegsRegClass) @@ -73,10 +65,6 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) { if (RC == &NVPTX::Float32RegsRegClass) return "%f"; - if (RC == &NVPTX::Float16RegsRegClass) - return "%h"; - if (RC == &NVPTX::Float16x2RegsRegClass) - return "%hh"; if (RC == &NVPTX::Float64RegsRegClass) return "%fd"; if (RC == &NVPTX::Int64RegsRegClass) diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td --- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -57,11 +57,11 @@ // Register classes //===----------------------------------------------------------------------===// def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>; -def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>; -def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>; +def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>; +def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16], 32, + (add (sequence "R%u", 0, 4), + VRFrame32, VRFrameLocal32)>; def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>; -def Float16Regs : NVPTXRegClass<[f16,bf16], 16, (add (sequence "H%u", 0, 4))>; -def Float16x2Regs : NVPTXRegClass<[v2f16,v2bf16], 32, (add (sequence "HH%u", 0, 4))>; def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>; def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>; def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>; diff --git a/llvm/test/CodeGen/NVPTX/bf16.ll b/llvm/test/CodeGen/NVPTX/bf16.ll --- a/llvm/test/CodeGen/NVPTX/bf16.ll +++ b/llvm/test/CodeGen/NVPTX/bf16.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in store bfloat %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_bfloat -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load bfloat, ptr addrspace(1) %in %val_int = bitcast bfloat %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,8 +14,8 @@ %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2+0], %h1; -; CHECK: st.param.b16 [param2+2], %h2; +; CHECK: st.param.b16 [param2+0], %rs1; +; CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: _Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE, @@ -37,8 +37,8 @@ define void @boom() { %fp = call ptr @usefp(ptr @callee) ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0+0], %h1; - ; CHECK: st.param.b16 [param0+2], %h2; + ; CHECK: st.param.b16 [param0+0], %rs1; + ; CHECK: st.param.b16 [param0+2], %rs2; ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll @@ -43,7 +43,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( -; CHECK: mov.b16 [[R:%h[0-9]+]], 0x3C00; +; CHECK: mov.b16 [[R:%rs[0-9]+]], 0x3C00; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_ret_const() #0 { @@ -51,14 +51,14 @@ } ; CHECK-LABEL: test_fadd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd(half %a, half %b) #0 { @@ -67,14 +67,14 @@ } ; CHECK-LABEL: test_fadd_v1f16( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fadd_v1f16_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_v1f16_param_1]; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fadd_v1f16_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_v1f16_param_1]; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 { @@ -84,14 +84,14 @@ ; Check that we can lower fadd with immediate arguments. ; CHECK-LABEL: test_fadd_imm_0( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_0_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_0_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_0(half %b) #0 { @@ -100,14 +100,14 @@ } ; CHECK-LABEL: test_fadd_imm_1( -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fadd_imm_1_param_0]; -; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[A]]; -; CHECK-F16-FTZ-DAG: mov.b16 [[A:%h[0-9]+]], 0x3C00; -; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fadd_imm_1_param_0]; +; CHECK-F16-NOFTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-NOFTZ-NEXT: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; +; CHECK-F16-FTZ-DAG: mov.b16 [[A:%rs[0-9]+]], 0x3C00; +; CHECK-F16-FTZ-NEXT: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], 0f3F800000; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fadd_imm_1(half %a) #0 { @@ -116,14 +116,14 @@ } ; CHECK-LABEL: test_fsub( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fsub_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fsub_param_1]; -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fsub_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fsub_param_1]; +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fsub(half %a, half %b) #0 { @@ -132,15 +132,15 @@ } ; CHECK-LABEL: test_fneg( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fneg_param_0]; -; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; -; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%h[0-9]+]], 0x0000 -; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%h[0-9]+]], [[Z]], [[A]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fneg_param_0]; +; CHECK-F16-NOFTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-NOFTZ-NEXT: sub.rn.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; +; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000 +; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fneg(half %a) #0 { @@ -149,14 +149,14 @@ } ; CHECK-LABEL: test_fmul( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmul_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmul_param_1]; -; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]]; -; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmul_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmul_param_1]; +; CHECK-F16-NOFTZ-NEXT: mul.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; +; CHECK-F16-FTZ-NEXT: mul.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-NEXT: mul.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fmul(half %a, half %b) #0 { @@ -165,15 +165,15 @@ } ; CHECK-LABEL: test_fdiv( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fdiv_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fdiv_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fdiv_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fdiv_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F0:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[F1:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ-NEXT: div.rn.ftz.f32 [[FR:%f[0-9]+]], [[F0]], [[F1]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[FR]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[FR]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_fdiv(half %a, half %b) #0 { @@ -182,8 +182,8 @@ } ; CHECK-LABEL: test_frem( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_frem_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_frem_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_frem_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_frem_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FA:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[FB:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ-NEXT: div.rn.f32 [[D:%f[0-9]+]], [[FA]], [[FB]]; @@ -198,7 +198,7 @@ ; CHECK-F16-FTZ-NEXT: sub.ftz.f32 [[RF:%f[0-9]+]], [[FA]], [[RI]]; ; CHECK-NEXT: testp.infinite.f32 [[ISBINF:%p[0-9]+]], [[FB]]; ; CHECK-NEXT: selp.f32 [[RESULT:%f[0-9]+]], [[FA]], [[RF]], [[ISBINF]]; -; CHECK-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RESULT]]; +; CHECK-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RESULT]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_frem(half %a, half %b) #0 { @@ -207,7 +207,7 @@ } ; CHECK-LABEL: test_store( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_store_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_store_param_0]; ; CHECK-DAG: ld.param.u64 %[[PTR:rd[0-9]+]], [test_store_param_1]; ; CHECK-NEXT: st.b16 [%[[PTR]]], [[A]]; ; CHECK-NEXT: ret; @@ -218,7 +218,7 @@ ; CHECK-LABEL: test_load( ; CHECK: ld.param.u64 %[[PTR:rd[0-9]+]], [test_load_param_0]; -; CHECK-NEXT: ld.b16 [[R:%h[0-9]+]], [%[[PTR]]]; +; CHECK-NEXT: ld.b16 [[R:%rs[0-9]+]], [%[[PTR]]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_load(ptr %a) #0 { @@ -243,8 +243,8 @@ declare half @test_callee(half %a, half %b) #0 ; CHECK-LABEL: test_call( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -254,7 +254,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -264,8 +264,8 @@ } ; CHECK-LABEL: test_call_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_call_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_call_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_call_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_call_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -275,7 +275,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -285,8 +285,8 @@ } ; CHECK-LABEL: test_tailcall_flipped( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_tailcall_flipped_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_tailcall_flipped_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_tailcall_flipped_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_tailcall_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .b32 param0; ; CHECK-DAG: .param .b32 param1; @@ -296,7 +296,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -306,10 +306,10 @@ } ; CHECK-LABEL: test_select( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_param_1]; ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select(half %a, half %b, i1 zeroext %c) #0 { @@ -318,15 +318,15 @@ } ; CHECK-LABEL: test_select_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_param_3]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]]; ; CHECK-NOF16: setp.neu.f32 [[PRED:%p[0-9]+]], [[CF]], [[DF]] -; CHECK: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { @@ -338,8 +338,8 @@ ; CHECK-LABEL: test_select_cc_f32_f16( ; CHECK-DAG: ld.param.f32 [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0]; ; CHECK-DAG: ld.param.f32 [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2]; -; CHECK-DAG: ld.param.b16 [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_select_cc_f32_f16_param_2]; +; CHECK-DAG: ld.param.b16 [[D:%rs[0-9]+]], [test_select_cc_f32_f16_param_3]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]]; @@ -355,13 +355,13 @@ } ; CHECK-LABEL: test_select_cc_f16_f32( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_select_cc_f16_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2]; ; CHECK-DAG: ld.param.f32 [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3]; ; CHECK-NOFTZ-DAG: setp.neu.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] ; CHECK-F16-FTZ-DAG: setp.neu.ftz.f32 [[PRED:%p[0-9]+]], [[C]], [[D]] -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1]; -; CHECK-NEXT: selp.b16 [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_select_cc_f16_f32_param_1]; +; CHECK-NEXT: selp.b16 [[R:%rs[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { @@ -371,8 +371,8 @@ } ; CHECK-LABEL: test_fcmp_une( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_une_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_une_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_une_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16-NOFTZ: setp.neu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.neu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -387,8 +387,8 @@ } ; CHECK-LABEL: test_fcmp_ueq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ueq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ueq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ueq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16-NOFTZ: setp.equ.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.equ.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -403,8 +403,8 @@ } ; CHECK-LABEL: test_fcmp_ugt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ugt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ugt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ugt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16-NOFTZ: setp.gtu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gtu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -419,8 +419,8 @@ } ; CHECK-LABEL: test_fcmp_uge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16-NOFTZ: setp.geu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.geu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -435,8 +435,8 @@ } ; CHECK-LABEL: test_fcmp_ult( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ult_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ult_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ult_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16-NOFTZ: setp.ltu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ltu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -451,8 +451,8 @@ } ; CHECK-LABEL: test_fcmp_ule( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ule_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ule_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ule_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16-NOFTZ: setp.leu.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.leu.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -468,8 +468,8 @@ ; CHECK-LABEL: test_fcmp_uno( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_uno_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_uno_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_uno_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16-NOFTZ: setp.nan.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.nan.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -484,8 +484,8 @@ } ; CHECK-LABEL: test_fcmp_one( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_one_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_one_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_one_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16-NOFTZ: setp.ne.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ne.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -500,8 +500,8 @@ } ; CHECK-LABEL: test_fcmp_oeq( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oeq_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oeq_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oeq_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16-NOFTZ: setp.eq.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.eq.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -516,8 +516,8 @@ } ; CHECK-LABEL: test_fcmp_ogt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ogt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ogt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ogt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16-NOFTZ: setp.gt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.gt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -532,8 +532,8 @@ } ; CHECK-LABEL: test_fcmp_oge( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_oge_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_oge_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_oge_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16-NOFTZ: setp.ge.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.ge.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -548,8 +548,8 @@ } ; XCHECK-LABEL: test_fcmp_olt( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_olt_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_olt_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_olt_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.lt.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -564,8 +564,8 @@ } ; XCHECK-LABEL: test_fcmp_ole( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ole_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ole_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ole_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16-NOFTZ: setp.le.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.le.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -580,8 +580,8 @@ } ; CHECK-LABEL: test_fcmp_ord( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fcmp_ord_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fcmp_ord_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fcmp_ord_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16-NOFTZ: setp.num.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-F16-FTZ: setp.num.ftz.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; @@ -596,8 +596,8 @@ } ; CHECK-LABEL: test_br_cc( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_br_cc_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_br_cc_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_br_cc_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_br_cc_param_1]; ; CHECK-DAG: ld.param.u64 %[[C:rd[0-9]+]], [test_br_cc_param_2]; ; CHECK-DAG: ld.param.u64 %[[D:rd[0-9]+]], [test_br_cc_param_3]; ; CHECK-F16-NOFTZ: setp.lt.f16 [[PRED:%p[0-9]+]], [[A]], [[B]] @@ -623,10 +623,10 @@ ; CHECK-LABEL: test_phi( ; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0]; -; CHECK: ld.b16 {{%h[0-9]+}}, [%[[P1]]]; +; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]]; ; CHECK: [[LOOP:\$L__BB[0-9_]+]]: -; CHECK: mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]]; -; CHECK: ld.b16 [[AB:%h[0-9]+]], [%[[P1]]]; +; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]]; +; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]]; ; CHECK: { ; CHECK: st.param.b64 [param0+0], %[[P1]]; ; CHECK: call.uni (retval0), @@ -651,7 +651,7 @@ declare i1 @test_dummy(ptr %p1) #0 ; CHECK-LABEL: test_fptosi_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i32_param_0]; ; CHECK: cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -661,7 +661,7 @@ } ; CHECK-LABEL: test_fptosi_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptosi_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptosi_i64_param_0]; ; CHECK: cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -671,7 +671,7 @@ } ; CHECK-LABEL: test_fptoui_i32( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i32_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i32_param_0]; ; CHECK: cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -681,7 +681,7 @@ } ; CHECK-LABEL: test_fptoui_i64( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fptoui_i64_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fptoui_i64_param_0]; ; CHECK: cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]]; ; CHECK: st.param.b64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -692,7 +692,7 @@ ; CHECK-LABEL: test_uitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_param_0]; -; CHECK: cvt.rn.f16.u32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32(i32 %a) #0 { @@ -702,7 +702,7 @@ ; CHECK-LABEL: test_uitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_uitofp_i64_param_0]; -; CHECK: cvt.rn.f16.u64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.u64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i64(i64 %a) #0 { @@ -712,7 +712,7 @@ ; CHECK-LABEL: test_sitofp_i32( ; CHECK: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_param_0]; -; CHECK: cvt.rn.f16.s32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32(i32 %a) #0 { @@ -722,7 +722,7 @@ ; CHECK-LABEL: test_sitofp_i64( ; CHECK: ld.param.u64 [[A:%rd[0-9]+]], [test_sitofp_i64_param_0]; -; CHECK: cvt.rn.f16.s64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.s64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i64(i64 %a) #0 { @@ -732,14 +732,14 @@ ; CHECK-LABEL: test_uitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_uitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { @@ -750,14 +750,14 @@ ; CHECK-LABEL: test_sitofp_i32_fadd( ; CHECK-DAG: ld.param.u32 [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[C:%h[0-9]+]], [[A]]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1]; -; CHECK-F16-NOFTZ: add.rn.f16 [[R:%h[0-9]+]], [[B]], [[C]]; -; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%h[0-9]+]], [[B]], [[C]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C:%rs[0-9]+]], [[A]]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_sitofp_i32_fadd_param_1]; +; CHECK-F16-NOFTZ: add.rn.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; +; CHECK-F16-FTZ: add.rn.ftz.f16 [[R:%rs[0-9]+]], [[B]], [[C]]; ; XCHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; XCHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; XCHECK-NOF16-NEXT: add.rn.f32 [[R32:%f[0-9]+]], [[B32]], [[C32]]; -; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; XCHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { @@ -768,7 +768,7 @@ ; CHECK-LABEL: test_fptrunc_float( ; CHECK: ld.param.f32 [[A:%f[0-9]+]], [test_fptrunc_float_param_0]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_float(float %a) #0 { @@ -778,7 +778,7 @@ ; CHECK-LABEL: test_fptrunc_double( ; CHECK: ld.param.f64 [[A:%fd[0-9]+]], [test_fptrunc_double_param_0]; -; CHECK: cvt.rn.f16.f64 [[R:%h[0-9]+]], [[A]]; +; CHECK: cvt.rn.f16.f64 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fptrunc_double(double %a) #0 { @@ -787,7 +787,7 @@ } ; CHECK-LABEL: test_fpext_float( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_float_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_float_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[R:%f[0-9]+]], [[A]]; ; CHECK: st.param.f32 [func_retval0+0], [[R]]; @@ -798,7 +798,7 @@ } ; CHECK-LABEL: test_fpext_double( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fpext_double_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fpext_double_param_0]; ; CHECK: cvt.f64.f16 [[R:%fd[0-9]+]], [[A]]; ; CHECK: st.param.f64 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -809,9 +809,8 @@ ; CHECK-LABEL: test_bitcast_halftoi16( -; CHECK: ld.param.b16 [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0]; -; CHECK: mov.b16 [[AS:%rs[0-9]+]], [[AH]] -; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AS]] +; CHECK: ld.param.b16 [[AH:%rs[0-9]+]], [test_bitcast_halftoi16_param_0]; +; CHECK: cvt.u32.u16 [[R:%r[0-9]+]], [[AH]] ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define i16 @test_bitcast_halftoi16(half %a) #0 { @@ -821,8 +820,7 @@ ; CHECK-LABEL: test_bitcast_i16tohalf( ; CHECK: ld.param.u16 [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0]; -; CHECK: mov.b16 [[AH:%h[0-9]+]], [[AS]] -; CHECK: st.param.b16 [func_retval0+0], [[AH]]; +; CHECK: st.param.b16 [func_retval0+0], [[AS]]; ; CHECK: ret; define half @test_bitcast_i16tohalf(i16 %a) #0 { %r = bitcast i16 %a to half @@ -855,12 +853,12 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0 ; CHECK-LABEL: test_sqrt( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sqrt_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sqrt_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: sqrt.rn.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: sqrt.rn.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sqrt(half %a) #0 { @@ -876,11 +874,11 @@ ;} ; CHECK-LABEL: test_sin( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_sin_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: sin.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_sin(half %a) #0 #1 { @@ -889,11 +887,11 @@ } ; CHECK-LABEL: test_cos( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_cos_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK: cos.approx.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_cos(half %a) #0 #1 { @@ -944,16 +942,16 @@ ;} ; CHECK-LABEL: test_fma( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fma_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fma_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fma_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fma_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fma_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fma_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret define half @test_fma(half %a, half %b, half %c) #0 { @@ -962,12 +960,12 @@ } ; CHECK-LABEL: test_fabs( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_fabs_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_fabs_param_0]; ; CHECK-NOFTZ: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ: abs.f32 [[RF:%f[0-9]+]], [[AF]]; ; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ: abs.ftz.f32 [[RF:%f[0-9]+]], [[AF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fabs(half %a) #0 { @@ -976,15 +974,15 @@ } ; CHECK-LABEL: test_minnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_minnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_minnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_minnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_minnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: min.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: min.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_minnum(half %a, half %b) #0 { @@ -993,15 +991,15 @@ } ; CHECK-LABEL: test_maxnum( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_maxnum_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_maxnum_param_1]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_maxnum_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_maxnum_param_1]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-NOFTZ-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-NOFTZ: max.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[AF:%f[0-9]+]], [[A]]; ; CHECK-F16-FTZ-DAG: cvt.ftz.f32.f16 [[BF:%f[0-9]+]], [[B]]; ; CHECK-F16-FTZ: max.ftz.f32 [[RF:%f[0-9]+]], [[AF]], [[BF]]; -; CHECK: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[RF]]; +; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_maxnum(half %a, half %b) #0 { @@ -1010,15 +1008,12 @@ } ; CHECK-LABEL: test_copysign( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_param_1]; -; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; -; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; -; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; -; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_param_1]; +; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767; +; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0+0], [[RX]]; ; CHECK: ret; define half @test_copysign(half %a, half %b) #0 { %r = call half @llvm.copysign.f16(half %a, half %b) @@ -1026,16 +1021,14 @@ } ; CHECK-LABEL: test_copysign_f32( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f32_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.f32 [[BF:%f[0-9]+]], [test_copysign_f32_param_1]; -; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b32 [[B:%r[0-9]+]], [[BF]]; -; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767; +; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767; ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[B]], -2147483648; ; CHECK-DAG: mov.b32 {tmp, [[BX2:%rs[0-9]+]]}, [[BX0]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0+0], [[RX]]; ; CHECK: ret; define half @test_copysign_f32(half %a, float %b) #0 { %tb = fptrunc float %b to half @@ -1044,17 +1037,15 @@ } ; CHECK-LABEL: test_copysign_f64( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_f64_param_0]; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.f64 [[BD:%fd[0-9]+]], [test_copysign_f64_param_1]; -; CHECK-DAG: mov.b16 [[A:%rs[0-9]+]], [[AH]]; ; CHECK-DAG: mov.b64 [[B:%rd[0-9]+]], [[BD]]; -; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[A]], 32767; +; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767; ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808; ; CHECK-DAG: shr.u64 [[BX1:%rd[0-9]+]], [[BX0]], 48; ; CHECK-DAG: cvt.u16.u64 [[BX2:%rs[0-9]+]], [[BX1]]; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX2]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; -; CHECK: st.param.b16 [func_retval0+0], [[R]]; +; CHECK: st.param.b16 [func_retval0+0], [[RX]]; ; CHECK: ret; define half @test_copysign_f64(half %a, double %b) #0 { %tb = fptrunc double %b to half @@ -1063,16 +1054,13 @@ } ; CHECK-LABEL: test_copysign_extended( -; CHECK-DAG: ld.param.b16 [[AH:%h[0-9]+]], [test_copysign_extended_param_0]; -; CHECK-DAG: ld.param.b16 [[BH:%h[0-9]+]], [test_copysign_extended_param_1]; -; CHECK-DAG: mov.b16 [[AS:%rs[0-9]+]], [[AH]]; -; CHECK-DAG: mov.b16 [[BS:%rs[0-9]+]], [[BH]]; -; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AS]], 32767; -; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BS]], -32768; +; CHECK-DAG: ld.param.b16 [[AH:%rs[0-9]+]], [test_copysign_extended_param_0]; +; CHECK-DAG: ld.param.b16 [[BH:%rs[0-9]+]], [test_copysign_extended_param_1]; +; CHECK-DAG: and.b16 [[AX:%rs[0-9]+]], [[AH]], 32767; +; CHECK-DAG: and.b16 [[BX:%rs[0-9]+]], [[BH]], -32768; ; CHECK: or.b16 [[RX:%rs[0-9]+]], [[AX]], [[BX]]; -; CHECK: mov.b16 [[R:%h[0-9]+]], [[RX]]; -; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[R]]; -; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[R]]; +; CHECK-NOFTZ: cvt.f32.f16 [[XR:%f[0-9]+]], [[RX]]; +; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[XR:%f[0-9]+]], [[RX]]; ; CHECK: st.param.f32 [func_retval0+0], [[XR]]; ; CHECK: ret; define float @test_copysign_extended(half %a, half %b) #0 { @@ -1082,8 +1070,8 @@ } ; CHECK-LABEL: test_floor( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_floor_param_0]; -; CHECK: cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_floor_param_0]; +; CHECK: cvt.rmi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_floor(half %a) #0 { @@ -1092,8 +1080,8 @@ } ; CHECK-LABEL: test_ceil( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_ceil_param_0]; -; CHECK: cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_ceil_param_0]; +; CHECK: cvt.rpi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_ceil(half %a) #0 { @@ -1102,8 +1090,8 @@ } ; CHECK-LABEL: test_trunc( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_trunc_param_0]; -; CHECK: cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_trunc_param_0]; +; CHECK: cvt.rzi.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_trunc(half %a) #0 { @@ -1112,8 +1100,8 @@ } ; CHECK-LABEL: test_rint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_rint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_rint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_rint(half %a) #0 { @@ -1122,8 +1110,8 @@ } ; CHECK-LABEL: test_nearbyint( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_nearbyint_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_nearbyint_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_nearbyint(half %a) #0 { @@ -1132,8 +1120,8 @@ } ; CHECK-LABEL: test_roundeven( -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_roundeven_param_0]; -; CHECK: cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_roundeven_param_0]; +; CHECK: cvt.rni.f16.f16 [[R:%rs[0-9]+]], [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_roundeven(half %a) #0 { @@ -1154,16 +1142,16 @@ } ; CHECK-LABEL: test_fmuladd( -; CHECK-DAG: ld.param.b16 [[A:%h[0-9]+]], [test_fmuladd_param_0]; -; CHECK-DAG: ld.param.b16 [[B:%h[0-9]+]], [test_fmuladd_param_1]; -; CHECK-DAG: ld.param.b16 [[C:%h[0-9]+]], [test_fmuladd_param_2]; -; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; -; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%h[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-DAG: ld.param.b16 [[A:%rs[0-9]+]], [test_fmuladd_param_0]; +; CHECK-DAG: ld.param.b16 [[B:%rs[0-9]+]], [test_fmuladd_param_1]; +; CHECK-DAG: ld.param.b16 [[C:%rs[0-9]+]], [test_fmuladd_param_2]; +; CHECK-F16-NOFTZ: fma.rn.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16-FTZ: fma.rn.ftz.f16 [[R:%rs[0-9]+]], [[A]], [[B]], [[C]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[B32:%f[0-9]+]], [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[C32:%f[0-9]+]], [[C]] ; CHECK-NOF16-NEXT: fma.rn.f32 [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]]; -; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]] +; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]] ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_fmuladd(half %a, half %b, half %c) #0 { diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -31,8 +31,7 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: test_ret_const( -; CHECK: mov.u32 [[T:%r[0-9+]]], 1073757184; -; CHECK: mov.b32 [[R:%hh[0-9+]]], [[T]]; +; CHECK: mov.b32 [[R:%r[0-9+]]], 1073757184; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_ret_const() #0 { @@ -40,8 +39,8 @@ } ; CHECK-LABEL: test_extract_0( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_0_param_0]; -; CHECK: mov.b32 {[[R:%h[0-9]+]], %tmp_hi}, [[A]]; +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_0_param_0]; +; CHECK: mov.b32 {[[R:%rs[0-9]+]], %tmp_hi}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_0(<2 x half> %a) #0 { @@ -50,8 +49,8 @@ } ; CHECK-LABEL: test_extract_1( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_1_param_0]; -; CHECK: mov.b32 {%tmp_lo, [[R:%h[0-9]+]]}, [[A]]; +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_extract_1_param_0]; +; CHECK: mov.b32 {%tmp_lo, [[R:%rs[0-9]+]]}, [[A]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_1(<2 x half> %a) #0 { @@ -60,11 +59,11 @@ } ; CHECK-LABEL: test_extract_i( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_extract_i_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_extract_i_param_0]; ; CHECK-DAG: ld.param.u64 [[IDX:%rd[0-9]+]], [test_extract_i_param_1]; ; CHECK-DAG: setp.eq.s64 [[PRED:%p[0-9]+]], [[IDX]], 0; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[A]]; -; CHECK: selp.b16 [[R:%h[0-9]+]], [[E0]], [[E1]], [[PRED]]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[A]]; +; CHECK: selp.b16 [[R:%rs[0-9]+]], [[E0]], [[E1]], [[PRED]]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK: ret; define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { @@ -73,22 +72,22 @@ } ; CHECK-LABEL: test_fadd( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_param_1]; ; -; CHECK-F16-NEXT: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-F16-NEXT: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -99,20 +98,19 @@ ; Check that we can lower fadd with immediate arguments. ; CHECK-LABEL: test_fadd_imm_0( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fadd_imm_0_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fadd_imm_0_param_0]; ; -; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; -; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; -; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[IHH]]; +; CHECK-F16: mov.b32 [[I:%r[0-9+]]], 1073757184; +; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[I]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -122,20 +120,19 @@ } ; CHECK-LABEL: test_fadd_imm_1( -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fadd_imm_1_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fadd_imm_1_param_0]; ; -; CHECK-F16: mov.u32 [[I:%r[0-9+]]], 1073757184; -; CHECK-F16: mov.b32 [[IHH:%hh[0-9+]]], [[I]]; -; CHECK-F16: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[IHH]]; +; CHECK-F16: mov.b32 [[I:%r[0-9+]]], 1073757184; +; CHECK-F16: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[I]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], 0f3F800000; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], 0f40000000; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -145,22 +142,22 @@ } ; CHECK-LABEL: test_fsub( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fsub_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fsub_param_0]; ; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fsub_param_1]; -; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fsub_param_1]; +; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -170,21 +167,20 @@ } ; CHECK-LABEL: test_fneg( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fneg_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fneg_param_0]; ; -; CHECK-F16: mov.u32 [[I0:%r[0-9+]]], 0; -; CHECK-F16: mov.b32 [[IHH0:%hh[0-9+]]], [[I0]]; -; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%hh[0-9]+]], [[IHH0]], [[A]]; +; CHECK-F16: mov.b32 [[I:%r[0-9+]]], 0; +; CHECK-F16-NEXT: sub.rn.f16x2 [[R:%r[0-9]+]], [[I]], [[A]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR0:%f[0-9]+]], [[Z]], [[FA0]]; ; CHECK-NOF16-DAG: sub.rn.f32 [[FR1:%f[0-9]+]], [[Z]], [[FA1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -194,21 +190,21 @@ } ; CHECK-LABEL: test_fmul( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmul_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmul_param_1]; -; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmul_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmul_param_1]; +; CHECK-F16-NEXT: mul.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: mul.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-NOF16-DAG: mul.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -218,19 +214,19 @@ } ; CHECK-LABEL: test_fdiv( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fdiv_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fdiv_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fdiv_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fdiv_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: div.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]]; ; CHECK-DAG: div.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]]; -; CHECK-NEXT: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]]; +; CHECK-NEXT: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { @@ -240,11 +236,11 @@ ; CHECK-LABEL: test_frem( ; -- Load two 16x2 inputs and split them into f16 elements -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_frem_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_frem_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_frem_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_frem_param_1]; ; -- Split into elements -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; -- promote to f32. ; CHECK-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]]; @@ -265,10 +261,10 @@ ; CHECK-DAG: testp.infinite.f32 [[ISB1INF:%p[0-9]+]], [[FB1]]; ; CHECK-DAG: selp.f32 [[RF1:%f[0-9]+]], [[FA1]], [[RFNINF1]], [[ISB1INF]]; ; -- convert back to f16. -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; ; -- merge into f16x2 and return it. -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { @@ -279,8 +275,8 @@ ; CHECK-LABEL: .func test_ldst_v2f16( ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v2f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v2f16_param_1]; -; CHECK-DAG: ld.b32 [[E:%hh[0-9]+]], [%[[A]]] -; CHECK: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[E]]; +; CHECK-DAG: ld.b32 [[E:%r[0-9]+]], [%[[A]]] +; CHECK: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[E]]; ; CHECK-DAG: st.v2.b16 [%[[B]]], {[[E0]], [[E1]]}; ; CHECK: ret; define void @test_ldst_v2f16(ptr %a, ptr %b) { @@ -309,7 +305,7 @@ ; CHECK-LABEL: .func test_ldst_v4f16( ; CHECK-DAG: ld.param.u64 %[[A:rd[0-9]+]], [test_ldst_v4f16_param_0]; ; CHECK-DAG: ld.param.u64 %[[B:rd[0-9]+]], [test_ldst_v4f16_param_1]; -; CHECK-DAG: ld.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [%[[A]]]; +; CHECK-DAG: ld.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [%[[A]]]; ; CHECK-DAG: st.v4.b16 [%[[B]]], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: ret; define void @test_ldst_v4f16(ptr %a, ptr %b) { @@ -333,8 +329,8 @@ declare <2 x half> @test_callee(<2 x half> %a, <2 x half> %b) #0 ; CHECK-LABEL: test_call( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -344,7 +340,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -354,8 +350,8 @@ } ; CHECK-LABEL: test_call_flipped( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_call_flipped_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_call_flipped_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_call_flipped_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_call_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -365,7 +361,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -375,8 +371,8 @@ } ; CHECK-LABEL: test_tailcall_flipped( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_tailcall_flipped_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_tailcall_flipped_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_tailcall_flipped_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_tailcall_flipped_param_1]; ; CHECK: { ; CHECK-DAG: .param .align 4 .b8 param0[4]; ; CHECK-DAG: .param .align 4 .b8 param1[4]; @@ -386,7 +382,7 @@ ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_callee, ; CHECK: ); -; CHECK-NEXT: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK-NEXT: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK-NEXT: } ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; @@ -396,11 +392,11 @@ } ; CHECK-LABEL: test_select( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_param_1]; ; CHECK-DAG: ld.param.u8 [[C:%rs[0-9]+]], [test_select_param_2] ; CHECK-DAG: setp.eq.b16 [[PRED:%p[0-9]+]], %rs{{.*}}, 1; -; CHECK-NEXT: selp.b32 [[R:%hh[0-9]+]], [[A]], [[B]], [[PRED]]; +; CHECK-NEXT: selp.b32 [[R:%r[0-9]+]], [[A]], [[B]], [[PRED]]; ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select(<2 x half> %a, <2 x half> %b, i1 zeroext %c) #0 { @@ -409,15 +405,15 @@ } ; CHECK-LABEL: test_select_cc( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_param_2]; -; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_param_3]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_param_2]; +; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_param_3]; ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] ; -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -425,11 +421,11 @@ ; CHECK-NOF16-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[CF0]], [[DF0]] ; CHECK-NOF16-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[CF1]], [[DF1]] ; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d) #0 { @@ -441,12 +437,12 @@ ; CHECK-LABEL: test_select_cc_f32_f16( ; CHECK-DAG: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_select_cc_f32_f16_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_select_cc_f32_f16_param_2]; -; CHECK-DAG: ld.param.b32 [[D:%hh[0-9]+]], [test_select_cc_f32_f16_param_3]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_select_cc_f32_f16_param_2]; +; CHECK-DAG: ld.param.b32 [[D:%r[0-9]+]], [test_select_cc_f32_f16_param_3]; ; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[C]], [[D]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] -; CHECK-NOF16-DAG: mov.b32 {[[D0:%h[0-9]+]], [[D1:%h[0-9]+]]}, [[D]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[D0:%rs[0-9]+]], [[D1:%rs[0-9]+]]}, [[D]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF0:%f[0-9]+]], [[D0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF0:%f[0-9]+]], [[C0]]; ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF1:%f[0-9]+]], [[D1]]; @@ -466,17 +462,17 @@ } ; CHECK-LABEL: test_select_cc_f16_f32( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_select_cc_f16_f32_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_select_cc_f16_f32_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_select_cc_f16_f32_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_select_cc_f16_f32_param_1]; ; CHECK-DAG: ld.param.v2.f32 {[[C0:%f[0-9]+]], [[C1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_2]; ; CHECK-DAG: ld.param.v2.f32 {[[D0:%f[0-9]+]], [[D1:%f[0-9]+]]}, [test_select_cc_f16_f32_param_3]; ; CHECK-DAG: setp.neu.f32 [[P0:%p[0-9]+]], [[C0]], [[D0]] ; CHECK-DAG: setp.neu.f32 [[P1:%p[0-9]+]], [[C1]], [[D1]] -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: selp.b16 [[R0:%h[0-9]+]], [[A0]], [[B0]], [[P0]]; -; CHECK-DAG: selp.b16 [[R1:%h[0-9]+]], [[A1]], [[B1]], [[P1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: selp.b16 [[R0:%rs[0-9]+]], [[A0]], [[B0]], [[P0]]; +; CHECK-DAG: selp.b16 [[R1:%rs[0-9]+]], [[A1]], [[B1]], [[P1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK-NEXT: st.param.b32 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, @@ -487,11 +483,11 @@ } ; CHECK-LABEL: test_fcmp_une( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_une_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_une_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_une_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_une_param_1]; ; CHECK-F16: setp.neu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -509,11 +505,11 @@ } ; CHECK-LABEL: test_fcmp_ueq( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ueq_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ueq_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ueq_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ueq_param_1]; ; CHECK-F16: setp.equ.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -531,11 +527,11 @@ } ; CHECK-LABEL: test_fcmp_ugt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ugt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ugt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ugt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ugt_param_1]; ; CHECK-F16: setp.gtu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -553,11 +549,11 @@ } ; CHECK-LABEL: test_fcmp_uge( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uge_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uge_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uge_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uge_param_1]; ; CHECK-F16: setp.geu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -575,11 +571,11 @@ } ; CHECK-LABEL: test_fcmp_ult( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ult_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ult_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ult_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ult_param_1]; ; CHECK-F16: setp.ltu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -597,11 +593,11 @@ } ; CHECK-LABEL: test_fcmp_ule( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ule_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ule_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ule_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ule_param_1]; ; CHECK-F16: setp.leu.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -620,11 +616,11 @@ ; CHECK-LABEL: test_fcmp_uno( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_uno_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_uno_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_uno_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_uno_param_1]; ; CHECK-F16: setp.nan.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -642,11 +638,11 @@ } ; CHECK-LABEL: test_fcmp_one( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_one_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_one_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_one_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_one_param_1]; ; CHECK-F16: setp.ne.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -664,11 +660,11 @@ } ; CHECK-LABEL: test_fcmp_oeq( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oeq_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oeq_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oeq_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oeq_param_1]; ; CHECK-F16: setp.eq.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -686,11 +682,11 @@ } ; CHECK-LABEL: test_fcmp_ogt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ogt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ogt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ogt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ogt_param_1]; ; CHECK-F16: setp.gt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -708,11 +704,11 @@ } ; CHECK-LABEL: test_fcmp_oge( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_oge_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_oge_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_oge_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_oge_param_1]; ; CHECK-F16: setp.ge.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -730,11 +726,11 @@ } ; CHECK-LABEL: test_fcmp_olt( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_olt_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_olt_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_olt_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_olt_param_1]; ; CHECK-F16: setp.lt.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -752,11 +748,11 @@ } ; XCHECK-LABEL: test_fcmp_ole( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ole_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ole_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ole_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ole_param_1]; ; CHECK-F16: setp.le.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -774,11 +770,11 @@ } ; CHECK-LABEL: test_fcmp_ord( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fcmp_ord_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fcmp_ord_param_1]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fcmp_ord_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fcmp_ord_param_1]; ; CHECK-F16: setp.num.f16x2 [[P0:%p[0-9]+]]|[[P1:%p[0-9]+]], [[A]], [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA1:%f[0-9]+]], [[A1]] @@ -796,8 +792,8 @@ } ; CHECK-LABEL: test_fptosi_i32( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i32_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -808,8 +804,8 @@ } ; CHECK-LABEL: test_fptosi_i64( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptosi_i64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptosi_i64_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.s64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.s64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -820,8 +816,8 @@ } ; CHECK-LABEL: test_fptoui_2xi32( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi32_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi32_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u32.f16 [[R0:%r[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u32.f16 [[R1:%r[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[R0]], [[R1]]} @@ -832,8 +828,8 @@ } ; CHECK-LABEL: test_fptoui_2xi64( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fptoui_2xi64_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fptoui_2xi64_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.rzi.u64.f16 [[R0:%rd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.rzi.u64.f16 [[R1:%rd[0-9]+]], [[A1]]; ; CHECK: st.param.v2.b64 [func_retval0+0], {[[R0]], [[R1]]} @@ -845,9 +841,9 @@ ; CHECK-LABEL: test_uitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.u32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.u32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi32(<2 x i32> %a) #0 { @@ -857,9 +853,9 @@ ; CHECK-LABEL: test_uitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_uitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.u64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.u64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_uitofp_2xi64(<2 x i64> %a) #0 { @@ -869,9 +865,9 @@ ; CHECK-LABEL: test_sitofp_2xi32( ; CHECK: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_param_0]; -; CHECK-DAG: cvt.rn.f16.s32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.s32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi32(<2 x i32> %a) #0 { @@ -881,9 +877,9 @@ ; CHECK-LABEL: test_sitofp_2xi64( ; CHECK: ld.param.v2.u64 {[[A0:%rd[0-9]+]], [[A1:%rd[0-9]+]]}, [test_sitofp_2xi64_param_0]; -; CHECK-DAG: cvt.rn.f16.s64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.s64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sitofp_2xi64(<2 x i64> %a) #0 { @@ -893,23 +889,23 @@ ; CHECK-LABEL: test_uitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_uitofp_2xi32_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.u32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.u32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_uitofp_2xi32_fadd_param_1]; +; CHECK-DAG: cvt.rn.f16.u32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.u32 [[C1:%rs[0-9]+]], [[A1]]; -; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} -; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; +; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]} +; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -921,23 +917,23 @@ ; CHECK-LABEL: test_sitofp_2xi32_fadd( ; CHECK-DAG: ld.param.v2.u32 {[[A0:%r[0-9]+]], [[A1:%r[0-9]+]]}, [test_sitofp_2xi32_fadd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; -; CHECK-DAG: cvt.rn.f16.s32 [[C0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.s32 [[C1:%h[0-9]+]], [[A1]]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_sitofp_2xi32_fadd_param_1]; +; CHECK-DAG: cvt.rn.f16.s32 [[C0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.s32 [[C1:%rs[0-9]+]], [[A1]]; ; -; CHECK-F16-DAG: mov.b32 [[C:%hh[0-9]+]], {[[C0]], [[C1]]} -; CHECK-F16-DAG: add.rn.f16x2 [[R:%hh[0-9]+]], [[B]], [[C]]; +; CHECK-F16-DAG: mov.b32 [[C:%r[0-9]+]], {[[C0]], [[C1]]} +; CHECK-F16-DAG: add.rn.f16x2 [[R:%r[0-9]+]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB1:%f[0-9]+]], [[B1]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC1:%f[0-9]+]], [[C1]] ; CHECK-NOF16-DAG: add.rn.f32 [[FR0:%f[0-9]+]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: add.rn.f32 [[FR1:%f[0-9]+]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -949,9 +945,9 @@ ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: ld.param.v2.f32 {[[A0:%f[0-9]+]], [[A1:%f[0-9]+]]}, [test_fptrunc_2xfloat_param_0]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { @@ -961,9 +957,9 @@ ; CHECK-LABEL: test_fptrunc_2xdouble( ; CHECK: ld.param.v2.f64 {[[A0:%fd[0-9]+]], [[A1:%fd[0-9]+]]}, [test_fptrunc_2xdouble_param_0]; -; CHECK-DAG: cvt.rn.f16.f64 [[R0:%h[0-9]+]], [[A0]]; -; CHECK-DAG: cvt.rn.f16.f64 [[R1:%h[0-9]+]], [[A1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f64 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK-DAG: cvt.rn.f16.f64 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fptrunc_2xdouble(<2 x double> %a) #0 { @@ -972,8 +968,8 @@ } ; CHECK-LABEL: test_fpext_2xfloat( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xfloat_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xfloat_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[R0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[R1:%f[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f32 [func_retval0+0], {[[R0]], [[R1]]}; @@ -984,8 +980,8 @@ } ; CHECK-LABEL: test_fpext_2xdouble( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fpext_2xdouble_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fpext_2xdouble_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f64.f16 [[R0:%fd[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f64.f16 [[R1:%fd[0-9]+]], [[A1]]; ; CHECK-NEXT: st.param.v2.f64 [func_retval0+0], {[[R0]], [[R1]]}; @@ -1012,8 +1008,7 @@ ; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RS0]]; ; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RS1]]; ; CHECK-DAG: shl.b32 [[R1H:%r[0-9]+]], [[R1]], 16; -; CHECK-DAG: or.b32 [[R1H0L:%r[0-9]+]], [[R0]], [[R1H]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], [[R1H0L]]; +; CHECK-DAG: or.b32 [[R:%r[0-9]+]], [[R0]], [[R1H]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_2xi16_to_2xhalf(<2 x i16> %a) #0 { @@ -1023,7 +1018,7 @@ ; CHECK-LABEL: test_bitcast_float_to_2xhalf( ; CHECK: ld.param.f32 [[AF1:%f[0-9]+]], [test_bitcast_float_to_2xhalf_param_0]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], [[AF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], [[AF1]]; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_bitcast_float_to_2xhalf(float %a) #0 { @@ -1066,15 +1061,15 @@ declare <2 x half> @llvm.fmuladd.f16(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 ; CHECK-LABEL: test_sqrt( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sqrt_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sqrt_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sqrt.rn.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sqrt.rn.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sqrt(<2 x half> %a) #0 { @@ -1090,15 +1085,15 @@ ;} ; CHECK-LABEL: test_sin( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_sin_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_sin_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: sin.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: sin.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_sin(<2 x half> %a) #0 #1 { @@ -1107,15 +1102,15 @@ } ; CHECK-LABEL: test_cos( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_cos_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_cos_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cos.approx.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: cos.approx.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_cos(<2 x half> %a) #0 #1 { @@ -1166,15 +1161,15 @@ ;} ; CHECK-LABEL: test_fma( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fma_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fma_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fma_param_2]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fma_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fma_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fma_param_2]; ; -; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1183,9 +1178,9 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret @@ -1195,15 +1190,15 @@ } ; CHECK-LABEL: test_fabs( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_fabs_param_0]; -; CHECK: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_fabs_param_0]; +; CHECK: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: abs.f32 [[RF0:%f[0-9]+]], [[AF0]]; ; CHECK-DAG: abs.f32 [[RF1:%f[0-9]+]], [[AF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_fabs(<2 x half> %a) #0 { @@ -1212,19 +1207,19 @@ } ; CHECK-LABEL: test_minnum( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_minnum_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_minnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_minnum_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_minnum_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: min.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: min.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { @@ -1233,19 +1228,19 @@ } ; CHECK-LABEL: test_maxnum( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_maxnum_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_maxnum_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_maxnum_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_maxnum_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] ; CHECK-DAG: cvt.f32.f16 [[AF0:%f[0-9]+]], [[A0]]; ; CHECK-DAG: cvt.f32.f16 [[AF1:%f[0-9]+]], [[A1]]; ; CHECK-DAG: cvt.f32.f16 [[BF0:%f[0-9]+]], [[B0]]; ; CHECK-DAG: cvt.f32.f16 [[BF1:%f[0-9]+]], [[B1]]; ; CHECK-DAG: max.f32 [[RF0:%f[0-9]+]], [[AF0]], [[BF0]]; ; CHECK-DAG: max.f32 [[RF1:%f[0-9]+]], [[AF1]], [[BF1]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[RF0]]; -; CHECK-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[RF1]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[RF0]]; +; CHECK-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[RF1]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { @@ -1254,23 +1249,17 @@ } ; CHECK-LABEL: test_copysign( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; -; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; -; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; -; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; -; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; -; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; -; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; -; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; -; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; +; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; +; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; +; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { @@ -1279,24 +1268,20 @@ } ; CHECK-LABEL: test_copysign_f32( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f32_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f32_param_0]; ; CHECK-DAG: ld.param.v2.f32 {[[B0:%f[0-9]+]], [[B1:%f[0-9]+]]}, [test_copysign_f32_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; -; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b32 [[BI0:%r[0-9]+]], [[B0]]; ; CHECK-DAG: mov.b32 [[BI1:%r[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; -; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; +; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; ; CHECK-DAG: and.b32 [[BX0:%r[0-9]+]], [[BI0]], -2147483648; ; CHECK-DAG: and.b32 [[BX1:%r[0-9]+]], [[BI1]], -2147483648; ; CHECK-DAG: mov.b32 {tmp, [[BZ0:%rs[0-9]+]]}, [[BX0]]; } ; CHECK-DAG: mov.b32 {tmp, [[BZ1:%rs[0-9]+]]}, [[BX1]]; } -; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; -; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; +; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { @@ -1306,26 +1291,22 @@ } ; CHECK-LABEL: test_copysign_f64( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_f64_param_0]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_f64_param_0]; ; CHECK-DAG: ld.param.v2.f64 {[[B0:%fd[0-9]+]], [[B1:%fd[0-9]+]]}, [test_copysign_f64_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; -; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] ; CHECK-DAG: mov.b64 [[BI0:%rd[0-9]+]], [[B0]]; ; CHECK-DAG: mov.b64 [[BI1:%rd[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[AS0]], 32767; -; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[AS1]], 32767; +; CHECK-DAG: and.b16 [[AI0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-DAG: and.b16 [[AI1:%rs[0-9]+]], [[A1]], 32767; ; CHECK-DAG: and.b64 [[BX0:%rd[0-9]+]], [[BI0]], -9223372036854775808; ; CHECK-DAG: and.b64 [[BX1:%rd[0-9]+]], [[BI1]], -9223372036854775808; ; CHECK-DAG: shr.u64 [[BY0:%rd[0-9]+]], [[BX0]], 48; ; CHECK-DAG: shr.u64 [[BY1:%rd[0-9]+]], [[BX1]], 48; ; CHECK-DAG: cvt.u16.u64 [[BZ0:%rs[0-9]+]], [[BY0]]; ; CHECK-DAG: cvt.u16.u64 [[BZ1:%rs[0-9]+]], [[BY1]]; -; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AI0]], [[BZ0]]; -; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AI1]], [[BZ1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AI0]], [[BZ0]]; +; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AI1]], [[BZ1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { @@ -1335,24 +1316,18 @@ } ; CHECK-LABEL: test_copysign_extended( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_copysign_extended_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_copysign_extended_param_1]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-DAG: mov.b16 [[AS0:%rs[0-9]+]], [[A0]]; -; CHECK-DAG: mov.b16 [[AS1:%rs[0-9]+]], [[A1]]; -; CHECK-DAG: mov.b16 [[BS0:%rs[0-9]+]], [[B0]]; -; CHECK-DAG: mov.b16 [[BS1:%rs[0-9]+]], [[B1]]; -; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[AS0]], 32767; -; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[AS1]], 32767; -; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[BS0]], -32768; -; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[BS1]], -32768; -; CHECK-DAG: or.b16 [[RS0:%rs[0-9]+]], [[AX0]], [[BX0]]; -; CHECK-DAG: or.b16 [[RS1:%rs[0-9]+]], [[AX1]], [[BX1]]; -; CHECK-DAG: mov.b16 [[R0:%h[0-9]+]], [[RS0]]; -; CHECK-DAG: mov.b16 [[R1:%h[0-9]+]], [[RS1]]; -; CHECK-DAG: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} -; CHECK: mov.b32 {[[RX0:%h[0-9]+]], [[RX1:%h[0-9]+]]}, [[R]] +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_copysign_extended_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_copysign_extended_param_1]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-DAG: and.b16 [[AX0:%rs[0-9]+]], [[A0]], 32767; +; CHECK-DAG: and.b16 [[AX1:%rs[0-9]+]], [[A1]], 32767; +; CHECK-DAG: and.b16 [[BX0:%rs[0-9]+]], [[B0]], -32768; +; CHECK-DAG: and.b16 [[BX1:%rs[0-9]+]], [[B1]], -32768; +; CHECK-DAG: or.b16 [[R0:%rs[0-9]+]], [[AX0]], [[BX0]]; +; CHECK-DAG: or.b16 [[R1:%rs[0-9]+]], [[AX1]], [[BX1]]; +; CHECK-DAG: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} +; CHECK: mov.b32 {[[RX0:%rs[0-9]+]], [[RX1:%rs[0-9]+]]}, [[R]] ; CHECK-DAG: cvt.f32.f16 [[XR0:%f[0-9]+]], [[RX0]]; ; CHECK-DAG: cvt.f32.f16 [[XR1:%f[0-9]+]], [[RX1]]; ; CHECK: st.param.v2.f32 [func_retval0+0], {[[XR0]], [[XR1]]}; @@ -1364,11 +1339,11 @@ } ; CHECK-LABEL: test_floor( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_floor_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_floor_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rmi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_floor(<2 x half> %a) #0 { @@ -1377,11 +1352,11 @@ } ; CHECK-LABEL: test_ceil( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_ceil_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_ceil_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rpi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_ceil(<2 x half> %a) #0 { @@ -1390,11 +1365,11 @@ } ; CHECK-LABEL: test_trunc( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_trunc_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_trunc_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rzi.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_trunc(<2 x half> %a) #0 { @@ -1403,11 +1378,11 @@ } ; CHECK-LABEL: test_rint( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_rint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_rint_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_rint(<2 x half> %a) #0 { @@ -1416,11 +1391,11 @@ } ; CHECK-LABEL: test_nearbyint( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_nearbyint_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_nearbyint_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_nearbyint(<2 x half> %a) #0 { @@ -1429,11 +1404,11 @@ } ; CHECK-LABEL: test_roundeven( -; CHECK: ld.param.b32 [[A:%hh[0-9]+]], [test_roundeven_param_0]; -; CHECK-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R1:%h[0-9]+]], [[A1]]; -; CHECK-DAG: cvt.rni.f16.f16 [[R0:%h[0-9]+]], [[A0]]; -; CHECK: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK: ld.param.b32 [[A:%r[0-9]+]], [test_roundeven_param_0]; +; CHECK-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R1:%rs[0-9]+]], [[A1]]; +; CHECK-DAG: cvt.rni.f16.f16 [[R0:%rs[0-9]+]], [[A0]]; +; CHECK: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; define <2 x half> @test_roundeven(<2 x half> %a) #0 { @@ -1456,15 +1431,15 @@ } ; CHECK-LABEL: test_fmuladd( -; CHECK-DAG: ld.param.b32 [[A:%hh[0-9]+]], [test_fmuladd_param_0]; -; CHECK-DAG: ld.param.b32 [[B:%hh[0-9]+]], [test_fmuladd_param_1]; -; CHECK-DAG: ld.param.b32 [[C:%hh[0-9]+]], [test_fmuladd_param_2]; +; CHECK-DAG: ld.param.b32 [[A:%r[0-9]+]], [test_fmuladd_param_0]; +; CHECK-DAG: ld.param.b32 [[B:%r[0-9]+]], [test_fmuladd_param_1]; +; CHECK-DAG: ld.param.b32 [[C:%r[0-9]+]], [test_fmuladd_param_2]; ; -; CHECK-F16: fma.rn.f16x2 [[R:%hh[0-9]+]], [[A]], [[B]], [[C]]; +; CHECK-F16: fma.rn.f16x2 [[R:%r[0-9]+]], [[A]], [[B]], [[C]]; ; -; CHECK-NOF16-DAG: mov.b32 {[[A0:%h[0-9]+]], [[A1:%h[0-9]+]]}, [[A]] -; CHECK-NOF16-DAG: mov.b32 {[[B0:%h[0-9]+]], [[B1:%h[0-9]+]]}, [[B]] -; CHECK-NOF16-DAG: mov.b32 {[[C0:%h[0-9]+]], [[C1:%h[0-9]+]]}, [[C]] +; CHECK-NOF16-DAG: mov.b32 {[[A0:%rs[0-9]+]], [[A1:%rs[0-9]+]]}, [[A]] +; CHECK-NOF16-DAG: mov.b32 {[[B0:%rs[0-9]+]], [[B1:%rs[0-9]+]]}, [[B]] +; CHECK-NOF16-DAG: mov.b32 {[[C0:%rs[0-9]+]], [[C1:%rs[0-9]+]]}, [[C]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FA0:%f[0-9]+]], [[A0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FB0:%f[0-9]+]], [[B0]] ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] @@ -1473,9 +1448,9 @@ ; CHECK-NOF16-DAG: cvt.f32.f16 [[FC0:%f[0-9]+]], [[C0]] ; CHECK-NOF16-DAG: fma.rn.f32 [[FR0:%f[0-9]+]], [[FA0]], [[FB0]], [[FC0]]; ; CHECK-NOF16-DAG: fma.rn.f32 [[FR1:%f[0-9]+]], [[FA1]], [[FB1]], [[FC1]]; -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%h[0-9]+]], [[FR0]] -; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%h[0-9]+]], [[FR1]] -; CHECK-NOF16: mov.b32 [[R:%hh[0-9]+]], {[[R0]], [[R1]]} +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R0:%rs[0-9]+]], [[FR0]] +; CHECK-NOF16-DAG: cvt.rn.f16.f32 [[R1:%rs[0-9]+]], [[FR1]] +; CHECK-NOF16: mov.b32 [[R:%r[0-9]+]], {[[R0]], [[R1]]} ; ; CHECK: st.param.b32 [func_retval0+0], [[R]]; ; CHECK: ret; @@ -1485,16 +1460,16 @@ } ; CHECK-LABEL: test_shufflevector( -; CHECK: mov.b32 {%h1, %h2}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs1, %rs2}, %r1; +; CHECK: mov.b32 %r2, {%rs2, %rs1}; define <2 x half> @test_shufflevector(<2 x half> %a) #0 { %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> ret <2 x half> %s } ; CHECK-LABEL: test_insertelement( -; CHECK: mov.b32 {%h2, %tmp_hi}, %hh1; -; CHECK: mov.b32 %hh2, {%h2, %h1}; +; CHECK: mov.b32 {%rs2, %tmp_hi}, %r1; +; CHECK: mov.b32 %r2, {%rs2, %rs1}; define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { %i = insertelement <2 x half> %a, half %x, i64 1 ret <2 x half> %i diff --git a/llvm/test/CodeGen/NVPTX/half.ll b/llvm/test/CodeGen/NVPTX/half.ll --- a/llvm/test/CodeGen/NVPTX/half.ll +++ b/llvm/test/CodeGen/NVPTX/half.ll @@ -7,7 +7,7 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_load_store -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in store half %val, ptr addrspace(1) %out @@ -16,7 +16,7 @@ define void @test_bitcast_from_half(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; CHECK-LABEL: @test_bitcast_from_half -; CHECK: ld.global.b16 [[TMP:%h[0-9]+]], [{{%r[0-9]+}}] +; CHECK: ld.global.b16 [[TMP:%rs[0-9]+]], [{{%r[0-9]+}}] ; CHECK: st.global.b16 [{{%r[0-9]+}}], [[TMP]] %val = load half, ptr addrspace(1) %in %val_int = bitcast half %val to i16 diff --git a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll --- a/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll +++ b/llvm/test/CodeGen/NVPTX/intrinsics-sm90.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| FileCheck --check-prefixes=CHECK %s -; RUN: %if ptxas-11.8 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} +; RUN: %if ptxas-12.0 %{ llc < %s -march=nvptx64 -mcpu=sm_90 -mattr=+ptx80| %ptxas-verify -arch=sm_90 %} ; CHECK-LABEL: test_isspacep define i1 @test_isspacep_shared_cluster(ptr %p) { diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py --- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py +++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py @@ -28,8 +28,8 @@ "i16": "r", "i32": "r", "i64": "rd", - "half": "h", - "<2 x half>": "hh", + "half": "rs", + "<2 x half>": "r", "float": "f", "double": "fd", } diff --git a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll --- a/llvm/test/CodeGen/NVPTX/ldg-invariant.ll +++ b/llvm/test/CodeGen/NVPTX/ldg-invariant.ll @@ -15,7 +15,7 @@ define half @ld_global_v2f16(ptr addrspace(1) %ptr) { ; Load of v2f16 is weird. We consider it to be a legal type, which happens to be ; loaded/stored as a 32-bit scalar. -; CHECK: ld.global.nc.b32 +; CHECK: ld.global.nc.u32 %a = load <2 x half>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <2 x half> %a, i32 0 %v2 = extractelement <2 x half> %a, i32 1 @@ -28,7 +28,7 @@ ; Larger f16 vectors may be split into individual f16 elements and multiple ; loads/stores may be vectorized using f16 element type. Practically it's ; limited to v4 variant only. -; CHECK: ld.global.nc.v4.b16 +; CHECK: ld.global.nc.v4.u16 %a = load <4 x half>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <4 x half> %a, i32 0 %v2 = extractelement <4 x half> %a, i32 1 @@ -44,7 +44,7 @@ define half @ld_global_v8f16(ptr addrspace(1) %ptr) { ; Larger vectors are, again, loaded as v4i32. PTX has no v8 variants of loads/stores, ; so load/store vectorizer has to convert v8f16 -> v4 x v2f16. -; CHECK: ld.global.nc.v4.b32 +; CHECK: ld.global.nc.v4.u32 %a = load <8 x half>, ptr addrspace(1) %ptr, !invariant.load !0 %v1 = extractelement <8 x half> %a, i32 0 %v2 = extractelement <8 x half> %a, i32 2 diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll --- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -64,14 +64,14 @@ ; CHECK-LABEL: test_ldu_f16 define half @test_ldu_f16(ptr addrspace(1) %ptr) { - ; CHECK: ldu.global.b16 + ; CHECK: ldu.global.u16 %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) ret half %val } ; CHECK-LABEL: test_ldu_v2f16 define <2 x half> @test_ldu_v2f16(ptr addrspace(1) %ptr) { - ; CHECK: ldu.global.b32 + ; CHECK: ldu.global.u32 %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) ret <2 x half> %val } @@ -120,14 +120,14 @@ ; CHECK-LABEL: test_ldg_f16 define half @test_ldg_f16(ptr addrspace(1) %ptr) { - ; CHECK: ld.global.nc.b16 + ; CHECK: ld.global.nc.u16 %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) ret half %val } ; CHECK-LABEL: test_ldg_v2f16 define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) { - ; CHECK: ld.global.nc.b32 + ; CHECK: ld.global.nc.u32 %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) ret <2 x half> %val } diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -384,13 +384,13 @@ ; CHECK: .func (.param .b32 func_retval0) ; CHECK-LABEL: test_f16( ; CHECK-NEXT: .param .b32 test_f16_param_0 -; CHECK: ld.param.b16 [[E:%h[0-9]+]], [test_f16_param_0]; +; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0]; ; CHECK: .param .b32 param0; ; CHECK: st.param.b16 [param0+0], [[E]]; ; CHECK: .param .b32 retval0; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]] ; CHECK-NEXT: ret; define half @test_f16(half %a) { @@ -401,13 +401,13 @@ ; CHECK: .func (.param .align 4 .b8 func_retval0[4]) ; CHECK-LABEL: test_v2f16( ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4] -; CHECK: ld.param.b32 [[E:%hh[0-9]+]], [test_v2f16_param_0]; +; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; ; CHECK: st.param.b32 [param0+0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; ; CHECK: call.uni (retval0), ; CHECK-NEXT: test_v2f16, -; CHECK: ld.param.b32 [[R:%hh[0-9]+]], [retval0+0]; +; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0+0]; ; CHECK: st.param.b32 [func_retval0+0], [[R]] ; CHECK-NEXT: ret; define <2 x half> @test_v2f16(<2 x half> %a) { @@ -418,17 +418,17 @@ ; CHECK:.func (.param .align 8 .b8 func_retval0[8]) ; CHECK-LABEL: test_v3f16( ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8] -; CHECK-DAG: ld.param.b32 [[HH01:%hh[0-9]+]], [test_v3f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E2:%h[0-9]+]], [test_v3f16_param_0+4]; +; CHECK-DAG: ld.param.b32 [[HH01:%r[0-9]+]], [test_v3f16_param_0]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; ; CHECK-DAG: st.param.v2.b16 [param0+0], {[[E0]], [[E1]]}; ; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v3f16, -; CHECK-DAG: ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R2:%h[0-9]+]], [retval0+4]; +; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; ; CHECK-DAG: st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; ; CHECK: ret; @@ -441,14 +441,12 @@ ; CHECK-LABEL: test_v4f16( ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8] ; CHECK: ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0]; -; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; -; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]}; +; CHECK: st.param.v2.b32 [param0+0], {[[R01]], [[R23]]}; ; CHECK: .param .align 8 .b8 retval0[8]; ; CHECK: call.uni (retval0), ; CHECK: test_v4f16, -; CHECK: ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0+0]; ; CHECK: st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]}; ; CHECK: ret; define <4 x half> @test_v4f16(<4 x half> %a) { @@ -459,17 +457,17 @@ ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0]; -; CHECK-DAG: mov.b32 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]]; -; CHECK-DAG: ld.param.b16 [[E4:%h[0-9]+]], [test_v5f16_param_0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; +; CHECK-DAG: mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]]; +; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v5f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.b16 [[R4:%h[0-9]+]], [retval0+8]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; ; CHECK: ret; @@ -482,16 +480,12 @@ ; CHECK-LABEL: test_v8f16( ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16] ; CHECK: ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0]; -; CHECK-DAG: mov.b32 [[HH01:%hh[0-9]+]], [[R01]]; -; CHECK-DAG: mov.b32 [[HH23:%hh[0-9]+]], [[R23]]; -; CHECK-DAG: mov.b32 [[HH45:%hh[0-9]+]], [[R45]]; -; CHECK-DAG: mov.b32 [[HH67:%hh[0-9]+]], [[R67]]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]}; +; CHECK: st.param.v4.b32 [param0+0], {[[R01]], [[R23]], [[R45]], [[R67]]}; ; CHECK: .param .align 16 .b8 retval0[16]; ; CHECK: call.uni (retval0), ; CHECK: test_v8f16, -; CHECK: ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0]; +; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0+0]; ; CHECK: st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; ; CHECK: ret; define <8 x half> @test_v8f16(<8 x half> %a) { @@ -502,9 +496,9 @@ ; CHECK:.func (.param .align 32 .b8 func_retval0[32]) ; CHECK-LABEL: test_v9f16( ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0]; -; CHECK-DAG: ld.param.v4.b16 {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8]; -; CHECK-DAG: ld.param.b16 [[E8:%h[0-9]+]], [test_v9f16_param_0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0]; +; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8]; +; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16]; ; CHECK: .param .align 32 .b8 param0[32]; ; CHECK-DAG: st.param.v4.b16 [param0+0], ; CHECK-DAG: st.param.v4.b16 [param0+8], @@ -512,9 +506,9 @@ ; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), ; CHECK: test_v9f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0]; -; CHECK-DAG: ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8]; -; CHECK-DAG: ld.param.b16 [[R8:%h[0-9]+]], [retval0+16]; +; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0+0]; +; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; +; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; ; CHECK-DAG: st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]}; ; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; @@ -915,13 +909,13 @@ ; CHECK: .func (.param .align 2 .b8 func_retval0[2]) ; CHECK-LABEL: test_s_f16( ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] -; CHECK: ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0]; +; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; ; CHECK: st.param.b16 [param0+0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; ; CHECK: call.uni ; CHECK-NEXT: test_s_f16, -; CHECK: ld.param.b16 [[R:%h[0-9]+]], [retval0+0]; +; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0+0]; ; CHECK: st.param.b16 [func_retval0+0], [[R]]; ; CHECK-NEXT: ret; define %s_f16 @test_s_f16(%s_f16 %a) { diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll @@ -92,10 +92,10 @@ define half @check_f16() { ; PTX-LABEL: check_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b16 [[LD:%h[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b16 [[LD:%rs[0-9]+]], [retval0+0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%h[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b16 [[PROXY:%rs[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.b16 [func_retval0+0], [[PROXY]]; ; PTX-WITH-DAG: st.param.b16 [func_retval0+0], [[LD]]; @@ -155,10 +155,10 @@ define <2 x half> @check_vec_f16() { ; PTX-LABEL: check_vec_f16 ; PTX-DAG: { // callseq {{[0-9]+}}, {{[0-9]+}} - ; PTX-DAG: ld.param.b32 [[LD:%hh[0-9]+]], [retval0+0]; + ; PTX-DAG: ld.param.b32 [[LD:%r[0-9]+]], [retval0+0]; ; PTX-DAG: } // callseq {{[0-9]+}} - ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%hh[0-9]+]], [[LD]]; + ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%r[0-9]+]], [[LD]]; ; PTX-WITHOUT-DAG: st.param.b32 [func_retval0+0], [[PROXY]]; ; PTX-WITH-DAG: st.param.b32 [func_retval0+0], [[LD]]; diff --git a/llvm/test/CodeGen/NVPTX/wmma.py b/llvm/test/CodeGen/NVPTX/wmma.py --- a/llvm/test/CodeGen/NVPTX/wmma.py +++ b/llvm/test/CodeGen/NVPTX/wmma.py @@ -118,7 +118,7 @@ }[ptx_type] self.ptx_reg_pattern = { - "f16": "%hh[0-9]+", + "f16": "%r[0-9]+", "f32": "%f[0-9]+", "f64": "%fd[0-9]+", }.get(ptx_type, "%r[0-9]+")