AMDGPU: change how the v_interp_* instructions are printed.

* AMDGPUInstPrinter.cpp: the interpolation slot operand is printed in lower
  case (p10, p20, p0); any other immediate is printed as "invalid_param_<N>"
  instead of reaching llvm_unreachable.
* SIInstructions.td: the "[m0]" suffix is removed from the three v_interp asm
  strings, and "[$src0]" is removed from v_interp_p2_f32.
* llvm.amdgcn.interp.ll: adds v_interp_p1 / v_interp_p2 / v_interp_mov tests
  that check the full operand text of every emitted instruction.
* missing_op.txt: expected disassembly updated for the new p2 asm string.

Index: lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
===================================================================
--- lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -679,15 +679,18 @@
                                         const MCSubtargetInfo &STI,
                                         raw_ostream &O) {
   unsigned Imm = MI->getOperand(OpNum).getImm();
-
-  if (Imm == 2) {
-    O << "P0";
-  } else if (Imm == 1) {
-    O << "P20";
-  } else if (Imm == 0) {
-    O << "P10";
-  } else {
-    llvm_unreachable("Invalid interpolation parameter slot");
+  switch (Imm) {
+  case 0:
+    O << "p10";
+    break;
+  case 1:
+    O << "p20";
+    break;
+  case 2:
+    O << "p0";
+    break;
+  default:
+    O << "invalid_param_" << Imm;
   }
 }
 
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -52,7 +52,7 @@
   0x00000000,
   (outs VGPR_32:$vdst),
   (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
-  "v_interp_p1_f32 $vdst, $i, $attr_chan, $attr, [m0]",
+  "v_interp_p1_f32 $vdst, $i, $attr_chan, $attr",
   [(set f32:$vdst, (AMDGPUinterp_p1 f32:$i, (i32 imm:$attr_chan),
                                             (i32 imm:$attr)))]
 >;
@@ -75,7 +75,7 @@
   0x00000001,
   (outs VGPR_32:$vdst),
   (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
-  "v_interp_p2_f32 $vdst, [$src0], $j, $attr_chan, $attr, [m0]",
+  "v_interp_p2_f32 $vdst, $j, $attr_chan, $attr",
   [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$j, (i32 imm:$attr_chan),
                                                      (i32 imm:$attr)))]>;
 
@@ -85,7 +85,7 @@
   0x00000002,
   (outs VGPR_32:$vdst),
   (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
-  "v_interp_mov_f32 $vdst, $src0, $attr_chan, $attr, [m0]",
+  "v_interp_mov_f32 $vdst, $src0, $attr_chan, $attr",
   [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
                                     (i32 imm:$attr)))]>;
 
Index: test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -1,11 +1,13 @@
 ;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
 ;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s
 
-;GCN-LABEL: {{^}}v_interp:
-;GCN-NOT: s_wqm
-;GCN: s_mov_b32 m0, s{{[0-9]+}}
-;GCN: v_interp_p1_f32
-;GCN: v_interp_p2_f32
+; GCN-LABEL: {{^}}v_interp:
+; GCN-NOT: s_wqm
+; GCN: s_mov_b32 m0, s{{[0-9]+}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
 define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
 main_body:
   %i = extractelement <2 x float> %4, i32 0
@@ -20,10 +22,144 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_interp_p1:
+; GCN: s_movk_i32 m0, 0x100
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 0{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 0{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 0{{$}}
+
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 3{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 4{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 63{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 64{{$}}
+; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 64{{$}}
+define amdgpu_ps void @v_interp_p1(float %i) {
+  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
+  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
+  %p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
+  %p0_3 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 0, i32 256)
+  %p0_4 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 0, i32 256)
+  %p0_5 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 1, i32 256)
+  %p0_6 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 2, i32 256)
+  %p0_7 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 3, i32 256)
+  %p0_8 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 4, i32 256)
+  %p0_9 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 63, i32 256)
+  %p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
+  %p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)
+
+  store volatile float %p0_0, float addrspace(1)* undef
+  store volatile float %p0_1, float addrspace(1)* undef
+  store volatile float %p0_2, float addrspace(1)* undef
+  store volatile float %p0_3, float addrspace(1)* undef
+  store volatile float %p0_4, float addrspace(1)* undef
+  store volatile float %p0_5, float addrspace(1)* undef
+  store volatile float %p0_6, float addrspace(1)* undef
+  store volatile float %p0_7, float addrspace(1)* undef
+  store volatile float %p0_8, float addrspace(1)* undef
+  store volatile float %p0_9, float addrspace(1)* undef
+  store volatile float %p0_10, float addrspace(1)* undef
+  store volatile float %p0_11, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_interp_p2:
+; GCN: s_movk_i32 m0, 0x100
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 3, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 0{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 63{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 64{{$}}
+; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, 4, 64{{$}}
+define amdgpu_ps void @v_interp_p2(float %x, float %j) {
+  %p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
+  %p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
+  %p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
+  %p2_3 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 3, i32 0, i32 256)
+  %p2_4 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 0, i32 256)
+
+  %p2_5 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 1, i32 256)
+  %p2_6 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 63, i32 256)
+  %p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
+  %p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)
+
+  store volatile float %p2_0, float addrspace(1)* undef
+  store volatile float %p2_1, float addrspace(1)* undef
+  store volatile float %p2_2, float addrspace(1)* undef
+  store volatile float %p2_3, float addrspace(1)* undef
+  store volatile float %p2_4, float addrspace(1)* undef
+  store volatile float %p2_5, float addrspace(1)* undef
+  store volatile float %p2_6, float addrspace(1)* undef
+  store volatile float %p2_7, float addrspace(1)* undef
+  store volatile float %p2_8, float addrspace(1)* undef
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_interp_mov:
+; GCN: s_movk_i32 m0, 0x100
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 0, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p20, 0, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, 0, 0{{$}}
+
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 2, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 3, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 4, 0{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_8, 4, 0{{$}}
+
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 63{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, 1, 64{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, 1, 64{{$}}
+; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, 4, 64{{$}}
+define amdgpu_ps void @v_interp_mov(float %x, float %j) {
+  %mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
+  %mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
+  %mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
+  %mov_3 = call float @llvm.amdgcn.interp.mov(i32 3, i32 0, i32 0, i32 256)
+
+  %mov_4 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 0, i32 256)
+  %mov_5 = call float @llvm.amdgcn.interp.mov(i32 0, i32 2, i32 0, i32 256)
+  %mov_6 = call float @llvm.amdgcn.interp.mov(i32 0, i32 3, i32 0, i32 256)
+  %mov_7 = call float @llvm.amdgcn.interp.mov(i32 0, i32 4, i32 0, i32 256)
+  %mov_8 = call float @llvm.amdgcn.interp.mov(i32 8, i32 4, i32 0, i32 256)
+
+  %mov_9 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 63, i32 256)
+  %mov_10 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 64, i32 256)
+  %mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
+  %mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)
+
+  store volatile float %mov_0, float addrspace(1)* undef
+  store volatile float %mov_1, float addrspace(1)* undef
+  store volatile float %mov_2, float addrspace(1)* undef
+  store volatile float %mov_3, float addrspace(1)* undef
+
+  store volatile float %mov_4, float addrspace(1)* undef
+  store volatile float %mov_5, float addrspace(1)* undef
+  store volatile float %mov_6, float addrspace(1)* undef
+  store volatile float %mov_7, float addrspace(1)* undef
+  store volatile float %mov_8, float addrspace(1)* undef
+
+  store volatile float %mov_9, float addrspace(1)* undef
+  store volatile float %mov_10, float addrspace(1)* undef
+  store volatile float %mov_11, float addrspace(1)* undef
+  store volatile float %mov_12, float addrspace(1)* undef
+  ret void
+}
+
 ; SI won't merge ds memory operations, because of the signed offset bug, so
 ; we only have check lines for VI.
 ; VI-LABEL: v_interp_readnone:
-; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI: s_mov_b32 m0, 0
+; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, 0, 0{{$}}
+; VI: s_mov_b32 m0, -1{{$}}
 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
 define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
   store float 0.0, float addrspace(3)* %lds
Index: test/MC/Disassembler/AMDGPU/missing_op.txt
===================================================================
--- test/MC/Disassembler/AMDGPU/missing_op.txt
+++ test/MC/Disassembler/AMDGPU/missing_op.txt
@@ -1,5 +1,5 @@
 # RUN: llvm-mc -arch=amdgcn -mcpu=fiji -disassemble < %s | FileCheck %s -check-prefix=VI
 
 #TODO: this test will fail when we fix v_interp_p2_f32 signature, remove it then
-#VI: v_interp_p2_f32 v7, [v7], 16, /*Missing OP3*/, /*Missing OP4*/
+#VI: v_interp_p2_f32 v7, 16, /*Missing OP3*/, /*Missing OP4*/
 0xd4 0x41 0x1d 0xd4