Index: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td @@ -479,7 +479,7 @@ def int_amdgcn_interp_p1 : GCCBuiltin<"__builtin_amdgcn_interp_p1">, Intrinsic<[llvm_float_ty], - [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // This intrinsic reads from lds, but the memory // values are constant, so it behaves like IntrNoMem. @@ -487,7 +487,7 @@ def int_amdgcn_interp_p2 : GCCBuiltin<"__builtin_amdgcn_interp_p2">, Intrinsic<[llvm_float_ty], - [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], + [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is // IntrNoMem. Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2476,6 +2476,8 @@ DAG.getConstant(0, DL, MVT::i32)); SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ, DAG.getConstant(1, DL, MVT::i32)); + I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I); + J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J); SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3)); SDValue Glue = M0.getValue(1); SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL, Index: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td +++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td @@ -52,7 +52,7 @@ (outs VGPR_32:$dst), (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr), "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]", - [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan), + [(set f32:$dst, (AMDGPUinterp_p1 f32:$i, (i32 imm:$attr_chan), (i32 imm:$attr)))] >; @@ -75,7 +75,7 @@ (outs VGPR_32:$dst), (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr), "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]", - [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan), + [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, f32:$j, (i32 imm:$attr_chan), (i32 imm:$attr)))]>; } // End DisableEncoding = "$src0", Constraints = "$src0 = $dst" Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll @@ -6,23 +6,23 @@ ;GCN: s_mov_b32 m0, s{{[0-9]+}} ;GCN: v_interp_p1_f32 ;GCN: v_interp_p2_f32 -define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x i32>) { +define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) { main_body: - %i = extractelement <2 x i32> %4, i32 0 - %j = extractelement <2 x i32> %4, i32 1 - %p0_0 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 0, i32 0, i32 %3) - %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, i32 %j, i32 0, i32 0, i32 %3) - %p0_1 = call float @llvm.amdgcn.interp.p1(i32 %i, i32 1, i32 0, i32 %3) - %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, i32 %j, i32 1, i32 0, i32 %3) + %i = extractelement <2 x float> %4, i32 0 + %j = extractelement <2 x float> %4, i32 1 + %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3) + %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3) + %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3) + %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %p1_1) ret void } ; Function Attrs: nounwind readnone -declare float @llvm.amdgcn.interp.p1(i32, i32, i32, i32) #0 +declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0 ; Function Attrs: nounwind readnone -declare float @llvm.amdgcn.interp.p2(float, i32, i32, i32, i32) #0 +declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)