Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -64,6 +64,10 @@ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem] >; +def int_r600_cube : Intrinsic< + [llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem] +>; + } // End TargetPrefix = "r600" let TargetPrefix = "amdgcn" in { Index: lib/Target/AMDGPU/AMDGPUIntrinsics.td =================================================================== --- lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -20,9 +20,6 @@ // Deprecated in favor of llvm.amdgcn.sffbh def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; - // Deprecated in favor of separate int_amdgcn_cube* intrinsics. - def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - // Deprecated in favor of expanded bit operations def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; Index: lib/Target/AMDGPU/R600Instructions.td =================================================================== --- lib/Target/AMDGPU/R600Instructions.td +++ lib/Target/AMDGPU/R600Instructions.td @@ -1013,7 +1013,7 @@ (outs R600_Reg128:$dst), (ins R600_Reg128:$src0), "CUBE $dst $src0", - [(set v4f32:$dst, (int_AMDGPU_cube v4f32:$src0))], + [(set v4f32:$dst, (int_r600_cube v4f32:$src0))], VecALU > { let isPseudo = 1; Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -818,27 +818,6 @@ def : POW_Common ; def : Pat < - (int_AMDGPU_cube v4f32:$src), - (REG_SEQUENCE VReg_128, - (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)), - 0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)), - 0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)), - 0 /* clamp */, 0 /* omod */), sub0, - (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)), - 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)), - 0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)), - 0 /* clamp */, 0 /* omod */), sub1, - (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)), - 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)), - 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)), - 0 /* clamp */, 0 /* omod */), sub2, - (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)), - 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)), - 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)), - 0 /* clamp */, 0 /* omod */), sub3) ->; - -def : Pat < (i32 (sext i1:$src0)), (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src0) >; Index: test/CodeGen/AMDGPU/cube.ll =================================================================== --- test/CodeGen/AMDGPU/cube.ll +++ test/CodeGen/AMDGPU/cube.ll @@ -6,9 +6,6 @@ declare float @llvm.amdgcn.cubetc(float, float, float) #0 declare float @llvm.amdgcn.cubema(float, float, float) #0 -declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0 - - ; GCN-LABEL: {{^}}cube: ; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} @@ -29,18 +26,5 @@ ret void } -; GCN-LABEL: {{^}}legacy_cube: -; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} -; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}} -; GCN: _store_dwordx4 -define void @legacy_cube(<4 x float> addrspace(1)* %out, <4 x float> %abcx) #1 { - %cube = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %abcx) - store <4 x float> %cube, <4 x float> addrspace(1)* %out - ret void -} - attributes #0 = { nounwind readnone } attributes #1 = { nounwind } - Index: test/CodeGen/AMDGPU/llvm.r600.cube.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.r600.cube.ll +++ test/CodeGen/AMDGPU/llvm.r600.cube.ll @@ -22,7 +22,7 @@ %tmp12 = insertelement <4 x float> %tmp11, float %tmp7, i32 1 %tmp13 = insertelement <4 x float> %tmp12, float %tmp10, i32 2 %tmp14 = insertelement <4 x float> %tmp13, float 1.000000e+00, i32 3 - %tmp15 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp14) + %tmp15 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp14) %tmp16 = extractelement <4 x float> %tmp15, i32 0 %tmp17 = extractelement <4 x float> %tmp15, i32 1 %tmp18 = extractelement <4 x float> %tmp15, i32 2 @@ -44,7 +44,7 @@ } ; Function Attrs: readnone -declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0 +declare <4 x float> @llvm.r600.cube(<4 x float>) #0 ; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 Index: test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll =================================================================== --- test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll +++ test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll @@ -10,7 +10,7 @@ %tmp6 = insertelement <4 x float> %tmp5, float %tmp2, i32 1 %tmp7 = insertelement <4 x float> %tmp6, float %tmp3, i32 2 %tmp8 = insertelement <4 x float> %tmp7, float %tmp4, i32 3 - %tmp9 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp8) + %tmp9 = call <4 x float> @llvm.r600.cube(<4 x float> %tmp8) %tmp10 = extractelement <4 x float> %tmp9, i32 0 %tmp11 = extractelement <4 x float> %tmp9, i32 1 %tmp12 = extractelement <4 x float> %tmp9, i32 2 @@ -45,7 +45,7 @@ } ; Function Attrs: readnone -declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0 +declare <4 x float> @llvm.r600.cube(<4 x float>) #0 ; Function Attrs: readnone declare float @fabs(float) #0 Index: test/CodeGen/AMDGPU/si-sgpr-spill.ll =================================================================== --- test/CodeGen/AMDGPU/si-sgpr-spill.ll +++ test/CodeGen/AMDGPU/si-sgpr-spill.ll @@ -586,7 +586,19 @@ %tmp449 = insertelement <4 x float> %tmp448, float %tmp445, i32 1 %tmp450 = insertelement <4 x float> %tmp449, float %tmp447, i32 2 %tmp451 = insertelement <4 x float> %tmp450, float %tmp194, i32 3 - %tmp452 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %tmp451) + + %tmp451.x = extractelement <4 x float> %tmp451, i32 0 + %tmp451.y = extractelement <4 x float> %tmp451, i32 1 + %tmp451.z = extractelement <4 x float> %tmp451, i32 2 + %cubetc = call float @llvm.amdgcn.cubetc(float %tmp451.x, float %tmp451.y, float %tmp451.z) + %cubesc = call float @llvm.amdgcn.cubesc(float %tmp451.x, float %tmp451.y, float %tmp451.z) + %cubema = call float @llvm.amdgcn.cubema(float %tmp451.x, float %tmp451.y, float %tmp451.z) + %cubeid = call float @llvm.amdgcn.cubeid(float %tmp451.x, float %tmp451.y, float %tmp451.z) + %tmp452.0 = insertelement <4 x float> undef, float %cubetc, i32 0 + %tmp452.1 = insertelement <4 x float> %tmp452.0, float %cubesc, i32 1 + %tmp452.2 = insertelement <4 x float> %tmp452.1, float %cubema, i32 2 + %tmp452 = insertelement <4 x float> %tmp452.2, float %cubeid, i32 3 + %tmp453 = extractelement <4 x float> %tmp452, i32 0 %tmp454 = extractelement <4 x float> %tmp452, i32 1 %tmp455 = extractelement <4 x float> %tmp452, i32 2 @@ -1841,9 +1853,6 @@ declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0 ; Function Attrs: nounwind readnone -declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0 - -; Function Attrs: nounwind readnone declare float @llvm.fabs.f32(float) #0 ; Function Attrs: nounwind readnone @@ -1863,6 +1872,11 @@ ; Function Attrs: nounwind readnone declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0 +declare float @llvm.amdgcn.cubeid(float, float, float) #0 +declare float @llvm.amdgcn.cubesc(float, float, float) #0 +declare float @llvm.amdgcn.cubetc(float, float, float) #0 +declare float @llvm.amdgcn.cubema(float, float, float) #0 + attributes #0 = { nounwind readnone } attributes #1 = { nounwind }