# Patch summary (descriptive preamble placed before the first `diff --git` header).
#
# Purpose: expose the AMDGPU MULHI_I24 / MULHI_U24 operations as IR intrinsics
# and make instruction selection accept either the intrinsic or the existing
# target DAG node.
#
# llvm/include/llvm/IR/IntrinsicsAMDGPU.td
#   - Adds int_amdgcn_mulhi_i24 and int_amdgcn_mulhi_u24. Each returns one i32,
#     takes two i32 operands, and is marked IntrNoMem, IntrSpeculatable and
#     IntrWillReturn.
#   - NOTE(review): no comment describes the operand/result semantics; the names
#     suggest "high half of a 24-bit multiply" -- confirm and document.
#
# llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
#   - Renames the SDNode defs AMDGPUmulhi_u24 / AMDGPUmulhi_i24 to
#     AMDGPUmulhi_u24_impl / AMDGPUmulhi_i24_impl (still bound to
#     AMDGPUISD::MULHI_U24 / AMDGPUISD::MULHI_I24).
#   - Re-introduces the old names as PatFrags that match either the new
#     intrinsic or the corresponding *_impl node, in the same form as the
#     AMDGPUmul_i24 and AMDGPUbfe_i32 PatFrags visible in the hunk context.
#
# llvm/lib/Target/AMDGPU/VOP2Instructions.td
#   - V_MUL_HI_I32_I24 and V_MUL_HI_U32_U24 switch their profile argument from
#     VOP_PAT_GEN to VOP_I32_I32_I32; the pattern operand keeps the same name
#     and therefore now refers to the PatFrags above.
#   - NOTE(review): the reason for the profile change is not stated in the
#     patch -- presumably required for the PatFrags form; confirm.
#
# llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.{i24,u24}.ll (new files)
#   - One function each, run through llc with -march=amdgcn -mcpu=gfx900,
#     checking that the intrinsic call selects v_mul_hi_i32_i24_e32 /
#     v_mul_hi_u32_u24_e32.
#
# NOTE(review): the patch text below appears whitespace-collapsed (line breaks
# and indentation folded into single spaces, several hunks per physical line).
# It likely will not apply in this form -- confirm against the original patch.
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1535,6 +1535,16 @@ [IntrNoMem, IntrSpeculatable, IntrWillReturn] >; +def int_amdgcn_mulhi_i24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + +def int_amdgcn_mulhi_u24 : Intrinsic<[llvm_i32_ty], + [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, IntrSpeculatable, IntrWillReturn] +>; + // llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id) // // bar_val is the total number of waves that will wait on this diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -279,10 +279,10 @@ [SDNPCommutative, SDNPAssociative] >; -def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp, +def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; -def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp, +def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; @@ -435,6 +435,14 @@ [(int_amdgcn_mul_i24 node:$src0, node:$src1), (AMDGPUmul_i24_impl node:$src0, node:$src1)]>; +def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_mulhi_u24 node:$src0, node:$src1), + (AMDGPUmulhi_u24_impl node:$src0, node:$src1)]>; + +def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1), + [(int_amdgcn_mulhi_i24 node:$src0, node:$src1), + (AMDGPUmulhi_i24_impl node:$src0, node:$src1)]>; + def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [(int_amdgcn_sbfe node:$src0, node:$src1, node:$src2), (AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)]>; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td 
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -492,9 +492,9 @@ defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>; defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, any_fmul>; defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32_ARITH, AMDGPUmul_i24>; -defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_PAT_GEN, AMDGPUmulhi_i24>; +defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>; defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32_ARITH, AMDGPUmul_u24>; -defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_PAT_GEN, AMDGPUmulhi_u24>; +defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>; defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum_like>; defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum_like>; defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_PAT_GEN, smin>; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define i32 @basic(i32 %a, i32 %b) { +; CHECK-LABEL: basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mul_hi_i32_i24_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b) + ret i32 %mul +} + +declare i32 @llvm.amdgcn.mulhi.i24(i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s + +define i32 @basic(i32 %a, i32 %b) { +; CHECK-LABEL: basic: +; CHECK: ; %bb.0: +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_mul_hi_u32_u24_e32 v0, v0, v1 +; CHECK-NEXT: s_setpc_b64 s[30:31] + %mul = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b) + ret i32 %mul +} + +declare i32 @llvm.amdgcn.mulhi.u24(i32, i32)