Index: include/llvm/IR/IntrinsicsAMDGPU.td
===================================================================
--- include/llvm/IR/IntrinsicsAMDGPU.td
+++ include/llvm/IR/IntrinsicsAMDGPU.td
@@ -246,4 +246,12 @@
   GCCBuiltin<"__builtin_amdgcn_s_dcache_wb_vol">,
   Intrinsic<[], [], []>;
 
+// llvm.amdgcn.ds.permute(i32 index, i32 src) -> i32
+def int_amdgcn_ds_permute :
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+// llvm.amdgcn.ds.bpermute(i32 index, i32 src) -> i32
+def int_amdgcn_ds_bpermute :
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
 }
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -227,6 +227,10 @@
     const MachineOperand *Offset1Imm =
       getNamedOperand(*LdSt, AMDGPU::OpName::offset1);
 
+    // DS_PERMUTE/DS_BPERMUTE have no offset0/offset1 operands; bail out.
+    if (!Offset0Imm || !Offset1Imm)
+      return false;
+
     uint8_t Offset0 = Offset0Imm->getImm();
     uint8_t Offset1 = Offset1Imm->getImm();
 
Index: lib/Target/AMDGPU/SIInstrInfo.td
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.td
+++ lib/Target/AMDGPU/SIInstrInfo.td
@@ -2342,6 +2342,23 @@
   }
 }
 
+// DS instruction with one address operand and one data operand that writes
+// its result to $vdst. It is convergent and neither loads nor stores; the
+// unused data1/offset0/offset1/gds encoding fields are fixed to 0.
+multiclass DS_1A1D_PERMUTE <bits<8> op, string opName, RegisterClass rc,
+                            dag outs = (outs rc:$vdst),
+                            dag ins = (ins VGPR_32:$addr, rc:$data0),
+                            string asm = opName#" $vdst, $addr, $data0"> {
+
+  let mayLoad = 0, mayStore = 0, isConvergent = 1 in {
+    def "" : DS_Pseudo <opName, outs, ins, []>;
+
+    let data1 = 0, offset0 = 0, offset1 = 0, gds = 0 in {
+      def "_vi" : DS_Real_vi <op, opName, outs, ins, asm>;
+    }
+  }
+}
+
 multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
                           string noRetOp = "", dag ins,
   dag outs = (outs rc:$vdst),
Index: lib/Target/AMDGPU/VIInstructions.td
===================================================================
--- lib/Target/AMDGPU/VIInstructions.td
+++ lib/Target/AMDGPU/VIInstructions.td
@@ -124,4 +124,23 @@
                        (as_i32imm $bank_mask), (as_i32imm $row_mask))
 >;
 
+//===----------------------------------------------------------------------===//
+// DS_PERMUTE/DS_BPERMUTE Instructions.
+//===----------------------------------------------------------------------===//
+
+let Uses = [EXEC] in {
+defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE < 0x3e, "ds_permute_b32", VGPR_32>;
+defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE < 0x3f, "ds_bpermute_b32", VGPR_32>;
+}
+
+def : Pat <
+  (int_amdgcn_ds_permute i32:$addr, i32:$data0),
+  (DS_PERMUTE_B32 $addr, $data0)
+>;
+
+def : Pat <
+  (int_amdgcn_ds_bpermute i32:$addr, i32:$data0),
+  (DS_BPERMUTE_B32 $addr, $data0)
+>;
+
 } // End Predicates = [isVI]
Index: test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+; Check that the llvm.amdgcn.ds.permute and llvm.amdgcn.ds.bpermute
+; intrinsics are selected to ds_permute_b32 and ds_bpermute_b32, each taking
+; a destination, an address and a data VGPR.
+
+declare i32 @llvm.amdgcn.ds.permute(i32, i32) convergent
+declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) convergent
+
+; CHECK-LABEL: {{^}}ds_permute:
+; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
+  %permute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) convergent
+  store i32 %permute, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+; CHECK-LABEL: {{^}}ds_bpermute:
+; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
+define void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
+  %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) convergent
+  store i32 %bpermute, i32 addrspace(1)* %out, align 4
+  ret void
+}