diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -2473,7 +2473,7 @@
 
 // SW version of rotate 64
 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)),
-          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>,
+          (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_64 node:$amt))>,
       Requires<[noHWROT32]>;
 def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt),
           (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>,
diff --git a/llvm/test/CodeGen/NVPTX/rotate_64.ll b/llvm/test/CodeGen/NVPTX/rotate_64.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/rotate_64.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 | FileCheck %s
+
+; Pin sm_20 so the software (noHWROT32) 64-bit rotate patterns are the ones
+; under test. The two shift amounts of a 64-bit rotate must sum to 64.
+
+declare i64 @llvm.nvvm.rotate.b64(i64, i32)
+declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
+
+; CHECK-LABEL: rotate64(
+define i64 @rotate64(i64 %a, i32 %b) {
+; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 3;
+; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 61;
+; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]];
+; CHECK: ret
+  %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 3)
+  ret i64 %val
+}
+
+; CHECK-LABEL: rotateright64(
+define i64 @rotateright64(i64 %a, i32 %b) {
+; CHECK: shl.b64 [[LHS:%.*]], [[RD1:%.*]], 61;
+; CHECK: shr.b64 [[RHS:%.*]], [[RD1]], 3;
+; CHECK: add.u64 [[RD2:%.*]], [[LHS]], [[RHS]];
+; CHECK: ret
+  %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 3)
+  ret i64 %val
+}