diff --git a/clang/include/clang/Basic/BuiltinsNVPTX.def b/clang/include/clang/Basic/BuiltinsNVPTX.def --- a/clang/include/clang/Basic/BuiltinsNVPTX.def +++ b/clang/include/clang/Basic/BuiltinsNVPTX.def @@ -473,11 +473,11 @@ TARGET_BUILTIN(__nvvm_vote_ballot_sync, "UiUib", "", PTX60) // Match -TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", PTX60) -TARGET_BUILTIN(__nvvm_match_any_sync_i64, "WiUiWi", "", PTX60) +TARGET_BUILTIN(__nvvm_match_any_sync_i32, "UiUiUi", "", AND(SM_70,PTX60)) +TARGET_BUILTIN(__nvvm_match_any_sync_i64, "UiUiWi", "", AND(SM_70,PTX60)) // These return a pair {value, predicate}, which requires custom lowering. -TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", PTX60) -TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "WiUiWii*", "", PTX60) +TARGET_BUILTIN(__nvvm_match_all_sync_i32p, "UiUiUii*", "", AND(SM_70,PTX60)) +TARGET_BUILTIN(__nvvm_match_all_sync_i64p, "UiUiWii*", "", AND(SM_70,PTX60)) // Redux TARGET_BUILTIN(__nvvm_redux_sync_add, "iii", "", AND(SM_80,PTX70)) diff --git a/clang/lib/Headers/__clang_cuda_intrinsics.h b/clang/lib/Headers/__clang_cuda_intrinsics.h --- a/clang/lib/Headers/__clang_cuda_intrinsics.h +++ b/clang/lib/Headers/__clang_cuda_intrinsics.h @@ -234,7 +234,7 @@ return __nvvm_match_any_sync_i32(mask, value); } -inline __device__ unsigned long long +inline __device__ unsigned int __match64_any_sync(unsigned int mask, unsigned long long value) { return __nvvm_match_any_sync_i64(mask, value); } @@ -244,7 +244,7 @@ return __nvvm_match_all_sync_i32p(mask, value, pred); } -inline __device__ unsigned long long +inline __device__ unsigned int __match64_all_sync(unsigned int mask, unsigned long long value, int *pred) { return __nvvm_match_all_sync_i64p(mask, value, pred); } diff --git a/clang/test/CodeGen/builtins-nvptx-ptx60.cu b/clang/test/CodeGen/builtins-nvptx-ptx60.cu --- a/clang/test/CodeGen/builtins-nvptx-ptx60.cu +++ b/clang/test/CodeGen/builtins-nvptx-ptx60.cu @@ -91,13 +91,13 @@ // CHECK: call i32 @llvm.nvvm.match.any.sync.i32(i32 // expected-error@+1 {{'__nvvm_match_any_sync_i32' needs target feature ptx60}} __nvvm_match_any_sync_i32(mask, i); - // CHECK: call i64 @llvm.nvvm.match.any.sync.i64(i32 + // CHECK: call i32 @llvm.nvvm.match.any.sync.i64(i32 // expected-error@+1 {{'__nvvm_match_any_sync_i64' needs target feature ptx60}} __nvvm_match_any_sync_i64(mask, i64); // CHECK: call { i32, i1 } @llvm.nvvm.match.all.sync.i32p(i32 // expected-error@+1 {{'__nvvm_match_all_sync_i32p' needs target feature ptx60}} __nvvm_match_all_sync_i32p(mask, i, &i); - // CHECK: call { i64, i1 } @llvm.nvvm.match.all.sync.i64p(i32 + // CHECK: call { i32, i1 } @llvm.nvvm.match.all.sync.i64p(i32 // expected-error@+1 {{'__nvvm_match_all_sync_i64p' needs target feature ptx60}} __nvvm_match_all_sync_i64p(mask, i64, &i); diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -4499,7 +4499,7 @@ GCCBuiltin<"__nvvm_match_any_sync_i32">; // match.any.sync.b64 mask, value def int_nvvm_match_any_sync_i64 : - Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.any.sync.i64">, GCCBuiltin<"__nvvm_match_any_sync_i64">; @@ -4513,7 +4513,7 @@ [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i32p">; // match.all.sync.b64p mask, value def int_nvvm_match_all_sync_i64p : - Intrinsic<[llvm_i64_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], + Intrinsic<[llvm_i32_ty, llvm_i1_ty], [llvm_i32_ty, llvm_i64_ty], [IntrInaccessibleMemOnly, IntrConvergent], "llvm.nvvm.match.all.sync.i64p">; // diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td --- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -223,21 +223,21 @@ multiclass MATCH_ANY_SYNC { - def ii : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, ImmOp:$value), + def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, ImmOp:$value), + def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, regclass:$value), + def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, regclass:$value), + def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value), "match.any.sync." # ptxtype # " \t$dest, $value, $mask;", - [(set regclass:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } @@ -248,25 +248,25 @@ multiclass MATCH_ALLP_SYNC { - def ii : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ir : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, ImmOp:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>, Requires<[hasPTX60, hasSM70]>; - def ri : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; - def rr : NVPTXInst<(outs regclass:$dest, Int1Regs:$pred), + def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask, regclass:$value), "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;", - [(set regclass:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, + [(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>, Requires<[hasPTX60, hasSM70]>; } defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC