diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -99,6 +99,10 @@
 BUILTIN(__builtin_arm_tcancel, "vWUIi", "n")
 BUILTIN(__builtin_arm_ttest, "WUi", "nc")
 
+// Armv8.5-A Random number generation intrinsics
+BUILTIN(__builtin_arm_rndr, "iWUi*", "n")
+BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n")
+
 // Armv8.7-A load/store 64-byte intrinsics
 BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n")
 BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n")
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -38,6 +38,7 @@
   bool HasTME;
   bool HasPAuth;
   bool HasLS64;
+  bool HasRandGen;
   bool HasMatMul;
   bool HasSVE2;
   bool HasSVE2AES;
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -360,6 +360,9 @@
   if (HasLS64)
     Builder.defineMacro("__ARM_FEATURE_LS64", "1");
 
+  if (HasRandGen)
+    Builder.defineMacro("__ARM_FEATURE_RNG", "1");
+
   switch (ArchKind) {
   default:
     break;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9197,6 +9197,23 @@
     }
   }
 
+  if (BuiltinID == AArch64::BI__builtin_arm_rndr ||
+      BuiltinID == AArch64::BI__builtin_arm_rndrrs) {
+
+    auto Intr =
+        (BuiltinID == AArch64::BI__builtin_arm_rndr ? Intrinsic::aarch64_rndr
+                                                    : Intrinsic::aarch64_rndrrs);
+    Function *F = CGM.getIntrinsic(Intr);
+    llvm::Value *Val = Builder.CreateCall(F);
+    Value *RandomValue = Builder.CreateExtractValue(Val, 0);
+    Value *Status = Builder.CreateExtractValue(Val, 1);
+
+    Address MemAddress = EmitPointerWithAlignment(E->getArg(0));
+    Builder.CreateStore(RandomValue, MemAddress);
+    Status = Builder.CreateZExt(Status, Int32Ty);
+    return Status;
+  }
+
   if (BuiltinID == AArch64::BI__clear_cache) {
     assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
     const FunctionDecl *FD = E->getDirectCallee();
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -709,6 +709,18 @@
 
 #endif /* __ARM_FEATURE_TME */
 
+/* Armv8.5-A Random number generation intrinsics */
+#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__rndr(uint64_t *__p) {
+  return __builtin_arm_rndr(__p);
+}
+static __inline__ int __attribute__((__always_inline__, __nodebug__))
+__rndrrs(uint64_t *__p) {
+  return __builtin_arm_rndrrs(__p);
+}
+#endif
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -903,3 +903,31 @@
   return __jcvt(v);
 }
 #endif
+
+#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG)
+
+// AArch64-LABEL: @test_rndr(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    [[TMP0:%.*]] = tail call { i64, i1 } @llvm.aarch64.rndr() [[ATTR6]]
+// AArch64-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+// AArch64-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+// AArch64-NEXT:    store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8
+// AArch64-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+// AArch64-NEXT:    ret i32 [[TMP3]]
+int test_rndr(uint64_t *__addr) {
+  return __rndr(__addr);
+}
+
+// AArch64-LABEL: @test_rndrrs(
+// AArch64-NEXT:  entry:
+// AArch64-NEXT:    [[TMP0:%.*]] = tail call { i64, i1 } @llvm.aarch64.rndrrs() [[ATTR6]]
+// AArch64-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+// AArch64-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+// AArch64-NEXT:    store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8
+// AArch64-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+// AArch64-NEXT:    ret i32 [[TMP3]]
+//
+int test_rndrrs(uint64_t *__addr) {
+  return __rndrrs(__addr);
+}
+#endif
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -124,4 +124,30 @@
   return __builtin_arm_cls64(v);
 }
 
+// CHECK-LABEL: @rndr(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndr()
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+// CHECK-NEXT:    store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int rndr(uint64_t *__addr) {
+  return __builtin_arm_rndr(__addr);
+}
+
+// CHECK-LABEL: @rndrrs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndrrs()
+// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+// CHECK-NEXT:    store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = zext i1 [[TMP2]] to i32
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int rndrrs(uint64_t *__addr) {
+  return __builtin_arm_rndrrs(__addr);
+}
+
 // CHECK: ![[M0]] = !{!"1:2:3:4:5"}
diff --git a/clang/test/Preprocessor/init-aarch64.c b/clang/test/Preprocessor/init-aarch64.c
--- a/clang/test/Preprocessor/init-aarch64.c
+++ b/clang/test/Preprocessor/init-aarch64.c
@@ -25,6 +25,7 @@
 // AARCH64-NEXT: #define __ARM_FEATURE_IDIV 1
 // AARCH64-NEXT: #define __ARM_FEATURE_LDREX 0xF
 // AARCH64-NEXT: #define __ARM_FEATURE_NUMERIC_MAXMIN 1
+// AARCH64-NEXT: #define __ARM_FEATURE_RNG 1
 // AARCH64-NEXT: #define __ARM_FEATURE_UNALIGNED 1
 // AARCH64-NEXT: #define __ARM_FP 0xE
 // AARCH64-NEXT: #define __ARM_FP16_ARGS 1
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -683,12 +683,18 @@
     : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>;
   class FPCR_Set_Intrinsic
     : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>;
+  class RNDR_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>;
 }
 
 // FPCR
 def int_aarch64_get_fpcr : FPCR_Get_Intrinsic;
 def int_aarch64_set_fpcr : FPCR_Set_Intrinsic;
+
+// Armv8.5-A Random number generation intrinsics
+def int_aarch64_rndr : RNDR_Intrinsic;
+def int_aarch64_rndrrs : RNDR_Intrinsic;
+
 let TargetPrefix = "aarch64" in {
 
 class Crypto_AES_DataKey_Intrinsic
   : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -283,6 +283,8 @@
   /// mode without emitting such REV instructions.
   NVCAST,
 
+  MRS, // MRS, also sets the flags via a glue.
+
   SMULL,
   UMULL,
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1893,6 +1893,7 @@
     MAKE_CASE(AArch64ISD::SITOF)
     MAKE_CASE(AArch64ISD::UITOF)
     MAKE_CASE(AArch64ISD::NVCAST)
+    MAKE_CASE(AArch64ISD::MRS)
     MAKE_CASE(AArch64ISD::SQSHL_I)
    MAKE_CASE(AArch64ISD::UQSHL_I)
     MAKE_CASE(AArch64ISD::SRSHR_I)
@@ -15975,6 +15976,24 @@
         LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
     return DAG.getMergeValues({Result, Chain}, DL);
   }
+  case Intrinsic::aarch64_rndr:
+  case Intrinsic::aarch64_rndrrs: {
+    unsigned IntrinsicID =
+        cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+    auto Register =
+        (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
+                                                : AArch64SysReg::RNDRRS);
+    SDLoc dl(N);
+    SDValue A = DAG.getNode(
+        AArch64ISD::MRS, dl, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
+        N->getOperand(0), DAG.getConstant(Register, dl, MVT::i64));
+    SDValue B = DAG.getNode(
+        AArch64ISD::CSINC, dl, MVT::i32, DAG.getConstant(0, dl, MVT::i32),
+        DAG.getConstant(0, dl, MVT::i32),
+        DAG.getConstant(AArch64CC::EQ, dl, MVT::i32), A.getValue(1));
+    return DAG.getMergeValues(
+        {A, DAG.getZExtOrTrunc(B, dl, MVT::i1), A.getValue(2)}, dl);
+  }
   default:
     break;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1492,6 +1492,7 @@
   bits<16> systemreg;
   let Inst{20-5} = systemreg;
   let DecoderNamespace = "Fallback";
+  let Defs = [NZCV];
 }
 
 // FIXME: Some of these def NZCV, others don't. Best way to model that?
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1266,6 +1266,13 @@
 def MSRpstateImm1 : MSRpstateImm0_1;
 def MSRpstateImm4 : MSRpstateImm0_15;
 
+// MRS from CodeGen.
+def AArch64mrs : SDNode<"AArch64ISD::MRS",
+                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
+                        [SDNPHasChain, SDNPOutGlue]>;
+def : Pat<(AArch64mrs imm:$id),
+          (MRS imm:$id)>;
+
 // The thread pointer (on Linux, at least, where this has been implemented) is
 // TPIDR_EL0.
 def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
diff --git a/llvm/test/CodeGen/AArch64/rand.ll b/llvm/test/CodeGen/AArch64/rand.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/rand.ll
@@ -0,0 +1,37 @@
+; RUN: llc -mtriple=aarch64 -mcpu=neoverse-v1 -mattr=+rand %s -o - | FileCheck %s
+
+define i32 @rndr(i64* %__addr) {
+; CHECK-LABEL: rndr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mrs x8, RNDR
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    str x8, [x0]
+; CHECK-NEXT:    and w0, w9, #0x1
+; CHECK-NEXT:    ret
+  %1 = tail call { i64, i1 } @llvm.aarch64.rndr()
+  %2 = extractvalue { i64, i1 } %1, 0
+  %3 = extractvalue { i64, i1 } %1, 1
+  store i64 %2, i64* %__addr, align 8
+  %4 = zext i1 %3 to i32
+  ret i32 %4
+}
+
+
+define i32 @rndrrs(i64* %__addr) {
+; CHECK-LABEL: rndrrs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mrs x8, RNDRRS
+; CHECK-NEXT:    cset w9, ne
+; CHECK-NEXT:    str x8, [x0]
+; CHECK-NEXT:    and w0, w9, #0x1
+; CHECK-NEXT:    ret
+  %1 = tail call { i64, i1 } @llvm.aarch64.rndrrs()
+  %2 = extractvalue { i64, i1 } %1, 0
+  %3 = extractvalue { i64, i1 } %1, 1
+  store i64 %2, i64* %__addr, align 8
+  %4 = zext i1 %3 to i32
+  ret i32 %4
+}
+
+declare { i64, i1 } @llvm.aarch64.rndr()
+declare { i64, i1 } @llvm.aarch64.rndrrs()
diff --git a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
--- a/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
+++ b/llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir
@@ -100,11 +100,11 @@
   bb.0:
     liveins: $x0
 
-    renamable $x8 = MRS 58880
+    renamable $x8 = MRS 58880, implicit-def $nzcv
     renamable $x8 = MOVZXi 15309, 0
     renamable $x8 = MOVKXi renamable $x8, 26239, 16
     STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8)
-    renamable $x8 = MRS 55840
+    renamable $x8 = MRS 55840, implicit-def $nzcv
     STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 8)
     RET undef $lr
 
@@ -134,9 +134,9 @@
   bb.0:
     liveins: $x0, $x1
 
-    renamable $x8 = MRS 58880
+    renamable $x8 = MRS 58880, implicit-def $nzcv
     STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 4)
-    renamable $x8 = MRS 55840
+    renamable $x8 = MRS 55840, implicit-def $nzcv
     STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
     RET undef $lr
 
@@ -166,9 +166,9 @@
   bb.0:
     liveins: $x0, $x1
 
-    renamable $x8 = MRS 58880
+    renamable $x8 = MRS 58880, implicit-def $nzcv
     STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4)
-    renamable $x8 = MRS 55840
+    renamable $x8 = MRS 55840, implicit-def $nzcv
     STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
     RET undef $lr
 
@@ -275,10 +275,10 @@
   bb.0:
     liveins: $x0, $x1
 
-    renamable $x8 = MRS 58880
+    renamable $x8 = MRS 58880, implicit-def $nzcv
     renamable $w8 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x8
     STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4)
-    renamable $x8 = MRS 55840
+    renamable $x8 = MRS 55840, implicit-def $nzcv
     STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4)
     RET undef $lr
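
For context, the sketch below shows how the new ACLE intrinsics would be called from C once this patch is applied. It is illustrative only and not part of the patch: the file name rng_demo.c and the build line are assumptions, and it presumes an AArch64 compiler with the RNG extension enabled (for example -march=armv8.5-a+rng) running on hardware that implements FEAT_RNG.

/* rng_demo.c: illustrative usage sketch, not part of the patch.
 * Assumed build line:
 *   clang --target=aarch64-linux-gnu -march=armv8.5-a+rng -O2 rng_demo.c
 */
#include <stdint.h>
#include <stdio.h>

#if defined(__ARM_FEATURE_RNG)
#include <arm_acle.h> /* declares __rndr and __rndrrs when the feature macro is set */
#endif

int main(void) {
#if defined(__ARM_FEATURE_RNG)
  uint64_t value = 0;

  /* __rndr writes 64 bits of random data through the pointer and returns a
   * status derived from the NZCV flags produced by the RNDR register read. */
  int status = __rndr(&value);
  printf("rndr   status=%d value=0x%016llx\n", status, (unsigned long long)value);

  /* __rndrrs does the same after forcing the generator to be reseeded. */
  status = __rndrrs(&value);
  printf("rndrrs status=%d value=0x%016llx\n", status, (unsigned long long)value);
#else
  puts("__ARM_FEATURE_RNG is not defined for this target");
#endif
  return 0;
}

How the returned status maps to success or failure follows the NZCV value written by the RNDR/RNDRRS system register read, which the patch materialises with the CSINC node; callers should interpret it per the ACLE specification rather than assuming a particular polarity from this sketch.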