Index: clang/include/clang/Basic/BuiltinsAArch64.def =================================================================== --- clang/include/clang/Basic/BuiltinsAArch64.def +++ clang/include/clang/Basic/BuiltinsAArch64.def @@ -109,6 +109,10 @@ BUILTIN(__builtin_arm_frint64xf, "ff", "") BUILTIN(__builtin_arm_frint64x, "dd", "") +// Armv8.5-A Random number generation intrinsics +BUILTIN(__builtin_arm_rndr, "iWUi*", "n") +BUILTIN(__builtin_arm_rndrrs, "iWUi*", "n") + // Armv8.7-A load/store 64-byte intrinsics BUILTIN(__builtin_arm_ld64b, "vvC*WUi*", "n") BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n") Index: clang/lib/Basic/Targets/AArch64.h =================================================================== --- clang/lib/Basic/Targets/AArch64.h +++ clang/lib/Basic/Targets/AArch64.h @@ -38,6 +38,7 @@ bool HasTME; bool HasPAuth; bool HasLS64; + bool HasRandGen; bool HasMatMul; bool HasSVE2; bool HasSVE2AES; Index: clang/lib/Basic/Targets/AArch64.cpp =================================================================== --- clang/lib/Basic/Targets/AArch64.cpp +++ clang/lib/Basic/Targets/AArch64.cpp @@ -360,6 +360,9 @@ if (HasLS64) Builder.defineMacro("__ARM_FEATURE_LS64", "1"); + if (HasRandGen) + Builder.defineMacro("__ARM_FEATURE_RNG", "1"); + switch (ArchKind) { default: break; @@ -425,6 +428,7 @@ HasMTE = false; HasTME = false; HasLS64 = false; + HasRandGen = false; HasMatMul = false; HasBFloat16 = false; HasSVE2 = false; @@ -524,6 +528,8 @@ HasLSE = true; if (Feature == "+ls64") HasLS64 = true; + if (Feature == "+rand") + HasRandGen = true; if (Feature == "+flagm") HasFlagM = true; } Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -9232,6 +9232,23 @@ } } + if (BuiltinID == AArch64::BI__builtin_arm_rndr || + BuiltinID == AArch64::BI__builtin_arm_rndrrs) { + + auto Intr = (BuiltinID == AArch64::BI__builtin_arm_rndr + ? 
Intrinsic::aarch64_rndr + : Intrinsic::aarch64_rndrrs); + Function *F = CGM.getIntrinsic(Intr); + llvm::Value *Val = Builder.CreateCall(F); + Value *RandomValue = Builder.CreateExtractValue(Val, 0); + Value *Status = Builder.CreateExtractValue(Val, 1); + + Address MemAddress = EmitPointerWithAlignment(E->getArg(0)); + Builder.CreateStore(RandomValue, MemAddress); + Status = Builder.CreateZExt(Status, Int32Ty); + return Status; + } + if (BuiltinID == AArch64::BI__clear_cache) { assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments"); const FunctionDecl *FD = E->getDirectCallee(); Index: clang/lib/Headers/arm_acle.h =================================================================== --- clang/lib/Headers/arm_acle.h +++ clang/lib/Headers/arm_acle.h @@ -752,6 +752,18 @@ #endif /* __ARM_FEATURE_TME */ +/* Armv8.5-A Random number generation intrinsics */ +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG) +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__rndr(uint64_t *__p) { + return __builtin_arm_rndr(__p); +} +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__rndrrs(uint64_t *__p) { + return __builtin_arm_rndrrs(__p); +} +#endif + #if defined(__cplusplus) } #endif Index: clang/test/CodeGen/arm_acle.c =================================================================== --- clang/test/CodeGen/arm_acle.c +++ clang/test/CodeGen/arm_acle.c @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -ffreestanding -triple armv8a-none-eabi -target-feature +crc -target-feature +dsp -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch32 // RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature +neon -target-feature +crc -target-feature +crypto -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64 // RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi 
-target-feature +v8.3a -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483 -// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature +v8.5a -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483 +// RUN: %clang_cc1 -ffreestanding -triple aarch64-none-eabi -target-feature +v8.5a -target-feature +rand -O0 -disable-O0-optnone -fexperimental-new-pass-manager -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -check-prefixes=ARM,AArch64,AArch6483,AArch6485 #include <arm_acle.h> @@ -1756,6 +1756,36 @@ } #endif + +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_RNG) + +// AArch6485-LABEL: @test_rndr( +// AArch6485-NEXT: entry: +// AArch6485-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndr() [[ATTR3:#.*]] +// AArch6485-NEXT: [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +// AArch6485-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +// AArch6485-NEXT: store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8 +// AArch6485-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +// AArch6485-NEXT: ret i32 [[TMP3]] +// +int test_rndr(uint64_t *__addr) { + return __rndr(__addr); +} + +// AArch6485-LABEL: @test_rndrrs( +// AArch6485-NEXT: entry: +// AArch6485-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndrrs() [[ATTR3:#.*]] +// AArch6485-NEXT: [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +// AArch6485-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +// AArch6485-NEXT: store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8 +// AArch6485-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +// AArch6485-NEXT: ret i32 [[TMP3]] +// +int test_rndrrs(uint64_t *__addr) { + return __rndrrs(__addr); +} +#endif + // AArch32: !5 = !{!"cp1:2:c3:c4:5"} // AArch32: !6 = !{!"cp1:2:c3"} // AArch32: !7 = !{!"sysreg"} Index: clang/test/CodeGen/builtins-arm64.c 
=================================================================== --- clang/test/CodeGen/builtins-arm64.c +++ clang/test/CodeGen/builtins-arm64.c @@ -124,4 +124,30 @@ return __builtin_arm_cls64(v); } +// CHECK-LABEL: @rndr( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndr() +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +// CHECK-NEXT: store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int rndr(uint64_t *__addr) { + return __builtin_arm_rndr(__addr); +} + +// CHECK-LABEL: @rndrrs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.aarch64.rndrrs() +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1 +// CHECK-NEXT: store i64 [[TMP1]], i64* [[__ADDR:%.*]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i32 +// CHECK-NEXT: ret i32 [[TMP3]] +// +int rndrrs(uint64_t *__addr) { + return __builtin_arm_rndrrs(__addr); +} + // CHECK: ![[M0]] = !{!"1:2:3:4:5"} Index: clang/test/Preprocessor/aarch64-target-features.c =================================================================== --- clang/test/Preprocessor/aarch64-target-features.c +++ clang/test/Preprocessor/aarch64-target-features.c @@ -441,6 +441,12 @@ // CHECK-BTI-OFF-NOT: __ARM_FEATURE_BTI_DEFAULT // CHECK-BTI: #define __ARM_FEATURE_BTI_DEFAULT 1 +// ================== Check Armv8.5-A random number generation extension. 
+// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.5-a+rng -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-RNG %s +// RUN: %clang -target aarch64-arm-none-eabi -march=armv8.5-a -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-NO-RNG %s +// CHECK-RNG: __ARM_FEATURE_RNG 1 +// CHECK-NO-RNG-NOT: __ARM_FEATURE_RNG 1 + // ================== Check BFloat16 Extensions. // RUN: %clang -target aarch64-arm-none-eabi -march=armv8.6-a+bf16 -x c -E -dM %s -o - 2>&1 | FileCheck -check-prefix=CHECK-BFLOAT %s // CHECK-BFLOAT: __ARM_BF16_FORMAT_ALTERNATIVE 1 Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -696,12 +696,18 @@ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; class FPCR_Set_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>; + class RNDR_Intrinsic + : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>; } // FPCR def int_aarch64_get_fpcr : FPCR_Get_Intrinsic; def int_aarch64_set_fpcr : FPCR_Set_Intrinsic; +// Armv8.5-A Random number generation intrinsics +def int_aarch64_rndr : RNDR_Intrinsic; +def int_aarch64_rndrrs : RNDR_Intrinsic; + let TargetPrefix = "aarch64" in { class Crypto_AES_DataKey_Intrinsic : DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -285,6 +285,8 @@ /// mode without emitting such REV instructions. NVCAST, + MRS, // MRS, also sets the flags via a glue. 
+ SMULL, UMULL, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1899,6 +1899,7 @@ MAKE_CASE(AArch64ISD::SITOF) MAKE_CASE(AArch64ISD::UITOF) MAKE_CASE(AArch64ISD::NVCAST) + MAKE_CASE(AArch64ISD::MRS) MAKE_CASE(AArch64ISD::SQSHL_I) MAKE_CASE(AArch64ISD::UQSHL_I) MAKE_CASE(AArch64ISD::SRSHR_I) @@ -16001,6 +16002,24 @@ LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL); return DAG.getMergeValues({Result, Chain}, DL); } + case Intrinsic::aarch64_rndr: + case Intrinsic::aarch64_rndrrs: { + unsigned IntrinsicID = + cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + auto Register = + (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR + : AArch64SysReg::RNDRRS); + SDLoc DL(N); + SDValue A = DAG.getNode( + AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other), + N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64)); + SDValue B = DAG.getNode( + AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(0, DL, MVT::i32), + DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1)); + return DAG.getMergeValues( + {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); + } default: break; } Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1492,6 +1492,11 @@ bits<16> systemreg; let Inst{20-5} = systemreg; let DecoderNamespace = "Fallback"; + // The MRS is set as a NZCV setting instruction. Not all MRS instructions + // require doing this. The alternative was to explicitly model each one, but + // it feels like it is unnecessary because it seems there are no negative + // consequences setting these flags for all. 
+ let Defs = [NZCV]; } // FIXME: Some of these def NZCV, others don't. Best way to model that? Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -607,7 +607,9 @@ def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>; - +def AArch64mrs : SDNode<"AArch64ISD::MRS", + SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>, + [SDNPHasChain, SDNPOutGlue]>; //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -1266,6 +1268,9 @@ def MSRpstateImm1 : MSRpstateImm0_1; def MSRpstateImm4 : MSRpstateImm0_15; +def : Pat<(AArch64mrs imm:$id), + (MRS imm:$id)>; + // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. 
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), Index: llvm/test/CodeGen/AArch64/rand.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/rand.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+v8.5a,+rand %s -o - | FileCheck %s + +define i32 @rndr(i64* %__addr) { +; CHECK-LABEL: rndr: +; CHECK: // %bb.0: +; CHECK-NEXT: mrs x9, RNDR +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: str x9, [x0] +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %1 = tail call { i64, i1 } @llvm.aarch64.rndr() + %2 = extractvalue { i64, i1 } %1, 0 + %3 = extractvalue { i64, i1 } %1, 1 + store i64 %2, i64* %__addr, align 8 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + + +define i32 @rndrrs(i64* %__addr) { +; CHECK-LABEL: rndrrs: +; CHECK: // %bb.0: +; CHECK-NEXT: mrs x9, RNDRRS +; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: str x9, [x0] +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: ret + %1 = tail call { i64, i1 } @llvm.aarch64.rndrrs() + %2 = extractvalue { i64, i1 } %1, 0 + %3 = extractvalue { i64, i1 } %1, 1 + store i64 %2, i64* %__addr, align 8 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + +declare { i64, i1 } @llvm.aarch64.rndr() +declare { i64, i1 } @llvm.aarch64.rndrrs() Index: llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir =================================================================== --- llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir +++ llvm/test/CodeGen/AArch64/stp-opt-with-renaming.mir @@ -100,11 +100,11 @@ bb.0: liveins: $x0 - renamable $x8 = MRS 58880 + renamable $x8 = MRS 58880, implicit-def $nzcv renamable $x8 = MOVZXi 15309, 0 renamable $x8 = MOVKXi renamable $x8, 26239, 16 STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 8) - renamable $x8 = MRS 55840 + renamable $x8 = MRS 55840, implicit-def $nzcv STRXui killed renamable $x8, renamable killed $x0, 
1, implicit killed $x8 :: (store 8) RET undef $lr @@ -134,9 +134,9 @@ bb.0: liveins: $x0, $x1 - renamable $x8 = MRS 58880 + renamable $x8 = MRS 58880, implicit-def $nzcv STRXui renamable $x8, renamable $x0, 0, implicit killed $x8 :: (store 4) - renamable $x8 = MRS 55840 + renamable $x8 = MRS 55840, implicit-def $nzcv STRXui killed renamable $x8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) RET undef $lr @@ -166,9 +166,9 @@ bb.0: liveins: $x0, $x1 - renamable $x8 = MRS 58880 + renamable $x8 = MRS 58880, implicit-def $nzcv STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) - renamable $x8 = MRS 55840 + renamable $x8 = MRS 55840, implicit-def $nzcv STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) RET undef $lr @@ -275,10 +275,10 @@ bb.0: liveins: $x0, $x1 - renamable $x8 = MRS 58880 + renamable $x8 = MRS 58880, implicit-def $nzcv renamable $w8 = ORRWrs $wzr, killed renamable $w8, 0, implicit-def $x8 STRWui renamable $w8, renamable $x0, 0, implicit killed $x8 :: (store 4) - renamable $x8 = MRS 55840 + renamable $x8 = MRS 55840, implicit-def $nzcv STRWui killed renamable $w8, renamable killed $x0, 1, implicit killed $x8 :: (store 4) RET undef $lr