Index: clang/include/clang/Basic/arm_neon.td
===================================================================
--- clang/include/clang/Basic/arm_neon.td
+++ clang/include/clang/Basic/arm_neon.td
@@ -1134,6 +1134,19 @@
 def SHA256H   : SInst<"vsha256h", "....", "QUi">;
 def SHA256H2  : SInst<"vsha256h2", "....", "QUi">;
 def SHA256SU1 : SInst<"vsha256su1", "....", "QUi">;
+
+def BCAX : SInst<"vbcax", "....", "QUcQUsQUiQUlQsQcQiQl">;
+def EOR3 : SInst<"veor3", "....", "QUcQUsQUiQUlQsQcQiQl">;
+def RAX1 : SInst<"vrax1", "...", "QUl">;
+
+let isVXAR = 1 in {
+def XAR : SInst<"vxar", "...I", "QUl">;
+}
+
+def SHA512SU0 : SInst<"vsha512su0", "...", "QUl">;
+def SHA512su1 : SInst<"vsha512su1", "....", "QUl">;
+def SHA512H   : SInst<"vsha512h", "....", "QUl">;
+def SHA512H2  : SInst<"vsha512h2", "....", "QUl">;
 }

 ////////////////////////////////////////////////////////////////////////////////
Index: clang/include/clang/Basic/arm_neon_incl.td
===================================================================
--- clang/include/clang/Basic/arm_neon_incl.td
+++ clang/include/clang/Basic/arm_neon_incl.td
@@ -272,6 +272,7 @@
   bit isScalarShift = 0;
   bit isScalarNarrowShift = 0;
   bit isVCVT_N = 0;
+  bit isVXAR = 0;
   // For immediate checks: the immediate will be assumed to specify the lane of
   // a Q register. Only used for intrinsics which end up calling polymorphic
   // builtins.
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -5643,6 +5643,7 @@
   NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
   NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
   NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
+  NEONMAP2(vbcaxq_v, aarch64_crypto_bcaxu, aarch64_crypto_bcaxs, Add1ArgType | UnsignedAlts),
   NEONMAP1(vbfdot_v, aarch64_neon_bfdot, 0),
   NEONMAP1(vbfdotq_v, aarch64_neon_bfdot, 0),
   NEONMAP1(vbfmlalbq_v, aarch64_neon_bfmlalb, 0),
@@ -5712,6 +5713,7 @@
   NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
   NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
   NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
+  NEONMAP2(veor3q_v, aarch64_crypto_eor3u, aarch64_crypto_eor3s, Add1ArgType | UnsignedAlts),
   NEONMAP0(vext_v),
   NEONMAP0(vextq_v),
   NEONMAP0(vfma_v),
@@ -5777,6 +5779,7 @@
   NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
   NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
   NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
+  NEONMAP1(vrax1q_v, aarch64_crypto_rax1, 0),
   NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
   NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
   NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
@@ -5800,6 +5803,10 @@
   NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
   NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
   NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
+  NEONMAP1(vsha512h2q_v, aarch64_crypto_sha512h2, 0),
+  NEONMAP1(vsha512hq_v, aarch64_crypto_sha512h, 0),
+  NEONMAP1(vsha512su0q_v, aarch64_crypto_sha512su0, 0),
+  NEONMAP1(vsha512su1q_v, aarch64_crypto_sha512su1, 0),
   NEONMAP0(vshl_n_v),
   NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
   NEONMAP0(vshll_n_v),
@@ -5820,6 +5827,7 @@
   NEONMAP1(vusdot_v, aarch64_neon_usdot, 0),
   NEONMAP1(vusdotq_v, aarch64_neon_usdot, 0),
   NEONMAP1(vusmmlaq_v, aarch64_neon_usmmla, 0),
+  NEONMAP1(vxarq_v, aarch64_crypto_xar, 0),
 };

 static const ARMVectorIntrinsicInfo AArch64SISDIntrinsicMap[] = {
@@ -6638,6 +6646,13 @@
   case NEON::BI__builtin_neon_vrshrq_n_v:
     return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
                         1, true);
+  case NEON::BI__builtin_neon_vsha512hq_v:
+  case NEON::BI__builtin_neon_vsha512h2q_v:
+  case NEON::BI__builtin_neon_vsha512su0q_v:
+  case NEON::BI__builtin_neon_vsha512su1q_v: {
+    Function *F = CGM.getIntrinsic(Int);
+    return EmitNeonCall(F, Ops, "");
+  }
   case NEON::BI__builtin_neon_vshl_n_v:
   case NEON::BI__builtin_neon_vshlq_n_v:
     Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
@@ -6767,6 +6782,11 @@
     }
     return SV;
   }
+  case NEON::BI__builtin_neon_vxarq_v: {
+    Function *F = CGM.getIntrinsic(Int);
+    Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
+    return EmitNeonCall(F, Ops, "");
+  }
   case NEON::BI__builtin_neon_vzip_v:
   case NEON::BI__builtin_neon_vzipq_v: {
     Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Index: clang/test/CodeGen/aarch64-neon-range-checks.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-neon-range-checks.c
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-feature +crypto -verify %s
+
+#include <arm_neon.h>
+
+void test_range_check_xar(uint64x2_t a, uint64x2_t b) {
+  vxarq_u64(a, b, -1); // expected-error {{argument value -1 is outside the valid range [0, 63]}}
+  vxarq_u64(a, b, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
+  vxarq_u64(a, b, 0);
+  vxarq_u64(a, b, 63);
+}
+
Index: clang/test/CodeGen/aarch64-neon-sha3.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-neon-sha3.c
@@ -0,0 +1,166 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN: -target-feature +crypto -S -emit-llvm -o - %s \
+// RUN: | FileCheck %s
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vsha512h(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h
+//
+void test_vsha512h(uint64x2_t hash_ed, uint64x2_t hash_gf, uint64x2_t kwh_kwh2) {
+  uint64x2_t result = vsha512hq_u64(hash_ed, hash_gf, kwh_kwh2);
+}
+
+// CHECK-LABEL: @test_vsha512h2(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512h2
+//
+void test_vsha512h2(uint64x2_t sum_ab, uint64x2_t hash_c_, uint64x2_t hash_ab) {
+  uint64x2_t result = vsha512h2q_u64(sum_ab, hash_c_, hash_ab);
+}
+
+// CHECK-LABEL: @test_vsha512su0(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su0
+//
+void test_vsha512su0(uint64x2_t w0_1, uint64x2_t w2_) {
+
+  uint64x2_t result = vsha512su0q_u64(w0_1, w2_);
+}
+
+// CHECK-LABEL: @test_vsha512su1(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.sha512su1
+//
+void test_vsha512su1(uint64x2_t s01_s02, uint64x2_t w14_15, uint64x2_t w9_10) {
+
+  uint64x2_t result = vsha512su1q_u64(s01_s02, w14_15, w9_10);
+}
+
+// CHECK-LABEL: @test_vrax1(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.rax1
+//
+void test_vrax1(uint64x2_t a, uint64x2_t b) {
+
+  uint64x2_t result = vrax1q_u64(a, b);
+}
+
+
+// CHECK-LABEL: @test_xar(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.xar
+//
+void test_xar(uint64x2_t a, uint64x2_t b) {
+
+  uint64x2_t result = vxarq_u64(a, b, 10);
+}
+
+
+// CHECK-LABEL: @test_vbcax_u8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8
+//
+void test_vbcax_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+  uint8x16_t result = vbcaxq_u8(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16
+//
+void test_vbcax_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
+  uint16x8_t result = vbcaxq_u16(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32
+//
+void test_vbcax_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+  uint32x4_t result = vbcaxq_u32(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_u64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64
+//
+void test_vbcax_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+  uint64x2_t result = vbcaxq_u64(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8
+//
+void test_vbcax_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+  int8x16_t result = vbcaxq_s8(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16
+//
+void test_vbcax_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  int16x8_t result = vbcaxq_s16(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32
+//
+void test_vbcax_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  int32x4_t result = vbcaxq_s32(a, b, c);
+}
+
+// CHECK-LABEL: @test_vbcax_s64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64
+//
+void test_vbcax_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
+  int64x2_t result = vbcaxq_s64(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8
+//
+void test_veor3_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
+  uint8x16_t result = veor3q_u8(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16
+//
+void test_veor3_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
+  uint16x8_t result = veor3q_u16(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32
+//
+void test_veor3_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
+  uint32x4_t result = veor3q_u32(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_u64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64
+//
+void test_veor3_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
+  uint64x2_t result = veor3q_u64(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s8(
+// CHECK: call <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8
+//
+void test_veor3_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
+  int8x16_t result = veor3q_s8(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s16(
+// CHECK: call <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16
+//
+void test_veor3_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+  int16x8_t result = veor3q_s16(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s32(
+// CHECK: call <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32
+//
+void test_veor3_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+  int32x4_t result = veor3q_s32(a, b, c);
+}
+
+// CHECK-LABEL: @test_veor3_s64(
+// CHECK: call <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64
+//
+void test_veor3_s64(int64x2_t a, int64x2_t b, int64x2_t c) {
+  int64x2_t result = veor3q_s64(a, b, c);
+}
Index: clang/utils/TableGen/NeonEmitter.cpp
===================================================================
--- clang/utils/TableGen/NeonEmitter.cpp
+++ clang/utils/TableGen/NeonEmitter.cpp
@@ -2115,7 +2115,11 @@
   std::string LowerBound, UpperBound;

   Record *R = Def->getRecord();
-  if (R->getValueAsBit("isVCVT_N")) {
+  if (R->getValueAsBit("isVXAR")) {
+    //VXAR takes an immediate in the range [0, 63]
+    LowerBound = "0";
+    UpperBound = "63";
+  } else if (R->getValueAsBit("isVCVT_N")) {
     // VCVT between floating- and fixed-point values takes an immediate
     // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
     LowerBound = "1";
Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -711,6 +711,31 @@
   class Crypto_SHA_8Hash4Schedule_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
                             [IntrNoMem]>;
+
+  // SHA512 intrinsic taking 2 arguments
+  class Crypto_SHA512_2Arg_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+
+  // SHA512 intrinsic taking 3 Arguments
+  class Crypto_SHA512_3Arg_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+                            [IntrNoMem]>;
+
+  // SHA3 Intrinsics taking 3 arguments
+  class Crypto_SHA3_3Arg_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                            [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
+                            [IntrNoMem]>;
+
+  // SHA3 Intrinsic taking 2 arguments
+  class Crypto_SHA3_2Arg_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
+                            [IntrNoMem]>;
+
+  // SHA3 Intrinsic taking 3 Arguments 1 immediate
+  class Crypto_SHA3_2ArgImm_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i64_ty],
+                            [IntrNoMem, ImmArg<ArgIndex<2>>]>;
 }

 // AES
@@ -734,6 +759,20 @@
 def int_aarch64_crypto_sha256su0 : Crypto_SHA_8Schedule_Intrinsic;
 def int_aarch64_crypto_sha256su1 : Crypto_SHA_12Schedule_Intrinsic;
+
+//SHA3
+def int_aarch64_crypto_eor3s : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_eor3u : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_bcaxs : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_bcaxu : Crypto_SHA3_3Arg_Intrinsic;
+def int_aarch64_crypto_rax1 : Crypto_SHA3_2Arg_Intrinsic;
+def int_aarch64_crypto_xar : Crypto_SHA3_2ArgImm_Intrinsic;
+
+// SHA512
+def int_aarch64_crypto_sha512h : Crypto_SHA512_3Arg_Intrinsic;
+def int_aarch64_crypto_sha512h2 : Crypto_SHA512_3Arg_Intrinsic;
+def int_aarch64_crypto_sha512su0 : Crypto_SHA512_2Arg_Intrinsic;
+def int_aarch64_crypto_sha512su1 : Crypto_SHA512_3Arg_Intrinsic;
+
 //===----------------------------------------------------------------------===//
 // CRC32
Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -890,6 +890,12 @@
   let ParserMatchClass = Imm0_63Operand;
 }

+def timm0_63 : Operand<i64>, TImmLeaf<i64, [{
+  return ((uint64_t)Imm) < 64;
+}]> {
+  let ParserMatchClass = Imm0_63Operand;
+}
+
 // imm0_31 predicate - True if the immediate is in the range [0,31]
 def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
@@ ... @@ class CryptoRRTied<bits<1>op0, bits<2>op1, string asm, string asmops>
-  : BaseCryptoV82<(outs V128:$Vd), (ins V128:$Vn, V128:$Vm), asm, asmops,
-                  "$Vm = $Vd", []> {
+  : BaseCryptoV82<(outs V128:$Vdst), (ins V128:$Vd, V128:$Vn), asm, asmops,
+                  "$Vd = $Vdst", []> {
   let Inst{31-25} = 0b1100111;
   let Inst{24-21} = 0b0110;
   let Inst{20-15} = 0b000001;
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -924,6 +924,49 @@
 def EOR3  : CryptoRRRR_16B<0b00, "eor3">;
 def BCAX  : CryptoRRRR_16B<0b01, "bcax">;
 def XAR   : CryptoRRRi6<"xar">;
+
+class SHA512H_pattern<Instruction INST, Intrinsic OpNode>
+  : Pat<(v2i64 (OpNode (v2i64 V128:$Vd), (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
+        (INST (v2i64 FPR128:$Vd), (v2i64 FPR128:$Vn), (v2i64 V128:$Vm))>;
+
+class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
+  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
+        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
+          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
+
+def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
+def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
+def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
+
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
+
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
+def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
+
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
+
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
+def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
+          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
+
+def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
+          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
+
+
 } // HasSHA3

 let Predicates = [HasSM4] in {
Index: llvm/test/CodeGen/AArch64/neon-sha3.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon-sha3.ll
@@ -0,0 +1,105 @@
+; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+sha3 -o - | FileCheck %s
+
+define <2 x i64> @test_vsha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512h:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512h q0, q1, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512h.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512h.i
+}
+
+define <2 x i64> @test_vsha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512h2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512h2 q0, q1, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512h2.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512h2.i
+}
+
+define <2 x i64> @test_vsha512su0(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsha512su0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512su0 v0.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512su0.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vsha512su0.i
+}
+
+define <2 x i64> @test_vsha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vsha512su1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sha512su1 v0.2d, v1.2d, v2.2d
+; CHECK-NEXT:    ret
+entry:
+  %vsha512su1.i = tail call <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c)
+  ret <2 x i64> %vsha512su1.i
+}
+
+define <2 x i64> @test_vrax1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vrax1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rax1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %vrax1.i = tail call <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %vrax1.i
+}
+
+define <2 x i64> @test_vxar(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vxar:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    xar v0.2d, v0.2d, v1.2d, #1
+; CHECK-NEXT:    ret
+entry:
+  %vxar.i = tail call <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64> %a, <2 x i64> %b, i64 1)
+  ret <2 x i64> %vxar.i
+}
+
+define <16 x i8> @test_bcax_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_bcax_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bcax v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %vbcax_8.i = tail call <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %vbcax_8.i
+}
+
+define <16 x i8> @test_eor3_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_eor3_8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    eor3 v0.16b, v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %veor3_8.i = tail call <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
+  ret <16 x i8> %veor3_8.i
+}
+
+declare <2 x i64> @llvm.aarch64.crypto.sha512h(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512h2(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512su0(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.sha512su1(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.rax1(<2 x i64>, <2 x i64>)
+declare <2 x i64> @llvm.aarch64.crypto.xar(<2 x i64>, <2 x i64>, i64 immarg)
+declare <16 x i8> @llvm.aarch64.crypto.bcaxu.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.bcaxu.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.bcaxu.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.bcaxu.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.bcaxs.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.bcaxs.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.bcaxs.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.bcaxs.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.eor3u.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.eor3u.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.eor3u.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.eor3u.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.aarch64.crypto.eor3s.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.aarch64.crypto.eor3s.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.crypto.eor3s.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.crypto.eor3s.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
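
Not part of the patch above: a minimal usage sketch of the C-level intrinsics this change exposes, assuming a clang that contains the patch and a target with the SHA3 extension enabled (for example -march=armv8.2-a+sha3). The function name keccak_column_mix, the operand roles, and the rotation amount 23 are illustrative only; the comments restate the architectural semantics of EOR3, RAX1, XAR and BCAX rather than anything specific to this patch.

// keccak_column_mix.c -- illustrative sketch, not from the patch.
// Build (assumed flags): clang --target=aarch64-linux-gnu -march=armv8.2-a+sha3 -c keccak_column_mix.c
#include <arm_neon.h>

uint64x2_t keccak_column_mix(uint64x2_t a, uint64x2_t b, uint64x2_t c,
                             uint64x2_t d, uint64x2_t parity) {
  uint64x2_t t = veor3q_u64(a, b, c);   // EOR3: a ^ b ^ c in a single instruction
  uint64x2_t r = vrax1q_u64(t, parity); // RAX1: t ^ rotate_left(parity, 1)
  uint64x2_t x = vxarq_u64(r, d, 23);   // XAR: rotate_right(r ^ d, 23); immediate must be in [0, 63]
  return vbcaxq_u64(x, a, b);           // BCAX: x ^ (a & ~b)
}

The XAR immediate is the operand that the new isVXAR flag and timm0_63 operand range-check to [0, 63], which is what aarch64-neon-range-checks.c exercises.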