diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -726,6 +726,54 @@
 def EOR3 : CryptoRRRR_16B<0b00, "eor3">;
 def BCAX : CryptoRRRR_16B<0b01, "bcax">;
 def XAR : CryptoRRRi6<"xar">;
+
+// Select RAX1 for a xor whose other operand is a rotate-left-by-one of each
+// 64-bit lane of $rhs, matched in its expanded (shl 1) | (lshr 63) form.
+def : Pat<(xor (or (AArch64vshl v2i64:$rhs, (i32 1)),
+                   (AArch64vlshr v2i64:$rhs, (i32 63))),
+               v2i64:$lhs),
+          (RAX1 $lhs, $rhs)>;
+
+// v1i64 form of the pattern above: both operands are placed in a 128-bit
+// register through dsub and the dsub half of the RAX1 result is returned.
+// NOTE(review): SUBREG_TO_REG with (i32 0) asserts that the bits outside dsub
+// are zero, which nothing here establishes. Only dsub of the result is read,
+// so this looks harmless, but confirm whether INSERT_SUBREG into IMPLICIT_DEF
+// was intended instead.
+def : Pat<(xor (or (AArch64vshl v1i64:$rhs, (i32 1)),
+                   (AArch64vlshr v1i64:$rhs, (i32 63))),
+               v1i64:$lhs),
+          (EXTRACT_SUBREG
+            (RAX1 (SUBREG_TO_REG (i32 0), $lhs, dsub),
+                  (SUBREG_TO_REG (i32 0), $rhs, dsub)),
+            dsub)>;
+
+// 128-bit vectors: a xor of three values selects EOR3, and a xor with
+// (and x, (vnot y)) selects BCAX.
+foreach ty = [v16i8, v8i16, v4i32, v2i64] in {
+  def : Pat<(xor ty:$op0, (xor ty:$op1, ty:$op2)),
+            (EOR3 $op0, $op1, $op2)>;
+  def : Pat<(xor ty:$op0, (and ty:$op1, (vnot ty:$op2))),
+            (BCAX $op0, $op1, $op2)>;
+}
+
+// 64-bit vectors: same two patterns, with every operand widened through dsub
+// and the dsub half of the 128-bit result extracted. The SUBREG_TO_REG
+// NOTE(review) on the v1i64 RAX1 pattern applies here as well.
+foreach ty = [v8i8, v4i16, v2i32, v1i64] in {
+  def : Pat<(xor ty:$op0, (xor ty:$op1, ty:$op2)),
+            (EXTRACT_SUBREG
+              (EOR3 (SUBREG_TO_REG (i32 0), $op0, dsub),
+                    (SUBREG_TO_REG (i32 0), $op1, dsub),
+                    (SUBREG_TO_REG (i32 0), $op2, dsub)),
+              dsub)>;
+  def : Pat<(xor ty:$op0, (and ty:$op1, (vnot ty:$op2))),
+            (EXTRACT_SUBREG
+              (BCAX (SUBREG_TO_REG (i32 0), $op0, dsub),
+                    (SUBREG_TO_REG (i32 0), $op1, dsub),
+                    (SUBREG_TO_REG (i32 0), $op2, dsub)),
+              dsub)>;
+}
 } // HasSHA3
 
 let Predicates = [HasSM4] in {
diff --git a/llvm/test/CodeGen/AArch64/crypto-sha3.ll b/llvm/test/CodeGen/AArch64/crypto-sha3.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/crypto-sha3.ll
@@ -0,0 +1,302 @@
+; RUN: llc -mtriple=arm64-apple-ios -mattr=+sha3 %s -o - | FileCheck %s
+
+; A xor of three vectors is expected to select EOR3. "lefttree" and
+; "righttree" say which operand of the outer xor is the inner xor.
+
+define <16 x i8> @test_eor3_v16i8_lefttree(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8> %mhs) {
+; CHECK-LABEL: test_eor3_v16i8_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <16 x i8> %lhs, %mhs
+  %res = xor <16 x i8> %tmp, %rhs
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_eor3_v16i8_righttree(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8> %mhs) {
+; CHECK-LABEL: test_eor3_v16i8_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <16 x i8> %lhs, %mhs
+  %res = xor <16 x i8> %rhs, %tmp
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_eor3_v8i16_lefttree(<8 x i16> %lhs, <8 x i16> %rhs, <8 x i16> %mhs) {
+; CHECK-LABEL: test_eor3_v8i16_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <8 x i16> %lhs, %mhs
+  %res = xor <8 x i16> %tmp, %rhs
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_eor3_v8i16_righttree(<8 x i16> %lhs, <8 x i16> %rhs, <8 x i16> %mhs) {
+; CHECK-LABEL: test_eor3_v8i16_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <8 x i16> %lhs, %mhs
+  %res = xor <8 x i16> %rhs, %tmp
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_eor3_v4i32_lefttree(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %mhs) {
+; CHECK-LABEL: test_eor3_v4i32_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <4 x i32> %lhs, %mhs
+  %res = xor <4 x i32> %tmp, %rhs
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_eor3_v4i32_righttree(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %mhs) {
+; CHECK-LABEL: test_eor3_v4i32_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <4 x i32> %lhs, %mhs
+  %res = xor <4 x i32> %rhs, %tmp
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_eor3_v2i64_lefttree(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %mhs) {
+; CHECK-LABEL: test_eor3_v2i64_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <2 x i64> %lhs, %mhs
+  %res = xor <2 x i64> %tmp, %rhs
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_eor3_v2i64_righttree(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %mhs) {
+; CHECK-LABEL: test_eor3_v2i64_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <2 x i64> %lhs, %mhs
+  %res = xor <2 x i64> %rhs, %tmp
+  ret <2 x i64> %res
+}
+
+define <8 x i8> @test_eor3_v8i8_lefttree(<8 x i8> %lhs, <8 x i8> %rhs, <8 x i8> %mhs) {
+; CHECK-LABEL: test_eor3_v8i8_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <8 x i8> %lhs, %mhs
+  %res = xor <8 x i8> %tmp, %rhs
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @test_eor3_v8i8_righttree(<8 x i8> %lhs, <8 x i8> %rhs, <8 x i8> %mhs) {
+; CHECK-LABEL: test_eor3_v8i8_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <8 x i8> %lhs, %mhs
+  %res = xor <8 x i8> %rhs, %tmp
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_eor3_v4i16_lefttree(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %mhs) {
+; CHECK-LABEL: test_eor3_v4i16_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <4 x i16> %lhs, %mhs
+  %res = xor <4 x i16> %tmp, %rhs
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @test_eor3_v4i16_righttree(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %mhs) {
+; CHECK-LABEL: test_eor3_v4i16_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <4 x i16> %lhs, %mhs
+  %res = xor <4 x i16> %rhs, %tmp
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_eor3_v2i32_lefttree(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %mhs) {
+; CHECK-LABEL: test_eor3_v2i32_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <2 x i32> %lhs, %mhs
+  %res = xor <2 x i32> %tmp, %rhs
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test_eor3_v2i32_righttree(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %mhs) {
+; CHECK-LABEL: test_eor3_v2i32_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <2 x i32> %lhs, %mhs
+  %res = xor <2 x i32> %rhs, %tmp
+  ret <2 x i32> %res
+}
+
+define <1 x i64> @test_eor3_v1i64_lefttree(<1 x i64> %lhs, <1 x i64> %rhs, <1 x i64> %mhs) {
+; CHECK-LABEL: test_eor3_v1i64_lefttree:
+; CHECK: eor3.16b v0, v0, v2, v1
+  %tmp = xor <1 x i64> %lhs, %mhs
+  %res = xor <1 x i64> %tmp, %rhs
+  ret <1 x i64> %res
+}
+
+define <1 x i64> @test_eor3_v1i64_righttree(<1 x i64> %lhs, <1 x i64> %rhs, <1 x i64> %mhs) {
+; CHECK-LABEL: test_eor3_v1i64_righttree:
+; CHECK: eor3.16b v0, v1, v0, v2
+  %tmp = xor <1 x i64> %lhs, %mhs
+  %res = xor <1 x i64> %rhs, %tmp
+  ret <1 x i64> %res
+}
+
+; A xor with an and-not, (and x, (xor y, all-ones)), is expected to select
+; BCAX. "lefttree"/"righttree" say which outer-xor operand is the and.
+
+define <16 x i8> @test_bcax_v16i8_lefttree(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8> %mhs) {
+; CHECK-LABEL: test_bcax_v16i8_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <16 x i8> %rhs, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %bic = and <16 x i8> %mhs, %not
+  %res = xor <16 x i8> %bic, %lhs
+  ret <16 x i8> %res
+}
+
+define <16 x i8> @test_bcax_v16i8_righttree(<16 x i8> %lhs, <16 x i8> %rhs, <16 x i8> %mhs) {
+; CHECK-LABEL: test_bcax_v16i8_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <16 x i8> %rhs, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %bic = and <16 x i8> %mhs, %not
+  %res = xor <16 x i8> %lhs, %bic
+  ret <16 x i8> %res
+}
+
+define <8 x i16> @test_bcax_v8i16_lefttree(<8 x i16> %lhs, <8 x i16> %rhs, <8 x i16> %mhs) {
+; CHECK-LABEL: test_bcax_v8i16_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <8 x i16> %rhs, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %bic = and <8 x i16> %mhs, %not
+  %res = xor <8 x i16> %bic, %lhs
+  ret <8 x i16> %res
+}
+
+define <8 x i16> @test_bcax_v8i16_righttree(<8 x i16> %lhs, <8 x i16> %rhs, <8 x i16> %mhs) {
+; CHECK-LABEL: test_bcax_v8i16_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <8 x i16> %rhs, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  %bic = and <8 x i16> %mhs, %not
+  %res = xor <8 x i16> %lhs, %bic
+  ret <8 x i16> %res
+}
+
+define <4 x i32> @test_bcax_v4i32_lefttree(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %mhs) {
+; CHECK-LABEL: test_bcax_v4i32_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <4 x i32> %rhs, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bic = and <4 x i32> %mhs, %not
+  %res = xor <4 x i32> %bic, %lhs
+  ret <4 x i32> %res
+}
+
+define <4 x i32> @test_bcax_v4i32_righttree(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %mhs) {
+; CHECK-LABEL: test_bcax_v4i32_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <4 x i32> %rhs, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %bic = and <4 x i32> %mhs, %not
+  %res = xor <4 x i32> %lhs, %bic
+  ret <4 x i32> %res
+}
+
+define <2 x i64> @test_bcax_v2i64_lefttree(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %mhs) {
+; CHECK-LABEL: test_bcax_v2i64_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <2 x i64> %rhs, <i64 -1, i64 -1>
+  %bic = and <2 x i64> %mhs, %not
+  %res = xor <2 x i64> %bic, %lhs
+  ret <2 x i64> %res
+}
+
+define <2 x i64> @test_bcax_v2i64_righttree(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %mhs) {
+; CHECK-LABEL: test_bcax_v2i64_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <2 x i64> %rhs, <i64 -1, i64 -1>
+  %bic = and <2 x i64> %mhs, %not
+  %res = xor <2 x i64> %lhs, %bic
+  ret <2 x i64> %res
+}
+
+define <8 x i8> @test_bcax_v8i8_lefttree(<8 x i8> %lhs, <8 x i8> %rhs, <8 x i8> %mhs) {
+; CHECK-LABEL: test_bcax_v8i8_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <8 x i8> %rhs, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %bic = and <8 x i8> %mhs, %not
+  %res = xor <8 x i8> %bic, %lhs
+  ret <8 x i8> %res
+}
+
+define <8 x i8> @test_bcax_v8i8_righttree(<8 x i8> %lhs, <8 x i8> %rhs, <8 x i8> %mhs) {
+; CHECK-LABEL: test_bcax_v8i8_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <8 x i8> %rhs, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %bic = and <8 x i8> %mhs, %not
+  %res = xor <8 x i8> %lhs, %bic
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_bcax_v4i16_lefttree(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %mhs) {
+; CHECK-LABEL: test_bcax_v4i16_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <4 x i16> %rhs, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %bic = and <4 x i16> %mhs, %not
+  %res = xor <4 x i16> %bic, %lhs
+  ret <4 x i16> %res
+}
+
+define <4 x i16> @test_bcax_v4i16_righttree(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %mhs) {
+; CHECK-LABEL: test_bcax_v4i16_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <4 x i16> %rhs, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %bic = and <4 x i16> %mhs, %not
+  %res = xor <4 x i16> %lhs, %bic
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_bcax_v2i32_lefttree(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %mhs) {
+; CHECK-LABEL: test_bcax_v2i32_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <2 x i32> %rhs, <i32 -1, i32 -1>
+  %bic = and <2 x i32> %mhs, %not
+  %res = xor <2 x i32> %bic, %lhs
+  ret <2 x i32> %res
+}
+
+define <2 x i32> @test_bcax_v2i32_righttree(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %mhs) {
+; CHECK-LABEL: test_bcax_v2i32_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <2 x i32> %rhs, <i32 -1, i32 -1>
+  %bic = and <2 x i32> %mhs, %not
+  %res = xor <2 x i32> %lhs, %bic
+  ret <2 x i32> %res
+}
+
+define <1 x i64> @test_bcax_v1i64_lefttree(<1 x i64> %lhs, <1 x i64> %rhs, <1 x i64> %mhs) {
+; CHECK-LABEL: test_bcax_v1i64_lefttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <1 x i64> %rhs, <i64 -1>
+  %bic = and <1 x i64> %mhs, %not
+  %res = xor <1 x i64> %bic, %lhs
+  ret <1 x i64> %res
+}
+
+define <1 x i64> @test_bcax_v1i64_righttree(<1 x i64> %lhs, <1 x i64> %rhs, <1 x i64> %mhs) {
+; CHECK-LABEL: test_bcax_v1i64_righttree:
+; CHECK: bcax.16b v0, v0, v2, v1
+  %not = xor <1 x i64> %rhs, <i64 -1>
+  %bic = and <1 x i64> %mhs, %not
+  %res = xor <1 x i64> %lhs, %bic
+  ret <1 x i64> %res
+}
+
+; A xor with a 64-bit rotate left by one, written as (shl 1) | (lshr 63), is
+; expected to select RAX1.
+
+define <2 x i64> @test_rax1_v2i64(<2 x i64> %lhs, <2 x i64> %rhs) {
+; CHECK-LABEL: test_rax1_v2i64:
+; CHECK: rax1.2d v0, v1, v0
+  %left = shl <2 x i64> %lhs, <i64 1, i64 1>
+  %right = lshr <2 x i64> %lhs, <i64 63, i64 63>
+  %rotate = or <2 x i64> %left, %right
+  %res = xor <2 x i64> %rhs, %rotate
+  ret <2 x i64> %res
+}
+
+define <1 x i64> @test_rax1_v1i64(<1 x i64> %lhs, <1 x i64> %rhs) {
+; CHECK-LABEL: test_rax1_v1i64:
+; CHECK: rax1.2d v0, v1, v0
+  %left = shl <1 x i64> %lhs, <i64 1>
+  %right = lshr <1 x i64> %lhs, <i64 63>
+  %rotate = or <1 x i64> %left, %right
+  %res = xor <1 x i64> %rhs, %rotate
+  ret <1 x i64> %res
+}
+