InstCombine: fold an XOR that feeds the data operand of an AESE/AESD intrinsic
whose key operand is all zeros. The xor's two operands are passed to the
intrinsic as its data and key operands instead, so the call no longer uses the
separate xor. Handles the ARM (llvm.arm.neon.aese/aesd) and AArch64
(llvm.aarch64.crypto.aese/aesd) intrinsics; one new test file per target
checks the fold.

Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2966,6 +2966,23 @@
 
     break;
   }
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese: {
+    Value *DataArg = II->getArgOperand(0);
+    Value *KeyArg = II->getArgOperand(1);
+
+    // AESE/AESD XOR their operands first: aes(D ^ K, 0) == aes(D, K).
+    Value *Data, *Key;
+    if (match(KeyArg, m_ZeroInt()) &&
+        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+      II->setArgOperand(0, Data);
+      II->setArgOperand(1, Key);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
 
Index: test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 AES intrinsic variants: xor(data, key) feeding aese/aesd with a zero key folds to aese/aesd(data, key).
+
+define <16 x i8> @combineXorAeseARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseARM64(
+; CHECK-NEXT:    %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:    ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdARM64(
+; CHECK-NEXT:    %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:    ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #1
+
Index: test/Transforms/InstCombine/ARM/aes-intrinsics.ll
===================================================================
--- /dev/null
+++ test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM AES intrinsic variants: xor(data, key) feeding aese/aesd with a zero key folds to aese/aesd(data, key).
+
+define <16 x i8> @combineXorAeseARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseARM(
+; CHECK-NEXT:    %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:    ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdARM(
+; CHECK-NEXT:    %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT:    ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #1
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #1