Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2966,6 +2966,23 @@
     break;
   }
+  case Intrinsic::arm_neon_aesd:
+  case Intrinsic::arm_neon_aese:
+  case Intrinsic::aarch64_crypto_aesd:
+  case Intrinsic::aarch64_crypto_aese: {
+    Value *DataArg = II->getArgOperand(0);
+    Value *KeyArg = II->getArgOperand(1);
+
+    // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
+    Value *Data, *Key;
+    if (match(KeyArg, m_ZeroInt()) &&
+        match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
+      II->setArgOperand(0, Data);
+      II->setArgOperand(1, Key);
+      return II;
+    }
+    break;
+  }
   case Intrinsic::amdgcn_rcp: {
     Value *Src = II->getArgOperand(0);
Index: llvm/trunk/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
+++ llvm/trunk/test/Transforms/InstCombine/AArch64/aes-intrinsics.ll
@@ -0,0 +1,44 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM64 AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> )
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %data.xor, <16 x i8> )
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM64(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM64(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM64(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> )
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %data.xor, <16 x i8> )
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8>, <16 x i8>) #0
+
Index: llvm/trunk/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
+++ llvm/trunk/test/Transforms/InstCombine/ARM/aes-intrinsics.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+; ARM AES intrinsic variants
+
+define <16 x i8> @combineXorAeseZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAeseNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAeseNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> )
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %data.xor, <16 x i8> )
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdZeroARM(
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data, <16 x i8> %key)
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> zeroinitializer)
+  ret <16 x i8> %data.aes
+}
+
+define <16 x i8> @combineXorAesdNonZeroARM(<16 x i8> %data, <16 x i8> %key) {
+; CHECK-LABEL: @combineXorAesdNonZeroARM(
+; CHECK-NEXT: %data.xor = xor <16 x i8> %data, %key
+; CHECK-NEXT: %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> )
+; CHECK-NEXT: ret <16 x i8> %data.aes
+  %data.xor = xor <16 x i8> %data, %key
+  %data.aes = tail call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %data.xor, <16 x i8> )
+  ret <16 x i8> %data.aes
+}
+
+declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>) #0
+declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>) #0
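
Note (not part of the patch): the fold is legal because the AArch64/ARMv8 AESE and AESD instructions XOR their two register operands (AddRoundKey) before applying the SubBytes/ShiftRows steps, so an explicit IR xor feeding an AES intrinsic whose key operand is zero can simply be moved into the key operand. Below is a minimal C++ sketch of source code that produces the matched IR shape, assuming the ACLE NEON intrinsics from <arm_neon.h> and an AES-capable target (e.g. -march=armv8-a+crypto); the function name one_aes_round is hypothetical and only for illustration.

#include <arm_neon.h>

// Hypothetical example: the round key is XORed explicitly and a zero key is
// passed to vaeseq_u8. This lowers to roughly
//   %x = xor <16 x i8> %data, %key
//   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %x, <16 x i8> zeroinitializer)
// which the new InstCombine case rewrites to aese(%data, %key), letting the
// AESE instruction's built-in AddRoundKey perform the XOR.
uint8x16_t one_aes_round(uint8x16_t data, uint8x16_t key) {
  return vaeseq_u8(veorq_u8(data, key), vdupq_n_u8(0));
}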