Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1028,6 +1028,54 @@
   return None;
 }
 
+// Fold a predicated signed divide whose divisor is a constant splat of
+// +/-2^N (N >= 1) into an ASRD (arithmetic shift right for divide) by N,
+// followed by a predicated NEG when the divisor is negative. Returns None,
+// leaving II untouched, for every other divisor.
+static Optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
+                                                  IntrinsicInst &II) {
+  Value *Pred = II.getOperand(0);
+  Value *Vec = II.getOperand(1);
+  Value *DivVec = II.getOperand(2);
+
+  // getSplatValue() yields null when no splatted scalar can be identified,
+  // so use the null-tolerant cast instead of asserting inside dyn_cast.
+  Value *SplatValue = getSplatValue(DivVec);
+  auto *SplatConstantInt = dyn_cast_or_null<ConstantInt>(SplatValue);
+  if (!SplatConstantInt)
+    return None;
+
+  // Reduce the divisor to its magnitude and remember the sign. The minimum
+  // signed value negates to itself, which keeps it on the negative path;
+  // testing isPowerOf2() on the raw bits would mistake it for +2^(W-1).
+  APInt Divisor = SplatConstantInt->getValue();
+  const bool IsNegative = Divisor.isNegative();
+  if (IsNegative)
+    Divisor.negate();
+  if (!Divisor.isPowerOf2())
+    return None;
+
+  // A divisor of +/-1 would need a shift of zero, which ASRD cannot encode
+  // (its immediate starts at 1), so leave the divide alone.
+  const unsigned ShiftAmount = Divisor.logBase2();
+  if (ShiftAmount == 0)
+    return None;
+
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Constant *DivisorLog2 = ConstantInt::get(Builder.getInt32Ty(), ShiftAmount);
+  auto *ASRD = Builder.CreateIntrinsic(
+      Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
+  if (!IsNegative)
+    return IC.replaceInstUsesWith(II, ASRD);
+
+  // x / -2^N == -(x / 2^N): apply the predicated NEG to the shifted result,
+  // passing ASRD as the first NEG operand as well.
+  auto *NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
+                                      {ASRD->getType()}, {ASRD, Pred, ASRD});
+  return IC.replaceInstUsesWith(II, NEG);
+}
+
 Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -1088,6 +1136,8 @@
     return instCombineSVELD1(IC, II, DL);
   case Intrinsic::aarch64_sve_st1:
     return instCombineSVEST1(IC, II, DL);
+  case Intrinsic::aarch64_sve_sdiv:
+    return instCombineSVESDIV(IC, II);
   }
 
   return None;
Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll
===================================================================
--- /dev/null
+++ 
llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-sdiv.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Divisor is a splat of 2^23: the divide becomes ASRD #23.
+define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+; Divisor is a splat of -(2^23): ASRD #23 followed by a predicated NEG.
+define <vscale x 4 x i32> @sdiv_i32_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], i32 23)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[TMP1]], <vscale x 4 x i1> [[PG]], <vscale x 4 x i32> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388608, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+; Same as @sdiv_i32, for 64-bit elements.
+define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+  ret <vscale x 2 x i64> %out
+}
+
+; Same as @sdiv_i32_neg, for 64-bit elements.
+define <vscale x 2 x i64> @sdiv_i64_neg(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i64_neg(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x i64> [[A:%.*]], i32 23)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.aarch64.sve.neg.nxv2i64(<vscale x 2 x i64> [[TMP1]], <vscale x 2 x i1> [[PG]], <vscale x 2 x i64> [[TMP1]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
+;
+  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 -8388608, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+  ret <vscale x 2 x i64> %out
+}
+
+; 2^23 - 1 is not a power of two: the divide must be left alone.
+define <vscale x 4 x i32> @sdiv_i32_not_base2(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_base2(
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[OUT]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+; -(2^23 - 1) is not a negated power of two: the divide must be left alone.
+define <vscale x 4 x i32> @sdiv_i32_not_base2_neg(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_base2_neg(
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[OUT]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 -8388607, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+; A zero divisor must not be combined; only the splat constant is simplified.
+define <vscale x 4 x i32> @sdiv_i32_not_zero(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) #0 {
+; CHECK-LABEL: @sdiv_i32_not_zero(
+; CHECK-NEXT:    [[OUT:%.*]] = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[OUT]]
+;
+  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 0, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+  ret <vscale x 4 x i32> %out
+}
+
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sdiv.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+attributes #0 = { "target-features"="+sve" }