diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1229,6 +1229,43 @@
   return None;
 }
 
+static Optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
+                                                    IntrinsicInst &II) {
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  Value *Pred = II.getOperand(0);
+  Value *Vec = II.getOperand(1);
+  Value *Shift = II.getOperand(2);
+
+  // Convert SRSHL into the simpler LSL intrinsic when fed by an ABS intrinsic.
+  Value *AbsPred, *MergedValue;
+  if (!match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_sqabs>(
+                      m_Value(MergedValue), m_Value(AbsPred), m_Value())) &&
+      !match(Vec, m_Intrinsic<Intrinsic::aarch64_sve_abs>(
+                      m_Value(MergedValue), m_Value(AbsPred), m_Value())))
+
+    return None;
+
+  // Transform is valid if any of the following are true:
+  // * The ABS merge value is an undef or non-negative
+  // * The ABS predicate is all active
+  // * The ABS predicate and the SRSHL predicates are the same
+  if (!isa<UndefValue>(MergedValue) &&
+      !match(MergedValue, m_NonNegative()) &&
+      AbsPred != Pred && !isAllActivePredicate(AbsPred))
+    return None;
+
+  // Only valid when the shift amount is non-negative, otherwise the rounding
+  // behaviour of SRSHL cannot be ignored.
+  if (!match(Shift, m_NonNegative()))
+    return None;
+
+  auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
+                                     {Pred, Vec, Shift});
+
+  return IC.replaceInstUsesWith(II, LSL);
+}
+
 Optional<Instruction *>
 AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                      IntrinsicInst &II) const {
@@ -1296,6 +1333,8 @@
     return instCombineSVESDIV(IC, II);
   case Intrinsic::aarch64_sve_sel:
     return instCombineSVESel(IC, II);
+  case Intrinsic::aarch64_sve_srshl:
+    return instCombineSVESrshl(IC, II);
   }
 
   return None;
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-abs-srshl.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define <vscale x 8 x i16> @srshl_abs_undef_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_undef_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_zero_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_zero_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_positive_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_positive_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_all_active_pred(
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_same_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg) #0 {
+; CHECK-LABEL: @srshl_abs_same_pred(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_sqabs(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_sqabs(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_negative_merge(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_negative_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -1, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+;
+  %absmerge = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -1)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %absmerge, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_nonconst_merge(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_nonconst_merge(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_not_all_active_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_not_all_active_pred(
+; CHECK-NEXT:    [[PG:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+;
+  %pg = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 8)
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_diff_pred(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_diff_pred(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> %b, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 8 x i16> @srshl_abs_negative_shift(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg2) #0 {
+; CHECK-LABEL: @srshl_abs_negative_shift(
+; CHECK-NEXT:    [[ABS:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x i16> [[A:%.*]])
+; CHECK-NEXT:    [[SHR:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> [[PG2:%.*]], <vscale x 8 x i16> [[ABS]], <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 -2, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[SHR]]
+;
+  %abs = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a)
+  %splat = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16 -2)
+  %shr = tail call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg2, <vscale x 8 x i16> %abs, <vscale x 8 x i16> %splat)
+  ret <vscale x 8 x i16> %shr
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 immarg)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+
+attributes #0 = { "target-features"="+sve,+sve2" }