diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -555,6 +555,29 @@
   if (auto *SplatVal = getSplatValue(Vec))
     return IC.replaceInstUsesWith(II, SplatVal);
 
+  // lastX (binop (x, splat(y))) --> binop(lastX(x), y)
+  // lastX (binop (splat(x), y)) --> binop(x, lastX(y))
+  Value *LHS, *RHS;
+  if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS)))) &&
+      (isSplatValue(LHS) || isSplatValue(RHS))) {
+    BinaryOperator *NewBinOp = nullptr;
+    auto OpC = cast<BinaryOperator>(Vec)->getOpcode();
+    auto *NewII = cast<IntrinsicInst>(II.clone());
+    NewII->insertBefore(&II);
+
+    if (auto *SplatVal = getSplatValue(RHS)) {
+      NewII->setArgOperand(1, LHS);
+      NewBinOp =
+          BinaryOperator::Create(OpC, NewII, SplatVal, Vec->getName(), &II);
+    } else if (auto *SplatVal = getSplatValue(LHS)) {
+      NewII->setArgOperand(1, RHS);
+      NewBinOp =
+          BinaryOperator::Create(OpC, SplatVal, NewII, Vec->getName(), &II);
+    }
+
+    return IC.replaceInstUsesWith(II, NewBinOp);
+  }
+
   auto *C = dyn_cast<Constant>(Pg);
   if (IsAfter && C && C->isNullValue()) {
     // The intrinsic is extracting lane 0 so use an extract instead.
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll
@@ -163,6 +163,49 @@
   ret i8 %last
 }
 
+; Check that we move the lastb before the binary operation so that the new binary op is scalar.
+define i8 @lastb_binop_RHS_splat(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_RHS_splat(
+; OPT-NEXT: %1 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vector)
+; OPT-NEXT: %binop1 = udiv i8 %1, %scalar
+; OPT-NEXT: ret i8 %binop1
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = udiv <vscale x 16 x i8> %vector, %splat
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+define i8 @lastb_binop_LHS_splat(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_LHS_splat(
+; OPT-NEXT: %1 = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vector)
+; OPT-NEXT: %binop1 = udiv i8 %scalar, %1
+; OPT-NEXT: ret i8 %binop1
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = udiv <vscale x 16 x i8> %splat, %vector
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+; Check that we don't do anything as the binary op has multiple uses.
+define i8 @lastb_binop_nochange(<vscale x 16 x i1> %pg, i8 %scalar, <vscale x 16 x i8> %vector) #0 {
+; OPT-LABEL: @lastb_binop_nochange(
+; OPT-NEXT: %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+; OPT-NEXT: %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; OPT-NEXT: %binop = udiv <vscale x 16 x i8> %vector, %splat
+; OPT-NEXT: %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+; OPT-NEXT: call void @use(<vscale x 16 x i8> %binop)
+; OPT-NEXT: ret i8 %last
+  %splat_insert = insertelement <vscale x 16 x i8> poison, i8 %scalar, i32 0
+  %splat = shufflevector <vscale x 16 x i8> %splat_insert, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %binop = udiv <vscale x 16 x i8> %vector, %splat
+  %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %binop)
+  call void @use(<vscale x 16 x i8> %binop)
+  ret i8 %last
+}
+
+declare void @use(<vscale x 16 x i8>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
 declare i8 @llvm.aarch64.sve.lasta.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)
 declare i8 @llvm.aarch64.sve.lastb.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>)