// Patch summary (reviewer notes). This text sits ahead of the first
// `diff --git` header, so it is commentary and not part of any hunk.
//
// llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp -- instCombineSVELast:
//   * Creates an IRBuilder on II's context and sets its insert point to II
//     (the lasta/lastb intrinsic call being combined).
//   * Reads the intrinsic ID once into IntrinsicID; IsAfter is now derived
//     from it (true for aarch64_sve_lasta).
//   * Adds a new fold, placed after the existing lastX(splat(X)) --> X fold:
//         lastX(binop(x, y)) --> binop(lastX(x), lastX(y))
//     It applies only when the vector binop matches m_OneUse and
//     isSplatValue() is true for at least one of its operands. A new lastX
//     call with the same predicate Pg is built for each operand, and the
//     scalar binop is built with BinaryOperator::CreateWithCopiedFlags using
//     the old binop's opcode and name. The old binop is passed as the flag
//     source and &II as the final argument (presumably the insertion point --
//     confirm against the BinaryOperator API).
//
// llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll:
//   * lasta_splat / lastb_splat: the expected `ret` now uses the FileCheck
//     pattern [[A:%.*]] instead of the literal %a.
//   * New lastb tests with the splat on the RHS and on the LHS, each for
//     sdiv, sdiv exact, fdiv and fdiv fast. The CHECK lines expect the
//     `exact` / `fast` flags to be kept on the scalar binop.
//   * lastb_binop_LHS_RHS_splat_sdiv: both operands are splats; the CHECK
//     lines expect only a scalar sdiv and no lastb call.
//   * lastb_binop_nochange: the binop has a second use (call to @use); the
//     CHECK lines expect the IR to be left as written.
//   * Adds declarations for @use and @llvm.aarch64.sve.lastb.nxv4f32.
//
// NOTE(review): this copy of the patch appears to have lost its line breaks
// and all angle-bracketed text (template arguments, scalable vector types).
// Confirm against the original review before applying it.
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -544,14 +544,34 @@ static Optional instCombineSVELast(InstCombiner &IC, IntrinsicInst &II) { + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); Value *Pg = II.getArgOperand(0); Value *Vec = II.getArgOperand(1); - bool IsAfter = II.getIntrinsicID() == Intrinsic::aarch64_sve_lasta; + auto IntrinsicID = II.getIntrinsicID(); + bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta; // lastX(splat(X)) --> X if (auto *SplatVal = getSplatValue(Vec)) return IC.replaceInstUsesWith(II, SplatVal); + // If x and/or y is a splat value then: + // lastX (binop (x, y)) --> binop(lastX(x), lastX(y)) + Value *LHS, *RHS; + if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS))))) { + if (isSplatValue(LHS) || isSplatValue(RHS)) { + auto *OldBinOp = cast(Vec); + auto OpC = OldBinOp->getOpcode(); + auto *NewLHS = + Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS}); + auto *NewRHS = + Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS}); + auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags( + OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II); + return IC.replaceInstUsesWith(II, NewBinOp); + } + } + auto *C = dyn_cast(Pg); if (IsAfter && C && C->isNullValue()) { // The intrinsic is extracting lane 0 so use an extract instead. diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll @@ -147,7 +147,8 @@ ; Return the splatted value irrespective of the predicate. 
define i8 @lasta_splat( %pg, i8 %a) #0 { ; OPT-LABEL: @lasta_splat( -; OPT-NEXT: ret i8 %a +; OPT-NEXT: ret i8 [[A:%.*]] +; %splat_insert = insertelement poison, i8 %a, i32 0 %splat = shufflevector %splat_insert, poison, zeroinitializer %last = tail call i8 @llvm.aarch64.sve.lasta.nxv16i8( %pg, %splat) @@ -156,15 +157,155 @@ define i8 @lastb_splat( %pg, i8 %a) #0 { ; OPT-LABEL: @lastb_splat( -; OPT-NEXT: ret i8 %a +; OPT-NEXT: ret i8 [[A:%.*]] +; %splat_insert = insertelement poison, i8 %a, i32 0 %splat = shufflevector %splat_insert, poison, zeroinitializer %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %splat) ret i8 %last } +; Check that we move the lastb before the binary operation so that the new binary op is scalar. +define i8 @lastb_binop_RHS_splat_sdiv( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_sdiv( +; OPT-NEXT: [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = sdiv i8 [[TMP1]], [[SCALAR:%.*]] +; OPT-NEXT: ret i8 [[BINOP1]] +; + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define i8 @lastb_binop_RHS_splat_sdiv_exact( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_sdiv_exact( +; OPT-NEXT: [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = sdiv exact i8 [[TMP1]], [[SCALAR:%.*]] +; OPT-NEXT: ret i8 [[BINOP1]] +; + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv exact %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_RHS_splat_fdiv_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: 
@lastb_binop_RHS_splat_fdiv_float_fast( +; OPT-NEXT: [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = fdiv fast float [[TMP1]], [[SCALAR:%.*]] +; OPT-NEXT: ret float [[BINOP1]] +; + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv fast %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_RHS_splat_fdiv_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_fdiv_float( +; OPT-NEXT: [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = fdiv float [[TMP1]], [[SCALAR:%.*]] +; OPT-NEXT: ret float [[BINOP1]] +; + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define i8 @lastb_binop_LHS_splat_sdiv( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_sdiv( +; OPT-NEXT: [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = sdiv i8 [[SCALAR:%.*]], [[TMP1]] +; OPT-NEXT: ret i8 [[BINOP1]] +; + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv %splat, %vector + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define i8 @lastb_binop_LHS_splat_sdiv_exact( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_sdiv_exact( +; OPT-NEXT: [[TMP1:%.*]] = call i8 @llvm.aarch64.sve.lastb.nxv16i8( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = sdiv exact i8 [[SCALAR:%.*]], [[TMP1]] +; OPT-NEXT: ret i8 [[BINOP1]] +; + %splat_insert = 
insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv exact %splat, %vector + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_LHS_splat_fdiv_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float_fast( +; OPT-NEXT: [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = fdiv fast float [[SCALAR:%.*]], [[TMP1]] +; OPT-NEXT: ret float [[BINOP1]] +; + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv fast %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_LHS_splat_fdiv_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_fdiv_float( +; OPT-NEXT: [[TMP1:%.*]] = call float @llvm.aarch64.sve.lastb.nxv4f32( [[PG:%.*]], [[VECTOR:%.*]]) +; OPT-NEXT: [[BINOP1:%.*]] = fdiv float [[SCALAR:%.*]], [[TMP1]] +; OPT-NEXT: ret float [[BINOP1]] +; + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define i8 @lastb_binop_LHS_RHS_splat_sdiv( %pg, i8 %scalar1, i8 %scalar2) #0 { +; OPT-LABEL: @lastb_binop_LHS_RHS_splat_sdiv( +; OPT-NEXT: [[BINOP1:%.*]] = sdiv i8 [[SCALAR1:%.*]], [[SCALAR2:%.*]] +; OPT-NEXT: ret i8 [[BINOP1]] +; + %splat_insert = insertelement poison, i8 %scalar1, i8 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %splat_insert2 = insertelement poison, i8 %scalar2, i8 0 + %splat2 = shufflevector %splat_insert2, poison, zeroinitializer + %binop = sdiv %splat, %splat2 + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, 
%binop) + ret i8 %last +} + +; Check that we don't do anything as the binary op has multiple uses. +define i8 @lastb_binop_nochange( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_nochange( +; OPT-NEXT: [[SPLAT_INSERT:%.*]] = insertelement poison, i8 [[SCALAR:%.*]], i32 0 +; OPT-NEXT: [[SPLAT:%.*]] = shufflevector [[SPLAT_INSERT]], poison, zeroinitializer +; OPT-NEXT: [[BINOP:%.*]] = sdiv [[VECTOR:%.*]], [[SPLAT]] +; OPT-NEXT: [[LAST:%.*]] = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( [[PG:%.*]], [[BINOP]]) +; OPT-NEXT: call void @use( [[BINOP]]) +; OPT-NEXT: ret i8 [[LAST]] +; + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + call void @use( %binop) + ret i8 %last +} + +declare void @use() declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) declare i8 @llvm.aarch64.sve.lasta.nxv16i8(, ) declare i8 @llvm.aarch64.sve.lastb.nxv16i8(, ) +declare float @llvm.aarch64.sve.lastb.nxv4f32(, ) attributes #0 = { "target-features"="+sve" }