Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -547,14 +547,34 @@ static Optional instCombineSVELast(InstCombiner &IC, IntrinsicInst &II) { + IRBuilder<> Builder(II.getContext()); + Builder.SetInsertPoint(&II); Value *Pg = II.getArgOperand(0); Value *Vec = II.getArgOperand(1); - bool IsAfter = II.getIntrinsicID() == Intrinsic::aarch64_sve_lasta; + auto IntrinsicID = II.getIntrinsicID(); + bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta; // lastX(splat(X)) --> X if (auto *SplatVal = getSplatValue(Vec)) return IC.replaceInstUsesWith(II, SplatVal); + // If x and/or y is a splat value then: + // lastX (binop (x, y)) --> binop(lastX(x), lastX(y)) + Value *LHS, *RHS; + if (match(Vec, m_OneUse(m_BinOp(m_Value(LHS), m_Value(RHS))))) { + if (isSplatValue(LHS) || isSplatValue(RHS)) { + auto OldBinOp = cast(Vec); + auto OpC = OldBinOp->getOpcode(); + auto *NewLHS = + Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS}); + auto *NewRHS = + Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS}); + auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags( + OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II); + return IC.replaceInstUsesWith(II, NewBinOp); + } + } + auto *C = dyn_cast(Pg); if (IsAfter && C && C->isNullValue()) { // The intrinsic is extracting lane 0 so use an extract instead. Index: llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll =================================================================== --- llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll +++ llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-lasta-lastb.ll @@ -163,7 +163,225 @@ ret i8 %last } +; Check that we move the lastb before the binary operation so that the new binary op is scalar. 
+define i8 @lastb_binop_RHS_splat_div( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_div( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = udiv i8 %1, %scalar +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = udiv %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define i8 @lastb_binop_RHS_splat_sdiv_exact( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_sdiv_exact( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = sdiv exact i8 %1, %scalar +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv exact %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_RHS_splat_div_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_div_float_fast( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fdiv fast float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv fast %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_RHS_splat_div_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_div_float( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fdiv float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = 
fdiv %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define i8 @lastb_binop_LHS_splat_div( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_div( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = udiv i8 %scalar, %1 +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = udiv %splat, %vector + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define i8 @lastb_binop_LHS_splat_div_sdiv_exact( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_div_sdiv_exact( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = sdiv exact i8 %scalar, %1 +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = sdiv exact %splat, %vector + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_LHS_splat_div_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_div_float_fast( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fdiv fast float %scalar, %1 +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv fast %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_LHS_splat_div_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_div_float( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fdiv float %scalar, %1 +; OPT-NEXT: ret float %binop1 + 
%splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fdiv %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define i8 @lastb_binop_LHS_RHS_splat_div( %pg, i8 %scalar1, i8 %scalar2) #0 { +; OPT-LABEL: @lastb_binop_LHS_RHS_splat_div( +; OPT-NEXT: %binop1 = udiv i8 %scalar1, %scalar2 +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar1, i8 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %splat_insert2 = insertelement poison, i8 %scalar2, i8 0 + %splat2 = shufflevector %splat_insert2, poison, zeroinitializer + %binop = udiv %splat, %splat2 + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define i8 @lastb_binop_RHS_splat_add( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_add( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = add i8 %1, %scalar +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = add %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_RHS_splat_add_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_RHS_splat_add_float_fast( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fadd fast float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fadd fast %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_RHS_splat_add_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: 
@lastb_binop_RHS_splat_add_float( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fadd float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fadd %vector, %splat + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define i8 @lastb_binop_LHS_splat_add( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_add( +; OPT-NEXT: %1 = call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %vector) +; OPT-NEXT: %binop1 = add i8 %1, %scalar +; OPT-NEXT: ret i8 %binop1 + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = add %splat, %vector + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +define float @lastb_binop_LHS_splat_add_float_fast( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_add_float_fast( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fadd fast float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fadd fast %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %binop) + ret float %last +} + +define float @lastb_binop_LHS_splat_add_float( %pg, float %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_LHS_splat_add_float( +; OPT-NEXT: %1 = call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, %vector) +; OPT-NEXT: %binop1 = fadd float %1, %scalar +; OPT-NEXT: ret float %binop1 + %splat_insert = insertelement poison, float %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = fadd %splat, %vector + %last = tail call float @llvm.aarch64.sve.lastb.nxv4f32( %pg, 
%binop) + ret float %last +} + +define i8 @lastb_binop_LHS_RHS_splat_add( %pg, i8 %scalar1, i8 %scalar2) #0 { +; OPT-LABEL: @lastb_binop_LHS_RHS_splat_add( +; OPT-NEXT: %splat_insert = insertelement poison, i8 %scalar1, i8 0 +; OPT-NEXT: %splat_insert2 = insertelement poison, i8 %scalar2, i8 0 +; OPT-NEXT: %1 = add %splat_insert, %splat_insert2 +; OPT-NEXT: %binop = shufflevector %1, undef, zeroinitializer +; OPT-NEXT: %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) +; OPT-NEXT: ret i8 %last + %splat_insert = insertelement poison, i8 %scalar1, i8 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %splat_insert2 = insertelement poison, i8 %scalar2, i8 0 + %splat2 = shufflevector %splat_insert2, poison, zeroinitializer + %binop = add %splat, %splat2 + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + ret i8 %last +} + +; Check that we don't do anything as the binary op has multiple uses. +define i8 @lastb_binop_nochange( %pg, i8 %scalar, %vector) #0 { +; OPT-LABEL: @lastb_binop_nochange( +; OPT-NEXT: %splat_insert = insertelement poison, i8 %scalar, i32 0 +; OPT-NEXT: %splat = shufflevector %splat_insert, poison, zeroinitializer +; OPT-NEXT: %binop = udiv %vector, %splat +; OPT-NEXT: %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) +; OPT-NEXT: call void @use( %binop) +; OPT-NEXT: ret i8 %last + %splat_insert = insertelement poison, i8 %scalar, i32 0 + %splat = shufflevector %splat_insert, poison, zeroinitializer + %binop = udiv %vector, %splat + %last = tail call i8 @llvm.aarch64.sve.lastb.nxv16i8( %pg, %binop) + call void @use( %binop) + ret i8 %last +} + +declare void @use() declare @llvm.aarch64.sve.ptrue.nxv16i1(i32) +declare float @llvm.aarch64.sve.lastb.nxv4f32(, ) declare i8 @llvm.aarch64.sve.lasta.nxv16i8(, ) declare i8 @llvm.aarch64.sve.lastb.nxv16i8(, )