Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2044,6 +2044,56 @@
   return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
 }
 
+// s/urhadd instructions implement the following pattern, making the
+// extends free:
+//   %x = add ((zext i8 -> i16), 1)
+//   %y = (zext i8 -> i16)
+//   trunc i16 (lshr (add %x, %y), 1) -> i8
+//
+// Ext is the extend being costed, ExtUser is its single user, and Src / Dst
+// are the source and destination types of the extend.
+// Returns true only if every link of the add -> [add] -> lshr -> trunc chain
+// is present; a missing unique user anywhere in the chain means no match.
+static bool isExtShiftRightAdd(const Instruction *ExtUser, const CastInst *Ext,
+                               Type *Dst, Type *Src) {
+  // Check that the cast is doubling the source type.
+  if ((Src->getScalarSizeInBits() != Dst->getScalarSizeInBits() / 2) ||
+      ExtUser->getOpcode() != Instruction::Add)
+    return false;
+
+  // Look for trunc/lshr/add before trying to match the pattern.
+  // getUniqueUndroppableUser() returns null when the value does not have
+  // exactly one undroppable user, so every step must tolerate null.
+  const Instruction *Add = ExtUser;
+  auto *AddUser =
+      dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
+  if (AddUser && AddUser->getOpcode() == Instruction::Add)
+    Add = AddUser;
+
+  auto *Shr = dyn_cast_or_null<Instruction>(Add->getUniqueUndroppableUser());
+  if (!Shr || Shr->getOpcode() != Instruction::LShr)
+    return false;
+
+  auto *Trunc = dyn_cast_or_null<Instruction>(Shr->getUniqueUndroppableUser());
+  if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
+      Src->getScalarSizeInBits() !=
+          cast<CastInst>(Trunc)->getDestTy()->getScalarSizeInBits())
+    return false;
+
+  // Try to match the whole pattern. Ext could be either the first or second
+  // extend matched. Bind the extend instructions themselves rather than their
+  // operands, so no further (possibly null) user lookups are needed.
+  Instruction *Ex1, *Ex2;
+  if (!match(Add, m_c_Add(m_Instruction(Ex1),
+                          m_c_Add(m_Instruction(Ex2), m_SpecificInt(1)))))
+    return false;
+
+  // Ensure both extends are of the same type as Ext.
+  return match(Ex1, m_ZExtOrSExt(m_Value())) &&
+         Ext->getOpcode() == Ex1->getOpcode() &&
+         Ext->getOpcode() == Ex2->getOpcode();
+}
+
 InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                                  Type *Src,
                                                  TTI::CastContextHint CCH,
@@ -2068,6 +2118,11 @@
       } else // Others are free so long as isWideningInstruction returned true.
         return 0;
     }
+
+    // The cast will be free for the SVE2 s/urhadd instructions
+    if (ST->hasSVE2() && (isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
+        isExtShiftRightAdd(SingleUser, cast<CastInst>(I), Dst, Src))
+      return 0;
   }
 
   // TODO: Allow non-throughput costs that aren't binary.
Index: llvm/test/Analysis/CostModel/AArch64/sve2-ext-rhadd.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve2-ext-rhadd.ll
@@ -0,0 +1,82 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+; SRHADD
+
+define i8 @srhadd_i8_sext_i16(i8 %a, i8 %b, ptr %dst) {
+; CHECK-LABEL: 'srhadd_i8_sext_i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext1 = sext i8 %a to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext2 = sext i8 %b to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add1 = add i16 %sext1, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add2 = add i16 %add1, %sext2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %lsr = lshr i16 %add2, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i16 %lsr to i8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %trunc
+;
+  %sext1 = sext i8 %a to i16
+  %sext2 = sext i8 %b to i16
+  %add1 = add i16 %sext1, 1
+  %add2 = add i16 %add1, %sext2
+  %lsr = lshr i16 %add2, 1
+  %trunc = trunc i16 %lsr to i8
+  ret i8 %trunc
+}
+
+define i16 @srhadd_i16_sext_i32(i16 %a, i16 %b, ptr %dst) {
+; CHECK-LABEL: 'srhadd_i16_sext_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext1 = sext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %sext2 = sext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add1 = add nuw nsw i32 %sext1, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add2 = add nuw nsw i32 %add1, %sext2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %lsr = lshr i32 %add2, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i32 %lsr to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %trunc
+;
+  %sext1 = sext i16 %a to i32
+  %sext2 = sext i16 %b to i32
+  %add1 = add nuw nsw i32 %sext1, 1
+  %add2 = add nuw nsw i32 %add1, %sext2
+  %lsr = lshr i32 %add2, 1
+  %trunc = trunc i32 %lsr to i16
+  ret i16 %trunc
+}
+
+; URHADD
+
+define i8 @urhadd_i8_zext_i16(i8 %a, i8 %b, ptr %dst) {
+; CHECK-LABEL: 'urhadd_i8_zext_i16'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext1 = zext i8 %a to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext2 = zext i8 %b to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add1 = add i16 %zext1, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add2 = add i16 %add1, %zext2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %lsr = lshr i16 %add2, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i16 %lsr to i8
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %trunc
+;
+  %zext1 = zext i8 %a to i16
+  %zext2 = zext i8 %b to i16
+  %add1 = add i16 %zext1, 1
+  %add2 = add i16 %add1, %zext2
+  %lsr = lshr i16 %add2, 1
+  %trunc = trunc i16 %lsr to i8
+  ret i8 %trunc
+}
+
+define i16 @urhadd_i16_zext_i32(i16 %a, i16 %b, ptr %dst) {
+; CHECK-LABEL: 'urhadd_i16_zext_i32'
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext1 = zext i16 %a to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zext2 = zext i16 %b to i32
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add1 = add nuw nsw i32 %zext1, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %add2 = add nuw nsw i32 %add1, %zext2
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %lsr = lshr i32 %add2, 1
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %trunc = trunc i32 %lsr to i16
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %trunc
+;
+  %zext1 = zext i16 %a to i32
+  %zext2 = zext i16 %b to i32
+  %add1 = add nuw nsw i32 %zext1, 1
+  %add2 = add nuw nsw i32 %add1, %zext2
+  %lsr = lshr i32 %add2, 1
+  %trunc = trunc i32 %lsr to i16
+  ret i16 %trunc
+}