Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -387,6 +387,33 @@
   bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                         Value *NewV) const;
 
+  /// Check whether the given instruction can be converted to a conditional
+  /// instruction (e.g. csel/cset for AArch64) without generating additional
+  /// instructions; if it cannot, the conversion is considered expensive.
+  /// In some cases (e.g. when merging basic blocks) such conversions may
+  /// lead to a performance regression.
+  /// Example of a cheap conversion (for AArch64):
+  /// \code
+  ///   add w0, w0, #1
+  /// \endcode
+  /// can be converted to a single instruction:
+  /// \code
+  ///   cinc w0, w0, <cc>
+  /// \endcode
+  /// Example of an expensive conversion (for AArch64):
+  /// \code
+  ///   orr x16, x16, x18
+  /// \endcode
+  /// cannot be represented by a single conditional instruction, so an
+  /// additional instruction (an `orr`) has to be generated:
+  /// \code
+  ///   orr x3, x16, x18
+  ///   csel x16, x16, x3, <cc>
+  /// \endcode
+  /// \param I the instruction to check
+  /// \return true if the conversion would lead to the generation of
+  ///         additional instructions
+  bool isExpensiveToConvertToCondInstr(const Instruction &I) const;
+
   /// Test whether calls to a function lower to actual program function
   /// calls.
   ///
@@ -1203,6 +1230,7 @@
   virtual unsigned getFlatAddressSpace() = 0;
   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                           Intrinsic::ID IID) const = 0;
+  virtual bool isExpensiveToConvertToCondInstr(const Instruction &I) const = 0;
   virtual bool rewriteIntrinsicWithAddressSpace(
       IntrinsicInst *II, Value *OldV, Value *NewV) const = 0;
   virtual bool isLoweredToCall(const Function *F) = 0;
@@ -1474,6 +1502,10 @@
     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
   }
 
+  bool isExpensiveToConvertToCondInstr(const Instruction &I) const override {
+    return Impl.isExpensiveToConvertToCondInstr(I);
+  }
+
   bool isLoweredToCall(const Function *F) override {
     return Impl.isLoweredToCall(F);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -170,6 +170,11 @@
     return false;
   }
 
+  bool isExpensiveToConvertToCondInstr(const Instruction &I) const {
+    static_cast<void>(I);
+    return false;
+  }
+
   bool isLoweredToCall(const Function *F) {
     assert(F && "A concrete function must be provided to this routine.");
 
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -234,6 +234,10 @@
   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
 }
 
+bool TargetTransformInfo::isExpensiveToConvertToCondInstr(const Instruction &I) const {
+  return TTIImpl->isExpensiveToConvertToCondInstr(I);
+}
+
 bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
   return TTIImpl->isLoweredToCall(F);
 }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -190,6 +190,15 @@
     return 2;
   }
 
+  bool isExpensiveToConvertToCondInstr(const Instruction &I) const {
+    switch (I.getOpcode()) {
+    case Instruction::Or:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
Index: llvm/lib/Transforms/Utils/SimplifyCFG.cpp
===================================================================
--- llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -134,6 +134,11 @@
     cl::desc("Limit maximum recursion depth when calculating costs of "
              "speculatively executed instructions"));
 
+static cl::opt<bool> CheckConversionToCondInstrCost(
+    "check-convert-to-cond-instr", cl::Hidden, cl::init(true),
+    cl::desc("When merging conditional stores, add extra cost for instructions "
+             "that cannot be represented by a single conditional instruction"));
+
 STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
 STATISTIC(NumLinearMaps,
           "Number of switch instructions turned into linear mapping");
@@ -3025,6 +3030,10 @@
       // And finally, if this is a non-free instruction that we are okay
       // speculating, ensure that we consider the speculation budget.
       BudgetRemaining -= TTI.getUserCost(&I);
+      // Check whether this instruction can be converted to conditional form
+      // without generating additional instructions.
+      if (CheckConversionToCondInstrCost && TTI.isExpensiveToConvertToCondInstr(I))
+        BudgetRemaining--; // Additional instructions would be generated; reduce the budget.
       if (BudgetRemaining < 0)
         return false; // Eagerly refuse to fold as soon as we're out of budget.
     }
Index: llvm/test/Transforms/SimplifyCFG/AArch64/check-convert-to-cond-instr.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SimplifyCFG/AArch64/check-convert-to-cond-instr.ll
@@ -0,0 +1,59 @@
+; RUN: llc %s -O2 -mtriple=aarch64-linux-gnu -check-convert-to-cond-instr=true -o %t
+; RUN: FileCheck --check-prefix=CHECKCOST %s < %t
+; RUN: llc %s -O2 -mtriple=aarch64-linux-gnu -check-convert-to-cond-instr=false -o %t
+; RUN: FileCheck --check-prefix=NOTCHECKCOST %s < %t
+
+; CHECKCOST-LABEL: .LBB0_2:
+; CHECKCOST: tst
+; CHECKCOST-NEXT: b.eq
+; NOTCHECKCOST-LABEL: .LBB0_2:
+; NOTCHECKCOST: and [[DSTREG:w[0-9]+]], [[SRCREG1:w[0-9]+]], [[SRCREG2:w[0-9]+]]
+; NOTCHECKCOST-NEXT: orr [[OR_DSTREG:w[0-9]+]], [[OR_SRCREG1:w[0-9]+]], [[SRCREG1]]
+; NOTCHECKCOST-NEXT: tst [[SRCREG1]], [[SRCREG2]]
+; NOTCHECKCOST-NEXT: orr [[DSTREG]], [[DSTREG]], [[OR2_SRCREG:w[0-9]+]]
+; NOTCHECKCOST-NEXT: cinc
+; NOTCHECKCOST-NEXT: csel
+
+%struct.anon = type { i32*, i32* }
+
+@g_ptr = common dso_local local_unnamed_addr global %struct.anon* null, align 8
+
+define dso_local i32 @test_func(i32 %in, i32 %bit, i32 %mask) local_unnamed_addr {
+entry:
+  %0 = load %struct.anon*, %struct.anon** @g_ptr, align 8
+  %result = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
+  %tobool2 = icmp eq i32 %mask, 0
+  br label %do.body
+
+do.body:                                          ; preds = %do.cond, %entry
+  %bit.addr.0 = phi i32 [ %bit, %entry ], [ %shl, %do.cond ]
+  %retval1.0 = phi i32 [ 0, %entry ], [ %retval1.1, %do.cond ]
+  %sum_bits.0 = phi i32 [ 0, %entry ], [ %sum_bits.1, %do.cond ]
+  %and = and i32 %bit.addr.0, %in
+  %tobool = icmp eq i32 %and, 0
+  br i1 %tobool, label %if.end, label %if.then
+
+if.then:                                          ; preds = %do.body
+  %or = or i32 %sum_bits.0, %bit.addr.0
+  %inc = add i32 %retval1.0, 1
+  store i32* null, i32** %result, align 8
+  br label %if.end
+
+if.end:                                           ; preds = %do.body, %if.then
+  %retval1.1 = phi i32 [ %inc, %if.then ], [ %retval1.0, %do.body ]
+  %sum_bits.1 = phi i32 [ %or, %if.then ], [ %sum_bits.0, %do.body ]
+  br i1 %tobool2, label %do.cond, label %if.then3
+
+if.then3:                                         ; preds = %if.end
+  store i32* null, i32** %result, align 8
+  br label %do.cond
+
+do.cond:                                          ; preds = %if.end, %if.then3
+  %shl = shl i32 %bit.addr.0, 1
+  %tobool6 = icmp eq i32 %shl, 0
+  br i1 %tobool6, label %do.end, label %do.body
+
+do.end:                                           ; preds = %do.cond
+  %add = add i32 %sum_bits.1, %retval1.1
+  ret i32 %add
+}
\ No newline at end of file
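For reference, below is a minimal sketch of how an out-of-tree target could opt in to the new TTI hook introduced above. The `MyTargetTTIImpl` class name and the extra `Xor` case are illustrative assumptions for this sketch only; they are not part of the patch.

  // Illustrative sketch (not part of this patch): a target-specific TTI
  // implementation can override the hook to mark opcodes that cannot be
  // folded into a single conditional instruction.
  #include "llvm/IR/Instruction.h"

  namespace llvm {

  class MyTargetTTIImpl {
  public:
    bool isExpensiveToConvertToCondInstr(const Instruction &I) const {
      switch (I.getOpcode()) {
      case Instruction::Or:  // mirrors the AArch64 choice in this patch
      case Instruction::Xor: // assumed extra case, purely illustrative
        return true;         // an extra instruction would be emitted
      default:
        return false;        // folds into a single conditional instruction
      }
    }
  };

  } // namespace llvm

Because the default implementation in TargetTransformInfoImpl.h returns false, targets that do not override the hook keep the current SimplifyCFG behavior, and the extra speculation cost is only applied while -check-convert-to-cond-instr remains at its default of true.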