Index: lib/Analysis/InlineCost.cpp
===================================================================
--- lib/Analysis/InlineCost.cpp
+++ lib/Analysis/InlineCost.cpp
@@ -174,7 +174,8 @@
   bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
   bool simplifyCallSite(Function *F, CallSite CS);
   template <typename Callable>
-  bool simplifyInstruction(Instruction &I, Callable Evaluate);
+  bool simplifyInstruction(Instruction &I, Callable Evaluate,
+                           bool AllConstants = true);
   ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
 
   /// Return true if the given argument to the function being considered for
@@ -240,6 +241,7 @@
   bool visitCallSite(CallSite CS);
   bool visitReturnInst(ReturnInst &RI);
   bool visitBranchInst(BranchInst &BI);
+  bool visitSelectInst(SelectInst &SI);
   bool visitSwitchInst(SwitchInst &SI);
   bool visitIndirectBrInst(IndirectBrInst &IBI);
   bool visitResumeInst(ResumeInst &RI);
@@ -483,31 +485,56 @@
   return isGEPFree(I);
 }
 
-/// Simplify \p I if its operands are constants and update SimplifiedValues.
-/// \p Evaluate is a callable specific to instruction type that evaluates the
-/// instruction when all the operands are constants.
+/// Simplify \p I and update lists such as SimplifiedValues. \p Evaluate is a
+/// callable specific to the instruction type that evaluates the instruction.
+/// If \p AllConstants is true, the instruction is simplified only when every
+/// operand is (or simplifies to) a constant and the result is a constant.
+/// Otherwise, it may also be folded to a simplified non-constant value.
 template <typename Callable>
-bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate) {
-  SmallVector<Constant *, 2> COps;
+bool CallAnalyzer::simplifyInstruction(Instruction &I, Callable Evaluate,
+                                       bool AllConstants) {
+  SmallVector<Value *, 2> Ops;
   for (Value *Op : I.operands()) {
-    Constant *COp = dyn_cast<Constant>(Op);
-    if (!COp)
-      COp = SimplifiedValues.lookup(Op);
-    if (!COp)
-      return false;
-    COps.push_back(COp);
+    Constant *COp =
+        isa<Constant>(Op) ? cast<Constant>(Op) : SimplifiedValues.lookup(Op);
+    if (COp)
+      Ops.push_back(COp);
+    else {
+      if (AllConstants)
+        return false;
+      else
+        Ops.push_back(Op);
+    }
   }
-  auto *C = Evaluate(COps);
-  if (!C)
+  auto *V = Evaluate(Ops);
+  if (!V)
     return false;
-  SimplifiedValues[&I] = C;
+  if (Constant *C = dyn_cast_or_null<Constant>(V)) {
+    SimplifiedValues[&I] = C;
+    return true;
+  }
+
+  // We cannot simplify the instruction into a constant.
+  if (AllConstants)
+    return false;
+
+  if (I.getType()->isPointerTy()) {
+    std::pair<Value *, APInt> BaseAndOffset = ConstantOffsetPtrs.lookup(V);
+    if (BaseAndOffset.first)
+      ConstantOffsetPtrs[&I] = BaseAndOffset;
+    Value *SROAArg;
+    DenseMap<Value *, int>::iterator CostIt;
+    if (lookupSROAArgAndCost(V, SROAArg, CostIt))
+      SROAArgValues[&I] = SROAArg;
+  }
+
   return true;
 }
 
 bool CallAnalyzer::visitBitCast(BitCastInst &I) {
   // Propagate constants through bitcasts.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getBitCast(COps[0], I.getType());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getBitCast(cast<Constant>(COps[0]), I.getType());
       }))
     return true;
 
@@ -530,8 +557,8 @@
 
 bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) {
   // Propagate constants through ptrtoint.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getPtrToInt(COps[0], I.getType());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getPtrToInt(cast<Constant>(COps[0]), I.getType());
       }))
     return true;
 
@@ -562,8 +589,8 @@
 
 bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) {
   // Propagate constants through ptrtoint.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getIntToPtr(COps[0], I.getType());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getIntToPtr(cast<Constant>(COps[0]), I.getType());
       }))
     return true;
 
@@ -588,8 +615,9 @@
 
 bool CallAnalyzer::visitCastInst(CastInst &I) {
   // Propagate constants through ptrtoint.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getCast(I.getOpcode(), COps[0], I.getType());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getCast(I.getOpcode(), cast<Constant>(COps[0]),
+                                     I.getType());
       }))
     return true;
 
@@ -601,8 +629,8 @@
 
 bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) {
   Value *Operand = I.getOperand(0);
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantFoldInstOperands(&I, COps[0], DL);
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantFoldInstOperands(&I, cast<Constant>(COps[0]), DL);
       }))
     return true;
 
@@ -848,8 +876,9 @@
 bool CallAnalyzer::visitCmpInst(CmpInst &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
   // First try to handle simplified comparisons.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getCompare(I.getPredicate(), COps[0], COps[1]);
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getCompare(
+            I.getPredicate(), cast<Constant>(COps[0]), cast<Constant>(COps[1]));
       }))
     return true;
 
@@ -957,13 +986,15 @@
 
 bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) {
   Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
-  auto Evaluate = [&](SmallVectorImpl<Constant *> &COps) {
+  auto Evaluate = [&](SmallVectorImpl<Value *> &COps) {
     Value *SimpleV = nullptr;
     if (auto FI = dyn_cast<FPMathOperator>(&I))
-      SimpleV = SimplifyFPBinOp(I.getOpcode(), COps[0], COps[1],
-                                FI->getFastMathFlags(), DL);
+      SimpleV =
+          SimplifyFPBinOp(I.getOpcode(), cast<Constant>(COps[0]),
+                          cast<Constant>(COps[1]), FI->getFastMathFlags(), DL);
     else
-      SimpleV = SimplifyBinOp(I.getOpcode(), COps[0], COps[1], DL);
+      SimpleV = SimplifyBinOp(I.getOpcode(), cast<Constant>(COps[0]),
+                              cast<Constant>(COps[1]), DL);
     return dyn_cast_or_null<Constant>(SimpleV);
   };
 
@@ -1009,8 +1040,9 @@
 
 bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) {
   // Constant folding for extract value is trivial.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getExtractValue(COps[0], I.getIndices());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getExtractValue(cast<Constant>(COps[0]),
+                                             I.getIndices());
       }))
     return true;
 
@@ -1020,10 +1052,10 @@
 
 bool CallAnalyzer::visitInsertValue(InsertValueInst &I) {
   // Constant folding for insert value is trivial.
-  if (simplifyInstruction(I, [&](SmallVectorImpl<Constant *> &COps) {
-        return ConstantExpr::getInsertValue(/*AggregateOperand*/ COps[0],
-                                            /*InsertedValueOperand*/ COps[1],
-                                            I.getIndices());
+  if (simplifyInstruction(I, [&](SmallVectorImpl<Value *> &COps) {
+        return ConstantExpr::getInsertValue(
+            /*AggregateOperand*/ cast<Constant>(COps[0]),
+            /*InsertedValueOperand*/ cast<Constant>(COps[1]), I.getIndices());
       }))
     return true;
 
@@ -1174,6 +1206,18 @@
          SimplifiedValues.lookup(BI.getCondition()));
 }
 
+bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
+  if (simplifyInstruction(SI,
+                          [&](SmallVectorImpl<Value *> &Ops) {
+                            return SimplifySelectInst(Ops[0], Ops[1], Ops[2],
+                                                      DL);
+                          },
+                          false))
+    return true;
+
+  return Base::visitSelectInst(SI);
+}
+
 bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
   // We model unconditional switches as free, see the comments on handling
   // branches.
Index: test/Transforms/Inline/AArch64/select.ll
===================================================================
--- /dev/null
+++ test/Transforms/Inline/AArch64/select.ll
@@ -0,0 +1,206 @@
+; REQUIRES: asserts
+; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+define i32 @outer1(i1 %cond) {
+  %C = call i32 @inner1(i1 %cond, i32 1)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner1
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner1(i1 %cond, i32 %val) {
+  %select = select i1 %cond, i32 1, i32 %val  ; Simplified to 1
+  ret i32 %select                             ; Simplifies to ret i32 1
+}
+
+
+define i32 @outer2(i32 %val) {
+  %C = call i32 @inner2(i1 true, i32 %val)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner2
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner2(i1 %cond, i32 %val) {
+  %select = select i1 %cond, i32 1, i32 %val  ; Simplifies to 1
+  ret i32 %select                             ; Simplifies to ret i32 1
+}
+
+
+define i32 @outer3(i32 %val) {
+  %C = call i32 @inner3(i1 false, i32 %val)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner3
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner3(i1 %cond, i32 %val) {
+  %select = select i1 %cond, i32 %val, i32 -1  ; Simplifies to -1
+  ret i32 %select                              ; Simplifies to ret i32 -1
+}
+
+
+define i32 @outer4() {
+  %C = call i32 @inner4(i1 true, i32 1, i32 -1)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner4
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner4(i1 %cond, i32 %val1, i32 %val2) {
+  %select = select i1 %cond, i32 %val1, i32 %val2  ; Simplifies to 1
+  ret i32 %select                                  ; Simplifies to ret i32 1
+}
+
+
+define i1 @outer5() {
+  %C = call i1 @inner5(i1 true, i1 true, i1 false)
+  ret i1 %C
+}
+
+; CHECK: Analyzing call of inner5
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 3
+define i1 @inner5(i1 %cond, i1 %val1, i1 %val2) {
+  %select = select i1 %cond, i1 %val1, i1 %val2  ; Simplifies to true
+  br i1 %select, label %exit, label %isfalse     ; Simplifies to br label %exit
+
+isfalse:          ; This block is unreachable once inlined
+  call void @dead()
+  call void @dead()
+  call void @dead()
+  call void @dead()
+  call void @dead()
+  br label %exit
+
+exit:
+  ret i1 %select  ; Simplifies to ret i1 true
+}
+
+declare void @dead()
+
+
+define i32 @outer6(i32* %ptr) {
+  %A = alloca i32
+  %C = call i32 @inner6(i1 true, i32* %A, i32* %ptr)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner6
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 3
+define i32 @inner6(i1 %cond, i32* %p1, i32* %p2) {
+  %select = select i1 %cond, i32* %p1, i32* %p2  ; Simplifies to %A
+  %load = load i32, i32* %select                 ; SROA'ed
+  ret i32 %load                                  ; Simplified
+}
+
+
+define i32 @outer7(i32* %ptr) {
+  %A = alloca i32
+  %C = call i32 @inner7(i1 false, i32* %ptr, i32* %A)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner7
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 3
+define i32 @inner7(i1 %cond, i32* %p1, i32* %p2) {
+  %select = select i1 %cond, i32* %p1, i32* %p2  ; Simplifies to %A
+  %load = load i32, i32* %select                 ; SROA'ed
+  ret i32 %load                                  ; Simplified
+}
+
+
+define <2 x i32> @outer8(<2 x i32> %val) {
+  %C = call <2 x i32> @inner8(<2 x i1> <i1 true, i1 true>, <2 x i32> %val)
+  ret <2 x i32> %C
+}
+
+; CHECK: Analyzing call of inner8
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define <2 x i32> @inner8(<2 x i1> %cond, <2 x i32> %val) {
+  %select = select <2 x i1> %cond, <2 x i32> <i32 1, i32 1>, <2 x i32> %val  ; Simplifies to <1, 1>
+  ret <2 x i32> %select                                                      ; Simplifies to ret <2 x i32> <1, 1>
+}
+
+
+define <2 x i32> @outer9(<2 x i32> %val) {
+  %C = call <2 x i32> @inner9(<2 x i1> <i1 false, i1 false>, <2 x i32> %val)
+  ret <2 x i32> %C
+}
+
+; CHECK: Analyzing call of inner9
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define <2 x i32> @inner9(<2 x i1> %cond, <2 x i32> %val) {
+  %select = select <2 x i1> %cond, <2 x i32> %val, <2 x i32> <i32 -1, i32 -1>  ; Simplifies to <-1, -1>
+  ret <2 x i32> %select                                                        ; Simplifies to ret <2 x i32> <-1, -1>
+}
+
+
+@glbl = external global i32
+
+define i1 @outer10(i32* %ptr) {
+  %C = call i1 @inner10(i1 true, i32* @glbl, i32* %ptr)
+  ret i1 %C
+}
+
+; CHECK: Analyzing call of inner10
+; CHECK: NumInstructionsSimplified: 3
+; CHECK: NumInstructions: 3
+define i1 @inner10(i1 %cond, i32* %ptr1, i32* %ptr2) {
+  %select = select i1 %cond, i32* %ptr1, i32* %ptr2  ; Simplified to @glbl
+  %cmp = icmp eq i32* %select, @glbl                 ; Simplified to true
+  ret i1 %cmp                                        ; Simplifies to ret i1 true
+}
+
+
+define <2 x i32> @outer11(<2 x i32> %val1, <2 x i32> %val2) {
+  %C = call <2 x i32> @inner11(<2 x i1> <i1 true, i1 false>, <2 x i32> %val1, <2 x i32> %val2)
+  ret <2 x i32> %C
+}
+
+; CHECK: Analyzing call of inner11
+; CHECK: NumInstructionsSimplified: 1
+; CHECK: NumInstructions: 2
+define <2 x i32> @inner11(<2 x i1> %cond, <2 x i32> %val1, <2 x i32> %val2) {
+  %select = select <2 x i1> %cond, <2 x i32> %val1, <2 x i32> %val2  ; Cannot be simplified
+  ret <2 x i32> %select                                              ; Simplified
+}
+
+
+define i32 @outer12(i32 %val1, i32 %val2) {
+  %C = call i32 @inner12(i1 true, i32 %val1, i32 %val2)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner12
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner12(i1 %cond, i32 %val1, i32 %val2) {
+  %select = select i1 %cond, i32 %val1, i32 %val2  ; Simplified to %val1
+  ret i32 %select                                  ; Simplifies to ret i32 %val1
+}
+
+
+define i32 @outer13(i32 %val1, i32 %val2) {
+  %C = call i32 @inner13(i1 false, i32 %val1, i32 %val2)
+  ret i32 %C
+}
+
+; CHECK: Analyzing call of inner13
+; CHECK: NumInstructionsSimplified: 2
+; CHECK: NumInstructions: 2
+define i32 @inner13(i1 %cond, i32 %val1, i32 %val2) {
+  %select = select i1 %cond, i32 %val1, i32 %val2  ; Simplified to %val2
+  ret i32 %select                                  ; Simplifies to ret i32 %val2
+}
Index: test/Transforms/Inline/alloca-bonus.ll
===================================================================
--- test/Transforms/Inline/alloca-bonus.ll
+++ test/Transforms/Inline/alloca-bonus.ll
@@ -43,6 +43,7 @@
   %D = getelementptr inbounds i32, i32* %ptr, i32 %A
   %E = bitcast i32* %ptr to i8*
   %F = select i1 false, i32* %ptr, i32* @glbl
+  %L = load i32, i32* %F
   call void @llvm.lifetime.start.p0i8(i64 0, i8* %E)
   call void @extern()
   ret void
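
For reference, a minimal standalone IR sketch (not part of the patch; function names are illustrative) of the case the new AllConstants=false path is meant to catch: once the inliner substitutes the i1 true argument, SimplifySelectInst folds the select to %val, which is an operand rather than a Constant, so the old all-constant-only simplifyInstruction could not record it in SimplifiedValues.

; Hypothetical example mirroring outer12/inner12 above: the select folds to
; the non-constant operand %val when the condition is known to be true.
define i32 @callee(i1 %cond, i32 %val) {
  %sel = select i1 %cond, i32 %val, i32 0
  ret i32 %sel
}

define i32 @caller(i32 %val) {
  %r = call i32 @callee(i1 true, i32 %val)
  ret i32 %r
}

With an asserts-enabled build, running this through opt -inline -debug-only=inline-cost should report both instructions of @callee as simplified, matching the outer12/inner12 test above.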