diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1157,6 +1157,14 @@ InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const; + /// \return The expected cost of the vector insert or extract instruction + /// \p I. The caller guarantees that \p I is not nullptr. + /// + /// Prefer this overload when the instruction already exists in IR (e.g., + /// in a basic block being transformed) rather than only its opcode. + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index = -1) const; + /// \return The cost of replication shuffle of \p VF elements typed \p EltTy /// \p ReplicationFactor times. /// @@ -1723,6 +1731,8 @@ const Instruction *I) = 0; virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) = 0; + virtual InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) = 0; virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, @@ -2271,6 +2281,10 @@ unsigned Index) override { return Impl.getVectorInstrCost(Opcode, Val, Index); } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) override { + return Impl.getVectorInstrCost(I, Val, Index); + } InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -572,6 +572,10 @@ unsigned Index) const { return 1; } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) const { + return 1; + } unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int
VF, const APInt &DemandedDstElts, @@ -1139,7 +1143,7 @@ if (auto *CI = dyn_cast(IE->getOperand(2))) if (CI->getValue().getActiveBits() <= 32) Idx = CI->getZExtValue(); - return TargetTTI->getVectorInstrCost(Opcode, Ty, Idx); + return TargetTTI->getVectorInstrCost(IE, Ty, Idx); } case Instruction::ShuffleVector: { auto *Shuffle = dyn_cast(U); @@ -1229,7 +1233,7 @@ if (CI->getValue().getActiveBits() <= 32) Idx = CI->getZExtValue(); Type *DstTy = U->getOperand(0)->getType(); - return TargetTTI->getVectorInstrCost(Opcode, DstTy, Idx); + return TargetTTI->getVectorInstrCost(EEI, DstTy, Idx); } } // By default, just classify everything as 'basic'. diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1154,6 +1154,11 @@ return LT.first; } + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index) { + return thisT()->getVectorInstrCost(I->getOpcode(), Val, Index); + } + InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -863,11 +863,29 @@ InstructionCost TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const { + // The interface description restricts this query to InsertElement and + // ExtractElement, and all callers respect that; enforce the contract + // here so that a violation is caught in assert-enabled builds.
+ assert((Opcode == Instruction::ExtractElement || + Opcode == Instruction::InsertElement) && + "Expect InsertElement or ExtractElement"); InstructionCost Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } +InstructionCost TargetTransformInfo::getVectorInstrCost(const Instruction *I, + Type *Val, + unsigned Index) const { + assert((I != nullptr) && "Expect not-null instruction pointer"); + assert((I->getOpcode() == Instruction::ExtractElement || + I->getOpcode() == Instruction::InsertElement) && + "Expect InsertElement or ExtractElement"); + InstructionCost Cost = TTIImpl->getVectorInstrCost(I, Val, Index); + assert(Cost >= 0 && "TTI should not produce negative costs!"); + return Cost; +} + InstructionCost TargetTransformInfo::getReplicationShuffleCost( Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) { diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7230,7 +7230,7 @@ // scalar to vector. // The vector chain has to account for the combining cost.
InstructionCost ScalarCost = - TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); + TTI.getVectorInstrCost(Transition, PromotedType, Index); InstructionCost VectorCost = StoreExtractCombineCost; enum TargetTransformInfo::TargetCostKind CostKind = TargetTransformInfo::TCK_RecipThroughput; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -173,8 +173,11 @@ InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); + InstructionCost getVectorInstrCost(const Instruction *I, Type *Val, + unsigned Index); InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1857,6 +1857,44 @@ return ST->getVectorInsertExtractBaseCost(); } +// Cost of the existing insert/extract instruction I. Unlike the opcode-only +// overload, this accounts for how an extracted integer element is used. +InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction *I, + Type *Val, unsigned Index) { + unsigned Opcode = I->getOpcode(); + InstructionCost Cost = this->getVectorInstrCost(Opcode, Val, Index); + + auto IsExtractedElementUsedAsInteger = + [Val](const Instruction *Inst) -> bool { + if (!isa_and_nonnull<ExtractElementInst>(Inst) || + !Val->getScalarType()->isIntegerTy()) + return false; + + // According to NEON programmer guide, other than multiply instructions, + // instructions that access scalars can access any element in the register + // file. + // + // The cost of extracting a scalar element from a vector register depends + // on how the scalar will be used: + // 1.
If users could use scalars in vector registers directly, the + // extract-element operation is essentially free. + // 2. If the user instruction requires core register as operand (i.e., + // cannot use scalars in vector register), an explicit move operation + // will be codegen'd. + + // FIXME: Do more accurate cost estimation by analyzing the uses of the + // instruction instead of treating any use as needing a core register. + + return !Inst->use_empty(); + }; + + // 'Cost' might be an optimistic 0 when lane is 0. + // Returns the base cost if we know an explicit move is needed. + return IsExtractedElementUsedAsInteger(I) + ? ST->getVectorInsertExtractBaseCost() + : Cost; +} + InstructionCost AArch64TTIImpl::getArithmeticInstrCost( unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -159,6 +159,7 @@ bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef Indices = {}) const; + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); bool isSourceOfDivergence(const Value *V) const; diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h @@ -60,6 +60,7 @@ unsigned getMaxInterleaveFactor(unsigned VF); InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); }; diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++
b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -237,6 +237,7 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -151,6 +151,7 @@ TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -123,6 +123,7 @@ CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -107,6 +107,7 @@ CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ 
b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -67,6 +67,7 @@ TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -146,6 +146,7 @@ CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + using BaseT::getVectorInstrCost; InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); InstructionCost getScalarizationOverhead(VectorType *Ty, diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5873,8 +5873,7 @@ continue; } } - Cost -= TTIRef.getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), Idx); + Cost -= TTIRef.getVectorInstrCost(EE, EE->getVectorOperandType(), Idx); } // Add a cost for subvector extracts/inserts if required. 
for (const auto &Data : ExtractVectorsTys) { @@ -6107,9 +6106,8 @@ for (unsigned I : E->ReuseShuffleIndices) { if (ShuffleOrOp == Instruction::ExtractElement) { auto *EE = cast(VL[I]); - CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), - *getExtractIndex(EE)); + CommonCost -= TTI->getVectorInstrCost( + EE, EE->getVectorOperandType(), *getExtractIndex(EE)); } else { CommonCost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, Idx); @@ -6120,9 +6118,8 @@ for (Value *V : VL) { if (ShuffleOrOp == Instruction::ExtractElement) { auto *EE = cast(V); - CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement, - EE->getVectorOperandType(), - *getExtractIndex(EE)); + CommonCost += TTI->getVectorInstrCost( + EE, EE->getVectorOperandType(), *getExtractIndex(EE)); } else { --Idx; CommonCost += TTI->getVectorInstrCost(Instruction::ExtractElement, diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -270,10 +270,8 @@ Type *VecTy = Ext0->getVectorOperand()->getType(); assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types"); - InstructionCost Cost0 = - TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0); - InstructionCost Cost1 = - TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1); + InstructionCost Cost0 = TTI.getVectorInstrCost(Ext0, VecTy, Index0); + InstructionCost Cost1 = TTI.getVectorInstrCost(Ext1, VecTy, Index1); // If both costs are invalid no shuffle is needed if (!Cost0.isValid() && !Cost1.isValid()) @@ -337,10 +335,8 @@ unsigned Ext0Index = Ext0IndexC->getZExtValue(); unsigned Ext1Index = Ext1IndexC->getZExtValue(); - InstructionCost Extract0Cost = - TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index); - InstructionCost Extract1Cost = - 
TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index); + InstructionCost Extract0Cost = TTI.getVectorInstrCost(Ext0, VecTy, Ext0Index); + InstructionCost Extract1Cost = TTI.getVectorInstrCost(Ext1, VecTy, Ext1Index); // A more expensive extract will always be replaced by a splat shuffle. // For example, if Ext0 is more expensive: @@ -754,9 +750,8 @@ if (!VecTy) return false; - InstructionCost OldCost = - TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0); - OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1); + InstructionCost OldCost = TTI.getVectorInstrCost(Ext0, VecTy, Index0); + OldCost += TTI.getVectorInstrCost(Ext1, VecTy, Index1); OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(), CmpInst::makeCmpResultType(I0->getType()), Pred) * @@ -776,7 +771,7 @@ NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy, ShufMask); NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy); - NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex); + NewCost += TTI.getVectorInstrCost(Ext0, CmpTy, CheapIndex); // Aggressively form vector ops if the cost is equal because the transform // may enable further optimization. diff --git a/llvm/test/Analysis/CostModel/AArch64/kryo.ll b/llvm/test/Analysis/CostModel/AArch64/kryo.ll --- a/llvm/test/Analysis/CostModel/AArch64/kryo.ll +++ b/llvm/test/Analysis/CostModel/AArch64/kryo.ll @@ -21,26 +21,22 @@ ; CHECK: cost of 2 {{.*}} insertelement <2 x i64> undef, i64 undef, i32 1 %t3 = insertelement <2 x i64> undef, i64 undef, i32 0 %t4 = insertelement <2 x i64> undef, i64 undef, i32 1 - ret void } ; CHECK-LABEL: vectorInstrExtractCost define i64 @vectorInstrExtractCost(<4 x i64> %vecreg) { - - ; Vector extracts - extracting each element at index 0 is considered - ; free in the current implementation. When extracting element at index - ; 2, 2 is rounded to 0, so extracting element at index 2 has cost 0 as - ; well. 
- ; ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 1 - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 2 + ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 2 %t1 = extractelement <4 x i64> %vecreg, i32 1 %t2 = extractelement <4 x i64> %vecreg, i32 2 %ele = add i64 %t2, 1 %cond = icmp eq i64 %t1, %ele - ; CHECK: cost of 0 {{.*}} extractelement <4 x i64> %vecreg, i32 0 + ; Vector extracts - extracting each element should have a cost + ; if they are used as integers. + ; + ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 0 ; CHECK: cost of 2 {{.*}} extractelement <4 x i64> %vecreg, i32 3 %t0 = extractelement <4 x i64> %vecreg, i32 0 %t3 = extractelement <4 x i64> %vecreg, i32 3 diff --git a/llvm/test/Transforms/LICM/AArch64/extract-element.ll b/llvm/test/Transforms/LICM/AArch64/extract-element.ll --- a/llvm/test/Transforms/LICM/AArch64/extract-element.ll +++ b/llvm/test/Transforms/LICM/AArch64/extract-element.ll @@ -18,24 +18,23 @@ ; CHECK-NEXT: [[TMP12]] = add i64 [[TMP4]], 1 ; CHECK-NEXT: br label [[TMP3]] ; CHECK: .split.loop.exit: -; CHECK-NEXT: [[DOTLCSSA7:%.*]] = phi <1 x i64> [ [[TMP8]], [[TMP6]] ] +; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP9]], [[TMP6]] ] ; CHECK-NEXT: [[DOTLCSSA6:%.*]] = phi i64 [ [[TMP4]], [[TMP6]] ] ; CHECK-NEXT: [[DOTPH:%.*]] = phi i1 [ [[TMP5]], [[TMP6]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <1 x i64> [[DOTLCSSA7]], i64 0 -; CHECK-NEXT: [[TMP14:%.*]] = xor i64 [[TMP13]], -1 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[DOTLCSSA6]] -; CHECK-NEXT: [[TMP16:%.*]] = icmp uge i64 [[TMP15]], [[TMP1]] -; CHECK-NEXT: br label [[TMP17:%.*]] +; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[DOTLCSSA]], -1 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], [[DOTLCSSA6]] +; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[TMP14]], [[TMP1]] +; CHECK-NEXT: br label [[TMP16:%.*]] ; CHECK: .split.loop.exit2: ; CHECK-NEXT: [[DOTPH3:%.*]] = phi i1 [ [[TMP5]], [[TMP3]] ] ; CHECK-NEXT: 
[[DOTPH4:%.*]] = phi i1 [ undef, [[TMP3]] ] -; CHECK-NEXT: br label [[TMP17]] -; CHECK: 17: -; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP19:%.*]] = phi i1 [ [[TMP16]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] -; CHECK-NEXT: [[TMP20:%.*]] = xor i1 [[TMP18]], true -; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i1 true, i1 [[TMP19]] -; CHECK-NEXT: ret i1 [[TMP21]] +; CHECK-NEXT: br label [[TMP16]] +; CHECK: 16: +; CHECK-NEXT: [[TMP17:%.*]] = phi i1 [ [[DOTPH]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH3]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP18:%.*]] = phi i1 [ [[TMP15]], [[DOTSPLIT_LOOP_EXIT]] ], [ [[DOTPH4]], [[DOTSPLIT_LOOP_EXIT2]] ] +; CHECK-NEXT: [[TMP19:%.*]] = xor i1 [[TMP17]], true +; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i1 true, i1 [[TMP18]] +; CHECK-NEXT: ret i1 [[TMP20]] ; br label %3