Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -109,24 +109,25 @@ namespace { class IRPromoter { SmallPtrSet NewInsts; - SmallVector InstsToRemove; - DenseMap TruncTysMap; + SmallPtrSet InstsToRemove; + DenseMap> TruncTysMap; SmallPtrSet Promoted; Module *M = nullptr; LLVMContext &Ctx; IntegerType *ExtTy = nullptr; IntegerType *OrigTy = nullptr; - - void PrepareConstants(SmallPtrSetImpl &Visited, - SmallPtrSetImpl &SafeToPromote); - void ExtendSources(SmallPtrSetImpl &Sources); - void PromoteTree(SmallPtrSetImpl &Visited, - SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote); - void TruncateSinks(SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks); - void Cleanup(SmallPtrSetImpl &Visited); + SmallPtrSetImpl *Visited; + SmallPtrSetImpl *Sources; + SmallPtrSetImpl *Sinks; + SmallPtrSetImpl *SafeToPromote; + + void ReplaceAllUsersOfWith(Value *From, Value *To); + void PrepareConstants(void); + void ExtendSources(void); + void ConvertTruncs(void); + void PromoteTree(void); + void TruncateSinks(void); + void Cleanup(void); public: IRPromoter(Module *M) : M(M), Ctx(M->getContext()), @@ -192,6 +193,10 @@ return V->getType()->getScalarSizeInBits() > ARMCodeGenPrepare::TypeSize; } +static bool LessThanTypeSize(Value *V) { + return V->getType()->getScalarSizeInBits() < ARMCodeGenPrepare::TypeSize; +} + /// Some instructions can use 8- and 16-bit operands, and we don't need to /// promote anything larger. We disallow booleans to make life easier when /// dealing with icmps but allow any other integer that is <= 16 bits. Void @@ -214,7 +219,7 @@ } /// Return true if the given value is a source in the use-def chain, producing -/// a narrow (i8, i16) value. These values will be zext to start the promotion +/// a narrow 'TypeSize' value. These values will be zext to start the promotion /// of the tree to i32. We guarantee that these won't populate the upper bits /// of the register. ZExt on the loads will be free, and the same for call /// return values because we only accept ones that guarantee a zeroext ret val. @@ -246,16 +251,22 @@ // proved that the data value is kept within the range of the original data // type. + // Sinks are: + // - points where the value in the register is being observed, such as an + // icmp, switch or store. + // - points where value types have to match, such as calls and returns. + // - zext are included to ease the transformation and are generally removed + // later on. if (auto *Store = dyn_cast(V)) return LessOrEqualTypeSize(Store->getValueOperand()); if (auto *Return = dyn_cast(V)) return LessOrEqualTypeSize(Return->getReturnValue()); - if (auto *Trunc = dyn_cast(V)) - return EqualTypeSize(Trunc->getOperand(0)); if (auto *ZExt = dyn_cast(V)) return GreaterThanTypeSize(ZExt); + if (auto *Switch = dyn_cast(V)) + return LessThanTypeSize(Switch->getCondition()); if (auto *ICmp = dyn_cast(V)) - return ICmp->isSigned(); + return ICmp->isSigned() || LessThanTypeSize(ICmp->getOperand(0)); return isa(V); } @@ -426,23 +437,32 @@ llvm_unreachable("unhandled opcode for narrow intrinsic"); } -static void ReplaceAllUsersOfWith(Value *From, Value *To) { +void IRPromoter::ReplaceAllUsersOfWith(Value *From, Value *To) { SmallVector Users; Instruction *InstTo = dyn_cast(To); + bool ReplacedAll = true; + + LLVM_DEBUG(dbgs() << "ARM CGP: Replacing " << *From << " with " << *To + << "\n"); + for (Use &U : From->uses()) { auto *User = cast(U.getUser()); - if (InstTo && User->isIdenticalTo(InstTo)) + if (InstTo && User->isIdenticalTo(InstTo)) { + ReplacedAll = false; continue; + } Users.push_back(User); } for (auto *U : Users) U->replaceUsesOfWith(From, To); + + if (ReplacedAll) + if (auto *I = dyn_cast(From)) + InstsToRemove.insert(I); } -void -IRPromoter::PrepareConstants(SmallPtrSetImpl &Visited, - SmallPtrSetImpl &SafeToPromote) { +void IRPromoter::PrepareConstants() { IRBuilder<> Builder{Ctx}; // First step is to prepare the instructions for mutation. Most constants // just need to be zero extended into their new type, but complications arise @@ -463,12 +483,12 @@ // immediate as operand 1, we create an equivalent instruction using a // positive immediate. That positive immediate can then be zext along with // all the other immediates later. - for (auto *V : Visited) { + for (auto *V : *Visited) { if (!isa(V)) continue; auto *I = cast(V); - if (SafeToPromote.count(I)) { + if (SafeToPromote->count(I)) { if (!isa(I)) continue; @@ -493,16 +513,16 @@ NewInst->copyIRFlags(I); NewInsts.insert(NewInst); } - InstsToRemove.push_back(I); + InstsToRemove.insert(I); I->replaceAllUsesWith(NewVal); } } } for (auto *I : NewInsts) - Visited.insert(I); + Visited->insert(I); } -void IRPromoter::ExtendSources(SmallPtrSetImpl &Sources) { +void IRPromoter::ExtendSources() { IRBuilder<> Builder{Ctx}; auto InsertZExt = [&](Value *V, Instruction *InsertPt) { @@ -520,13 +540,13 @@ I->moveAfter(InsertPt); NewInsts.insert(I); } + ReplaceAllUsersOfWith(V, ZExt); - TruncTysMap[ZExt] = TruncTysMap[V]; }; // Now, insert extending instructions between the sources and their users. LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n"); - for (auto V : Sources) { + for (auto V : *Sources) { LLVM_DEBUG(dbgs() << " - " << *V << "\n"); if (auto *I = dyn_cast(V)) InsertZExt(I, I); @@ -540,22 +560,19 @@ } } -void IRPromoter::PromoteTree(SmallPtrSetImpl &Visited, - SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks, - SmallPtrSetImpl &SafeToPromote) { +void IRPromoter::PromoteTree() { LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n"); IRBuilder<> Builder{Ctx}; // Mutate the types of the instructions within the tree. Here we handle // constant operands. - for (auto *V : Visited) { - if (Sources.count(V)) + for (auto *V : *Visited) { + if (Sources->count(V)) continue; auto *I = cast(V); - if (Sinks.count(I)) + if (Sinks->count(I)) continue; for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { @@ -578,15 +595,15 @@ // Finally, any instructions that should be promoted but haven't yet been, // need to be handled using intrinsics. - for (auto *V : Visited) { + for (auto *V : *Visited) { auto *I = dyn_cast(V); if (!I) continue; - if (Sources.count(I) || Sinks.count(I)) + if (Sources->count(I) || Sinks->count(I)) continue; - if (!shouldPromote(I) || SafeToPromote.count(I) || NewInsts.count(I)) + if (!shouldPromote(I) || SafeToPromote->count(I) || NewInsts.count(I)) continue; assert(EnableDSP && "DSP intrinisc insertion not enabled!"); @@ -600,29 +617,21 @@ Builder.SetCurrentDebugLocation(I->getDebugLoc()); Value *Args[] = { I->getOperand(0), I->getOperand(1) }; CallInst *Call = Builder.CreateCall(DSPInst, Args); - ReplaceAllUsersOfWith(I, Call); - InstsToRemove.push_back(I); NewInsts.insert(Call); - TruncTysMap[Call] = OrigTy; + ReplaceAllUsersOfWith(I, Call); } } -void IRPromoter::TruncateSinks(SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks) { +void IRPromoter::TruncateSinks() { LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n"); IRBuilder<> Builder{Ctx}; - auto InsertTrunc = [&](Value *V) -> Instruction* { + auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* { if (!isa(V) || !isa(V->getType())) return nullptr; - if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V) || - Sources.count(V)) - return nullptr; - - Type *TruncTy = TruncTysMap[V]; - if (TruncTy == ExtTy) + if ((!Promoted.count(V) && !NewInsts.count(V)) || Sources->count(V)) return nullptr; LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for " @@ -636,14 +645,15 @@ // Fix up any stores or returns that use the results of the promoted // chain. - for (auto I : Sinks) { - LLVM_DEBUG(dbgs() << " - " << *I << "\n"); + for (auto I : *Sinks) { + LLVM_DEBUG(dbgs() << "ARM CGP: For Sink: " << *I << "\n"); // Handle calls separately as we need to iterate over arg operands. if (auto *Call = dyn_cast(I)) { for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { Value *Arg = Call->getArgOperand(i); - if (Instruction *Trunc = InsertTrunc(Arg)) { + Type *Ty = TruncTysMap[Call][i]; + if (Instruction *Trunc = InsertTrunc(Arg, Ty)) { Trunc->moveBefore(Call); Call->setArgOperand(i, Trunc); } @@ -651,9 +661,20 @@ continue; } + // Special case switches because we need to truncate the condition. + if (auto *Switch = dyn_cast(I)) { + Type *Ty = TruncTysMap[Switch][0]; + if (Instruction *Trunc = InsertTrunc(Switch->getCondition(), Ty)) { + Trunc->moveBefore(Switch); + Switch->setCondition(Trunc); + } + continue; + } + // Now handle the others. for (unsigned i = 0; i < I->getNumOperands(); ++i) { - if (Instruction *Trunc = InsertTrunc(I->getOperand(i))) { + Type *Ty = TruncTysMap[I][i]; + if (Instruction *Trunc = InsertTrunc(I->getOperand(i), Ty)) { Trunc->moveBefore(I); I->setOperand(i, Trunc); } @@ -661,35 +682,32 @@ } } -void IRPromoter::Cleanup(SmallPtrSetImpl &Visited) { +void IRPromoter::Cleanup() { // Some zexts will now have become redundant, along with their trunc // operands, so remove them - for (auto V : Visited) { - if (!isa(V)) + for (auto V : *Visited) { + if (!isa(V)) continue; - auto ZExt = cast(V); + auto ZExt = cast(V); if (ZExt->getDestTy() != ExtTy) continue; Value *Src = ZExt->getOperand(0); if (ZExt->getSrcTy() == ZExt->getDestTy()) { - LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast.\n"); + LLVM_DEBUG(dbgs() << "ARM CGP: Removing unnecessary cast: " << *ZExt + << "\n"); ReplaceAllUsersOfWith(ZExt, Src); - InstsToRemove.push_back(ZExt); continue; } // For any truncs that we insert to handle zexts, we can replace the // result of the zext with the input to the trunc. - if (NewInsts.count(Src) && isa(Src)) { + if (NewInsts.count(Src) && isa(V) && isa(Src)) { auto *Trunc = cast(Src); assert(Trunc->getOperand(0)->getType() == ExtTy && "expected inserted trunc to be operating on i32"); - LLVM_DEBUG(dbgs() << "ARM CGP: Replacing zext with trunc operand: " - << *Trunc->getOperand(0)); ReplaceAllUsersOfWith(ZExt, Trunc->getOperand(0)); - InstsToRemove.push_back(ZExt); } } @@ -705,6 +723,29 @@ Promoted.clear(); } +void IRPromoter::ConvertTruncs() { + IRBuilder<> Builder{Ctx}; + + for (auto *V : *Visited) { + if (!isa(V) || Sources->count(V)) + continue; + + auto *Trunc = cast(V); + assert(LessThanTypeSize(Trunc) && "expected narrow trunc"); + + Builder.SetInsertPoint(Trunc); + unsigned NumBits = + cast(Trunc->getType())->getScalarSizeInBits(); + ConstantInt *Mask = ConstantInt::get(Ctx, APInt::getMaxValue(NumBits)); + Value *Masked = Builder.CreateAnd(Trunc->getOperand(0), Mask); + + if (auto *I = dyn_cast(Masked)) + NewInsts.insert(I); + + ReplaceAllUsersOfWith(Trunc, Masked); + } +} + void IRPromoter::Mutate(Type *OrigTy, SmallPtrSetImpl &Visited, SmallPtrSetImpl &Sources, @@ -718,28 +759,47 @@ assert(OrigTy->getPrimitiveSizeInBits() < ExtTy->getPrimitiveSizeInBits() && "original type not smaller than extended type"); - // Cache original types. - for (auto *V : Visited) - TruncTysMap[V] = V->getType(); + this->Visited = &Visited; + this->Sources = &Sources; + this->Sinks = &Sinks; + this->SafeToPromote = &SafeToPromote; + + // Cache original types of the values that will likely need truncating + for (auto *I : Sinks) { + if (auto *Call = dyn_cast(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + TruncTysMap[Call].push_back(Arg->getType()); + } + } else if (auto *Switch = dyn_cast(I)) + TruncTysMap[I].push_back(Switch->getCondition()->getType()); + else { + for (unsigned i = 0; i < I->getNumOperands(); ++i) + TruncTysMap[I].push_back(I->getOperand(i)->getType()); + } + } // Convert adds and subs using negative immediates to equivalent instructions // that use positive constants. - PrepareConstants(Visited, SafeToPromote); + PrepareConstants(); // Insert zext instructions between sources and their users. - ExtendSources(Sources); + ExtendSources(); + + // Convert any truncs, that aren't sources, into AND masks. + ConvertTruncs(); // Promote visited instructions, mutating their types in place. Also insert // DSP intrinsics, if enabled, for adds and subs which would be unsafe to // promote. - PromoteTree(Visited, Sources, Sinks, SafeToPromote); + PromoteTree(); // Insert trunc instructions for use by calls, stores etc... - TruncateSinks(Sources, Sinks); + TruncateSinks(); // Finally, remove unecessary zexts and truncs, delete old instructions and // clear the data structures. - Cleanup(Visited); + Cleanup(); LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete\n"); } Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll @@ -200,11 +200,23 @@ ret i1 %retval } +; CHECK-LABEL: promote_arg_pass_to_call +; CHECK-NOT: uxt +define i16 @promote_arg_pass_to_call(i16 zeroext %arg1, i16 zeroext %arg2) { + %conv = add nuw i16 %arg1, 15 + %mul = mul nuw nsw i16 %conv, 3 + %cmp = icmp ult i16 %mul, %arg2 + %trunc = trunc i16 %arg1 to i8 + %res = call zeroext i16 @dummy4(i1 %cmp, i8 %trunc, i16 %arg1) + ret i16 %res +} + declare i32 @assert(...) declare i8 @dummy_i8(i8) declare i8 @dummy2(i8*, i8, i8) declare i16 @dummy3(i16) +declare i16 @dummy4(i1, i8, i16) declare dso_local i32 @e(...) local_unnamed_addr #1 declare dso_local zeroext i16 @f(...) local_unnamed_addr #1 Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll @@ -122,36 +122,6 @@ ret i1 %or } -; CHECK-COMMON-LABEL: icmp_switch_trunc: -; CHECK-COMMON-NOT: uxt -define i16 @icmp_switch_trunc(i16 zeroext %arg) { -entry: - %conv = add nuw i16 %arg, 15 - %mul = mul nuw nsw i16 %conv, 3 - %trunc = trunc i16 %arg to i3 - switch i3 %trunc, label %default [ - i3 0, label %sw.bb - i3 1, label %sw.bb.i - ] - -sw.bb: - %cmp0 = icmp ult i16 %mul, 127 - %select = select i1 %cmp0, i16 %mul, i16 127 - br label %exit - -sw.bb.i: - %cmp1 = icmp ugt i16 %mul, 34 - %select.i = select i1 %cmp1, i16 %mul, i16 34 - br label %exit - -default: - br label %exit - -exit: - %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] - ret i16 %res -} - ; We currently only handle truncs as sinks, so a uxt will still be needed for ; the icmp ugt instruction. ; CHECK-COMMON-LABEL: urem_trunc_icmps @@ -187,47 +157,6 @@ ret void } -; CHECK-COMMON-LABEL: phi_feeding_switch -; CHECK-COMMON: ldrb -; CHECK-COMMON: uxtb -define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) { -entry: - %pre = load i8, i8* %memblock, align 1 - %conv = trunc i16 %arg to i8 - br label %header - -header: - %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ] - %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ] - %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ] - switch i8 %phi.0, label %default [ - i8 43, label %for.inc.i - i8 45, label %for.inc.i.i - ] - -for.inc.i: - %xor = xor i8 %phi.1, 1 - br label %latch - -for.inc.i.i: - %and = and i8 %phi.1, 3 - br label %latch - -default: - %sub = sub i8 %phi.0, 1 - %cmp2 = icmp ugt i8 %sub, 4 - br i1 %cmp2, label %latch, label %exit - -latch: - %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ] - %count = add nuw i8 %phi.2, 1 - store i8 %count, i8* %store, align 1 - br label %header - -exit: - ret void -} - ; Check that %exp requires uxth in all cases, and will also be required to ; promote %1 for the call - unless we can generate a uadd16. ; CHECK-COMMON-LABEL: zext_load_sink_call: @@ -254,40 +183,6 @@ ret i32 %exitval } -%class.ae = type { i8 } -%class.x = type { i8 } -%class.v = type { %class.q } -%class.q = type { i16 } - -; CHECK-COMMON-LABEL: trunc_i16_i9_switch -; CHECK-COMMON-NOT: uxt -define i32 @trunc_i16_i9_switch(%class.ae* %this) { -entry: - %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this) - %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call) - %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0 - %1 = load i16, i16* %0, align 2 - %2 = trunc i16 %1 to i9 - %trunc = and i9 %2, -64 - switch i9 %trunc, label %cleanup.fold.split [ - i9 0, label %cleanup - i9 -256, label %if.then7 - ] - -if.then7: - %3 = and i16 %1, 7 - %tobool = icmp eq i16 %3, 0 - %cond = select i1 %tobool, i32 2, i32 1 - br label %cleanup - -cleanup.fold.split: - br label %cleanup - -cleanup: - %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ] - ret i32 %retval.0 -} - ; CHECK-COMMON-LABEL: bitcast_i16 ; CHECK-COMMON-NOT: uxt define i16 @bitcast_i16(i16 zeroext %arg0, i16 zeroext %arg1) { @@ -332,8 +227,6 @@ ret i8 %res } -declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr -declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr declare i32 @dummy(i32, i32) @d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1 @@ -583,6 +476,8 @@ ret i8 %retval } +; CHECK-COMMON-LABEL: bitcast_i1 +; CHECK-COMMON-NOT: uxt define i32 @bitcast_i1(i16 zeroext %a, i32 %b, i32 %c) { entry: %0 = bitcast i1 1 to i1 @@ -601,3 +496,40 @@ %retval = phi i32 [ %select, %if.then ], [ 0, %entry ] ret i32 %retval } + +; CHECK-COMMON-LABEL: search_back_through_trunc +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp +; CHECK-COMMON: strb +; CHECK-COMMON: strb +define void @search_back_through_trunc(i8* %a, i8* %b, i8* %c, i8* %d, i16* %e) { +entry: + %0 = load i8, i8* %a, align 1 + %conv106 = zext i8 %0 to i16 + %shl = shl nuw i16 %conv106, 8 + %1 = load i8, i8* %b, align 1 + %conv108 = zext i8 %1 to i16 + %or109 = or i16 %shl, %conv108 + %2 = load i8, i8* %c, align 1 + %conv119 = zext i8 %2 to i16 + %shl120 = shl nuw i16 %conv119, 8 + %3 = load i8, i8* %d, align 1 + %conv122 = zext i8 %3 to i16 + %or123 = or i16 %shl120, %conv122 + %cmp133 = icmp eq i16 %or109, %or123 + br i1 %cmp133, label %if.end183, label %if.else136 + +if.else136: + %4 = load i16, i16* %e, align 2 + %extract.t854 = trunc i16 %4 to i8 + %extract856 = lshr i16 %4, 8 + %extract.t857 = trunc i16 %extract856 to i8 + br label %if.end183 + +if.end183: + %w.0.off0 = phi i8 [ %extract.t854, %if.else136 ], [ %1, %entry ] + %w.0.off8 = phi i8 [ %extract.t857, %if.else136 ], [ %2, %entry ] + store i8 %w.0.off8, i8* %c, align 1 + store i8 %w.0.off0, i8* %d, align 1 + ret void +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll @@ -172,3 +172,15 @@ exit: ret i16 %unrelated } + +; CHECK-COMMON-LABEL: promote_arg_return +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: strb +define i16 @promote_arg_return(i16 zeroext %arg1, i16 zeroext %arg2, i8* %res) { + %add = add nuw i16 %arg1, 15 + %mul = mul nuw nsw i16 %add, 3 + %cmp = icmp ult i16 %mul, %arg2 + %conv = zext i1 %cmp to i8 + store i8 %conv, i8* %res + ret i16 %arg1 +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-switch.ll @@ -0,0 +1,168 @@ +; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s + +; CHECK-LABEL: truncate_source_phi_switch +; CHECK: ldrb +; CHECK: uxtb +define void @truncate_source_phi_switch(i8* %memblock, i8* %store, i16 %arg) { +entry: + %pre = load i8, i8* %memblock, align 1 + %conv = trunc i16 %arg to i8 + br label %header + +header: + %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ] + %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ] + %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ] + switch i8 %phi.0, label %default [ + i8 43, label %for.inc.i + i8 45, label %for.inc.i.i + ] + +for.inc.i: + %xor = xor i8 %phi.1, 1 + br label %latch + +for.inc.i.i: + %and = and i8 %phi.1, 3 + br label %latch + +default: + %sub = sub i8 %phi.0, 1 + %cmp2 = icmp ugt i8 %sub, 4 + br i1 %cmp2, label %latch, label %exit + +latch: + %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ] + %count = add nuw i8 %phi.2, 1 + store i8 %count, i8* %store, align 1 + br label %header + +exit: + ret void +} + +; CHECK-LABEL: icmp_switch_source: +; CHECK-NOT: uxt +define i16 @icmp_switch_source(i16 zeroext %arg) { +entry: + %conv = add nuw i16 %arg, 15 + %mul = mul nuw nsw i16 %conv, 3 + switch i16 %arg, label %default [ + i16 0, label %sw.bb + i16 1, label %sw.bb.i + ] + +sw.bb: + %cmp0 = icmp ult i16 %mul, 127 + %select = select i1 %cmp0, i16 %mul, i16 127 + br label %exit + +sw.bb.i: + %cmp1 = icmp ugt i16 %mul, 34 + %select.i = select i1 %cmp1, i16 %mul, i16 34 + br label %exit + +default: + br label %exit + +exit: + %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] + ret i16 %res +} + +; CHECK-LABEL: icmp_switch_narrow_source: +; CHECK-NOT: uxt +define i16 @icmp_switch_narrow_source(i8 zeroext %arg) { +entry: + %conv = zext i8 %arg to i16 + %add = add nuw i16 %conv, 15 + %mul = mul nuw nsw i16 %add, 3 + switch i8 %arg, label %default [ + i8 0, label %sw.bb + i8 1, label %sw.bb.i + ] + +sw.bb: + %cmp0 = icmp ult i16 %mul, 127 + %select = select i1 %cmp0, i16 %mul, i16 127 + br label %exit + +sw.bb.i: + %cmp1 = icmp ugt i16 %mul, 34 + %select.i = select i1 %cmp1, i16 %mul, i16 34 + br label %exit + +default: + br label %exit + +exit: + %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] + ret i16 %res +} + +; CHECK-LABEL: icmp_switch_trunc: +; CHECK-NOT: uxt +define i16 @icmp_switch_trunc(i16 zeroext %arg) { +entry: + %conv = add nuw i16 %arg, 15 + %mul = mul nuw nsw i16 %conv, 3 + %trunc = trunc i16 %arg to i3 + switch i3 %trunc, label %default [ + i3 0, label %sw.bb + i3 1, label %sw.bb.i + ] + +sw.bb: + %cmp0 = icmp ult i16 %mul, 127 + %select = select i1 %cmp0, i16 %mul, i16 127 + br label %exit + +sw.bb.i: + %cmp1 = icmp ugt i16 %mul, 34 + %select.i = select i1 %cmp1, i16 %mul, i16 34 + br label %exit + +default: + br label %exit + +exit: + %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] + ret i16 %res +} + +%class.ae = type { i8 } +%class.x = type { i8 } +%class.v = type { %class.q } +%class.q = type { i16 } +declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr +declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr + +; CHECK-LABEL: trunc_i16_i9_switch +; CHECK-NOT: uxt +define i32 @trunc_i16_i9_switch(%class.ae* %this) { +entry: + %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this) + %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call) + %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0 + %1 = load i16, i16* %0, align 2 + %2 = trunc i16 %1 to i9 + %trunc = and i9 %2, -64 + switch i9 %trunc, label %cleanup.fold.split [ + i9 0, label %cleanup + i9 -256, label %if.then7 + ] + +if.then7: + %3 = and i16 %1, 7 + %tobool = icmp eq i16 %3, 0 + %cond = select i1 %tobool, i32 2, i32 1 + br label %cleanup + +cleanup.fold.split: + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ] + ret i32 %retval.0 +}