Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -110,11 +110,26 @@ class IRPromoter { SmallPtrSet NewInsts; SmallVector InstsToRemove; + DenseMap TruncTysMap; + SmallPtrSet Promoted; Module *M = nullptr; LLVMContext &Ctx; + Type *ExtTy = nullptr; + Type *OrigTy = nullptr; + + void PrepareConstants(SmallPtrSetImpl &Visited, + SmallPtrSetImpl &SafeToPromote); + void ExtendSources(SmallPtrSetImpl &Sources); + void PromoteTree(SmallPtrSetImpl &Visited, + SmallPtrSetImpl &Sources, + SmallPtrSetImpl &Sinks, + SmallPtrSetImpl &SafeToPromote); + void TruncateSinks(SmallPtrSetImpl &Sources, + SmallPtrSetImpl &Sinks); public: - IRPromoter(Module *M) : M(M), Ctx(M->getContext()) { } + IRPromoter(Module *M) : M(M), Ctx(M->getContext()), + ExtTy(Type::getInt32Ty(Ctx)) { } void Cleanup() { for (auto *I : InstsToRemove) { @@ -129,14 +144,17 @@ void Mutate(Type *OrigTy, SmallPtrSetImpl &Visited, SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks); + SmallPtrSetImpl &Sinks, + SmallPtrSetImpl &SafeToPromote); }; class ARMCodeGenPrepare : public FunctionPass { const ARMSubtarget *ST = nullptr; IRPromoter *Promoter = nullptr; std::set AllVisited; + SmallPtrSet SafeToPromote; + bool isSafeOverflow(Instruction *I); bool isSupportedValue(Value *V); bool isLegalToPromote(Value *V); bool TryToPromote(Value *V); @@ -241,8 +259,8 @@ } /// Return whether the instruction can be promoted within any modifications to -/// it's operands or result. -static bool isSafeOverflow(Instruction *I) { +/// its operands or result. +bool ARMCodeGenPrepare::isSafeOverflow(Instruction *I) { // FIXME Do we need NSW too? if (isa(I) && I->hasNoUnsignedWrap()) return true; @@ -386,11 +404,13 @@ // If I is only being used by something that will require its value to be // truncated, then we don't care about the promoted result. 
auto *I = cast(V); - if (I->hasOneUse() && isSink(*I->use_begin())) + if (I->hasOneUse() && isSink(*I->use_begin())) { + LLVM_DEBUG(dbgs() << "ARM CGP: Only use is a sink: " << *V << "\n"); return true; + } if (isa(I)) - return isSafeOverflow(I); + return false; return true; } @@ -414,56 +434,84 @@ llvm_unreachable("unhandled opcode for narrow intrinsic"); } -void IRPromoter::Mutate(Type *OrigTy, - SmallPtrSetImpl &Visited, - SmallPtrSetImpl &Sources, - SmallPtrSetImpl &Sinks) { +static void ReplaceAllUsersOfWith(Value *From, Value *To) { + SmallVector Users; + Instruction *InstTo = dyn_cast(To); + for (Use &U : From->uses()) { + auto *User = cast(U.getUser()); + if (InstTo && User->isIdenticalTo(InstTo)) + continue; + Users.push_back(User); + } + + for (auto *U : Users) + U->replaceUsesOfWith(From, To); +} + +void +IRPromoter::PrepareConstants(SmallPtrSetImpl &Visited, + SmallPtrSetImpl &SafeToPromote) { IRBuilder<> Builder{Ctx}; - Type *ExtTy = Type::getInt32Ty(M->getContext()); - SmallPtrSet Promoted; - LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " - << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); + // First step is to prepare the instructions for mutation. Most constants + // just need to be zero extended into their new type, but complications arise + // because: + // - For nuw binary operators, negative immediates would need sign extending; + // however, instead we'll change them to positive and zext them. We can do + // this because: + // > The operators that can wrap are: add, sub, mul and shl. + // > shl interprets its second operand as unsigned and if the first operand + // is an immediate, it will need zext to be nuw. + // > I'm assuming mul cannot be nuw while using a negative immediate... + // > Which leaves the nuw add and sub to be handled; as with shl, if an + // immediate is used as operand 0, it will need zext to be nuw. 
+ // - We also allow add and sub to safely overflow in certain circumstances + // and only when the value (operand 0) is being decreased. + // + // For adds and subs, that are either nuw or safely wrap and use a negative + // immediate as operand 1, we create an equivalent instruction using a + // positive immediate. That positive immediate can then be zext along with + // all the other immediates later. + for (auto *V : Visited) { + if (!isa(V)) + continue; - // Cache original types. - DenseMap TruncTysMap; - for (auto *V : Visited) - TruncTysMap[V] = V->getType(); + auto *I = cast(V); + if (SafeToPromote.count(I)) { - auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) { - SmallVector Users; - Instruction *InstTo = dyn_cast(To); - for (Use &U : From->uses()) { - auto *User = cast(U.getUser()); - if (InstTo && User->isIdenticalTo(InstTo)) + if (!isa(I)) continue; - Users.push_back(User); - } - for (auto *U : Users) - U->replaceUsesOfWith(From, To); - }; - - auto FixConst = [&](ConstantInt *Const, Instruction *I) { - Constant *NewConst = isSafeOverflow(I) && Const->isNegative() ? - ConstantExpr::getSExt(Const, ExtTy) : - ConstantExpr::getZExt(Const, ExtTy); - I->replaceUsesOfWith(Const, NewConst); - }; + if (auto *Const = dyn_cast(I->getOperand(1))) { + if (!Const->isNegative()) + break; + + unsigned Opc = I->getOpcode(); + assert((Opc == Instruction::Add || Opc == Instruction::Sub) && + "expected only an add or sub to use a negative imm"); + + LLVM_DEBUG(dbgs() << "ARM CGP: Adjusting " << *I << "\n"); + auto *NewConst = ConstantInt::get(Ctx, Const->getValue().abs()); + Builder.SetInsertPoint(I); + Value *NewVal = Opc == Instruction::Sub ? 
+ Builder.CreateAdd(I->getOperand(0), NewConst) : + Builder.CreateSub(I->getOperand(0), NewConst); + LLVM_DEBUG(dbgs() << "ARM CGP: New equivalent: " << *NewVal << "\n"); + + if (auto *NewInst = dyn_cast(NewVal)) { + NewInst->copyIRFlags(I); + NewInsts.insert(NewInst); + } + InstsToRemove.push_back(I); + I->replaceAllUsesWith(NewVal); + } + } + } + for (auto *I : NewInsts) + Visited.insert(I); +} - auto InsertDSPIntrinsic = [&](Instruction *I) { - LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for " - << *I << "\n"); - Function *DSPInst = - Intrinsic::getDeclaration(M, getNarrowIntrinsic(I)); - Builder.SetInsertPoint(I); - Builder.SetCurrentDebugLocation(I->getDebugLoc()); - Value *Args[] = { I->getOperand(0), I->getOperand(1) }; - CallInst *Call = Builder.CreateCall(DSPInst, Args); - ReplaceAllUsersOfWith(I, Call); - InstsToRemove.push_back(I); - NewInsts.insert(Call); - TruncTysMap[Call] = OrigTy; - }; +void IRPromoter::ExtendSources(SmallPtrSetImpl &Sources) { + IRBuilder<> Builder{Ctx}; auto InsertZExt = [&](Value *V, Instruction *InsertPt) { LLVM_DEBUG(dbgs() << "ARM CGP: Inserting ZExt for " << *V << "\n"); @@ -480,7 +528,8 @@ TruncTysMap[ZExt] = TruncTysMap[V]; }; - // First, insert extending instructions between the sources and their users. + + // Now, insert extending instructions between the sources and their users. LLVM_DEBUG(dbgs() << "ARM CGP: Promoting sources:\n"); for (auto V : Sources) { LLVM_DEBUG(dbgs() << " - " << *V << "\n"); @@ -494,9 +543,17 @@ } Promoted.insert(V); } +} +void IRPromoter::PromoteTree(SmallPtrSetImpl &Visited, + SmallPtrSetImpl &Sources, + SmallPtrSetImpl &Sinks, + SmallPtrSetImpl &SafeToPromote) { LLVM_DEBUG(dbgs() << "ARM CGP: Mutating the tree..\n"); - // Then mutate the types of the instructions within the tree. Here we handle + + IRBuilder<> Builder{Ctx}; + + // Mutate the types of the instructions within the tree. Here we handle // constant operands. 
for (auto *V : Visited) { if (Sources.count(V)) @@ -511,9 +568,10 @@ if ((Op->getType() == ExtTy) || !isa(Op->getType())) continue; - if (auto *Const = dyn_cast(Op)) - FixConst(Const, I); - else if (isa(Op)) + if (auto *Const = dyn_cast(Op)) { + Constant *NewConst = ConstantExpr::getZExt(Const, ExtTy); + I->setOperand(i, NewConst); + } else if (isa(Op)) I->setOperand(i, UndefValue::get(ExtTy)); } @@ -523,20 +581,42 @@ } } - // Now we need to remove any zexts that have become unnecessary, as well - // as insert any intrinsics. + // Finally, any instructions that should be promoted but haven't yet been, + // need to be handled using intrinsics. for (auto *V : Visited) { - if (Sources.count(V)) + auto *I = dyn_cast(V); + if (!I) continue; - if (!shouldPromote(V) || isPromotedResultSafe(V)) + if (Sources.count(I) || Sinks.count(I)) continue; + if (!shouldPromote(I) || SafeToPromote.count(I) || NewInsts.count(I)) + continue; + assert(EnableDSP && "DSP intrinisc insertion not enabled!"); // Replace unsafe instructions with appropriate intrinsic calls. 
- InsertDSPIntrinsic(cast(V)); + LLVM_DEBUG(dbgs() << "ARM CGP: Inserting DSP intrinsic for " + << *I << "\n"); + Function *DSPInst = + Intrinsic::getDeclaration(M, getNarrowIntrinsic(I)); + Builder.SetInsertPoint(I); + Builder.SetCurrentDebugLocation(I->getDebugLoc()); + Value *Args[] = { I->getOperand(0), I->getOperand(1) }; + CallInst *Call = Builder.CreateCall(DSPInst, Args); + ReplaceAllUsersOfWith(I, Call); + InstsToRemove.push_back(I); + NewInsts.insert(Call); + TruncTysMap[Call] = OrigTy; } +} + +void IRPromoter::TruncateSinks(SmallPtrSetImpl &Sources, + SmallPtrSetImpl &Sinks) { + LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n"); + + IRBuilder<> Builder{Ctx}; auto InsertTrunc = [&](Value *V) -> Instruction* { if (!isa(V) || !isa(V->getType())) @@ -558,7 +638,6 @@ return Trunc; }; - LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the sinks:\n"); // Fix up any stores or returns that use the results of the promoted // chain. for (auto I : Sinks) { @@ -584,6 +663,36 @@ } } } +} + +void IRPromoter::Mutate(Type *OrigTy, + SmallPtrSetImpl &Visited, + SmallPtrSetImpl &Sources, + SmallPtrSetImpl &Sinks, + SmallPtrSetImpl &SafeToPromote) { + LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " + << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); + this->OrigTy = OrigTy; + + // Cache original types. + for (auto *V : Visited) + TruncTysMap[V] = V->getType(); + + // Convert adds and subs using negative immediates to equivalent instructions + // that use positive constants. + PrepareConstants(Visited, SafeToPromote); + + // Insert zext instructions between sources and their users. + ExtendSources(Sources); + + // Promote visited instructions, mutating their types in place. Also insert + // DSP intrinsics, if enabled, for adds and subs which would be unsafe to + // promote. + PromoteTree(Visited, Sources, Sinks, SafeToPromote); + + // Finally, insert trunc instructions for use by calls, stores etc... 
+ TruncateSinks(Sources, Sinks); + LLVM_DEBUG(dbgs() << "ARM CGP: Mutation complete:\n"); LLVM_DEBUG(dbgs(); for (auto *V : Sources) @@ -651,11 +760,20 @@ /// smaller than the targeted promoted type. Check that we're not trying to /// promote something larger than our base 'TypeSize' type. bool ARMCodeGenPrepare::isLegalToPromote(Value *V) { - if (isPromotedResultSafe(V)) - return true; auto *I = dyn_cast(V); if (!I) + return true; + + if (SafeToPromote.count(I)) + return true; + + if (isPromotedResultSafe(V) || isSafeOverflow(I)) { + SafeToPromote.insert(I); + return true; + } + + if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub) return false; // If promotion is not safe, can we use a DSP instruction to natively @@ -666,9 +784,6 @@ if (ST->isThumb() && !ST->hasThumb2()) return false; - if (I->getOpcode() != Instruction::Add && I->getOpcode() != Instruction::Sub) - return false; - // TODO // Would it be profitable? For Thumb code, these parallel DSP instructions // are only Thumb-2, so we wouldn't be able to dual issue on Cortex-M33. For @@ -680,6 +795,7 @@ return false; } } + LLVM_DEBUG(dbgs() << "ARM CGP: Will use an intrinsic for: " << *I << "\n"); return true; } @@ -689,6 +805,8 @@ if (TypeSize > 16 || TypeSize < 8) return false; + SafeToPromote.clear(); + if (!isSupportedValue(V) || !shouldPromote(V) || !isLegalToPromote(V)) return false; @@ -698,9 +816,8 @@ SetVector WorkList; SmallPtrSet Sources; SmallPtrSet Sinks; - WorkList.insert(V); SmallPtrSet CurrentVisited; - CurrentVisited.clear(); + WorkList.insert(V); // Return true if V was added to the worklist as a supported instruction, // if it was already visited, or if we don't need to explore it (e.g. 
@@ -783,7 +900,7 @@ if (ToPromote < 2) return false; - Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks); + Promoter->Mutate(OrigTy, CurrentVisited, Sources, Sinks, SafeToPromote); return true; } Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-calls.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s + +; Check that the pass doesn't try to promote the immediate parameters. +; CHECK-LABEL: call_with_imms +; CHECK-NOT: uxt +define i8 @call_with_imms(i8* %arg) { + %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0) + %cmp = icmp eq i8 %call, 0 + %res = select i1 %cmp, i8 %call, i8 1 + ret i8 %res +} + +; Test that the call result is still extended. +; CHECK-LABEL: test_call: +; CHECK: bl +; CHECK-NEXT: sxtb r1, r0 +define i16 @test_call(i8 zeroext %arg) { + %call = call i8 @dummy_i8(i8 %arg) + %cmp = icmp ult i8 %call, 128 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +; Test that the transformation bails when it finds that i16 is larger than i8. +; TODO: We should be able to remove the uxtb in these cases. 
+; CHECK-LABEL: promote_i8_sink_i16_1 +; CHECK: bl dummy_i8 +; CHECK: add{{.*}} r0, #1 +; CHECK: uxtb r0, r0 +; CHECK: cmp r0 +define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { + %call = tail call zeroext i8 @dummy_i8(i8 %arg0) + %add = add nuw i8 %call, 1 + %conv = zext i8 %add to i16 + %cmp = icmp ne i16 %conv, %arg1 + %sel = select i1 %cmp, i16 %arg1, i16 %arg2 + %res = tail call zeroext i16 @dummy3(i16 %sel) + ret i16 %res +} + +; CHECK-LABEL: promote_i8_sink_i16_2 +; CHECK: bl dummy_i8 +; CHECK: add{{.*}} r0, #1 +; CHECK-NOT: uxt +; CHECK: cmp r0 +define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) { + %call = tail call zeroext i8 @dummy_i8(i8 %arg0) + %add = add nuw i8 %call, 1 + %cmp = icmp ne i8 %add, %arg1 + %conv = zext i8 %arg1 to i16 + %sel = select i1 %cmp, i16 %conv, i16 %arg2 + %res = tail call zeroext i16 @dummy3(i16 %sel) + ret i16 %res +} + +@uc = global i8 42, align 1 +@LL = global i64 0, align 8 + +; CHECK-LABEL: zext_i64 +; CHECK: ldrb +; CHECK: strd +define void @zext_i64() { +entry: + %0 = load i8, i8* @uc, align 1 + %conv = zext i8 %0 to i64 + store i64 %conv, i64* @LL, align 8 + %cmp = icmp eq i8 %0, 42 + %conv1 = zext i1 %cmp to i32 + %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1) + ret void +} + +@a = global i16* null, align 4 +@b = global i32 0, align 4 + +; CHECK-LABEL: constexpr +; CHECK: uxth +define i32 @constexpr() { +entry: + store i32 ptrtoint (i32* @b to i32), i32* @b, align 4 + %0 = load i16*, i16** @a, align 4 + %1 = load i16, i16* %0, align 2 + %or = or i16 %1, ptrtoint (i32* @b to i16) + store i16 %or, i16* %0, align 2 + %cmp = icmp ne i16 %or, 4 + %conv3 = zext i1 %cmp to i32 + %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2 + ret i32 undef +} + +; The call to safe_lshift_func takes two parameters, but they're the same value +; just one is zext. 
We do support zext now, so the transformation should +; trigger and we don't want see uxtb here. +; CHECK-LABEL: call_zext_i8_i32 +; CHECK-NOT: uxt +define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) { +for.cond8.preheader: + %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef) + %tobool219 = icmp eq i8 %call217, 0 + br i1 %tobool219, label %for.end411, label %for.cond273.preheader + +for.cond273.preheader: ; preds = %for.cond8.preheader + %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ] + %conv218.le = zext i8 %call217.lcssa to i32 + %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le) + unreachable + +for.end411: ; preds = %for.cond8.preheader + %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4) + unreachable +} + +%struct.anon = type { i32 } + +@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@g_82 = hidden local_unnamed_addr global i32 0, align 4 + +; Test that the transform bails on finding %conv4, a trunc +; CHECK-LABEL: call_return_pointer +; CHECK: sxth +; CHECK: uxt +define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 { +entry: + %conv1 = zext i8 %p_13 to i16 + %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef) + %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4 + %conv2 = trunc i32 %0 to i16 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ] + %tobool = icmp eq i8 %p_13.addr.0, 0 + br i1 %tobool, label %for.cond.backedge, label %if.then + +for.cond.backedge: ; preds = %for.cond, %if.then + %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ] + br label %for.cond + +if.then: ; preds = %for.cond + 
%call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2) + %conv4 = trunc i16 %call3 to i8 + br label %for.cond.backedge +} + +; Transform will bail because of the zext +; Check that d.sroa.0.0.be is promoted passed directly into the tail call. +; CHECK-LABEL: check_zext_phi_call_arg +; CHECK: uxt +define i32 @check_zext_phi_call_arg() { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ] + %tobool = icmp eq i16 %d.sroa.0.0, 0 + br i1 %tobool, label %for.cond.backedge, label %if.then + +for.cond.backedge: ; preds = %for.cond, %if.then + %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ] + br label %for.cond + +if.then: ; preds = %for.cond + %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32 + %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2 + br label %for.cond.backedge +} + +declare i32 @assert(...) +declare i8 @dummy_i8(i8) +declare i8 @dummy2(i8*, i8, i8) +declare i16 @dummy3(i16) + +declare dso_local i32 @e(...) local_unnamed_addr #1 +declare dso_local zeroext i16 @f(...) 
local_unnamed_addr #1 + +declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66) +declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2) +declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64) +declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32) +declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext) Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-casts.ll @@ -0,0 +1,340 @@ +; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; Transform will fail because the trunc is not a sink. 
+; CHECK-COMMON-LABEL: dsp_trunc +; CHECK-COMMON: add [[ADD:[^ ]+]], +; CHECK-DSP-NEXT: ldrh r1, [r3] +; CHECK-DSP-NEXT: ldrh r2, [r2] +; CHECK-DSP-NEXT: subs r1, r1, [[ADD]] +; CHECK-DSP-NEXT: add r0, r2 +; CHECK-DSP-NEXT: uxth r3, r1 +; CHECK-DSP-NEXT: uxth r2, r0 +; CHECK-DSP-NEXT: cmp r2, r3 + +; With DSP-IMM, we could have: +; movs r1, #0 +; uxth r0, r0 +; usub16 r1, r1, r0 +; ldrh r0, [r2] +; ldrh r3, [r3] +; usub16 r0, r0, r1 +; uadd16 r1, r3, r1 +; cmp r0, r1 +define i16 @dsp_trunc(i32 %arg0, i32 %arg1, i16* %gep0, i16* %gep1) { +entry: + %add0 = add i32 %arg0, %arg1 + %conv0 = trunc i32 %add0 to i16 + %sub0 = sub i16 0, %conv0 + %load0 = load i16, i16* %gep0, align 2 + %load1 = load i16, i16* %gep1, align 2 + %sub1 = sub i16 %load0, %sub0 + %add1 = add i16 %load1, %sub0 + %cmp = icmp ult i16 %sub1, %add1 + %res = select i1 %cmp, i16 %add1, i16 %sub1 + ret i16 %res +} + +; CHECK-COMMON-LABEL: trunc_i16_i8 +; CHECK-COMMON: ldrh +; CHECK-COMMON: uxtb +; CHECK-COMMON: cmp +define i8 @trunc_i16_i8(i16* %ptr, i16 zeroext %arg0, i8 zeroext %arg1) { +entry: + %0 = load i16, i16* %ptr + %1 = add i16 %0, %arg0 + %2 = trunc i16 %1 to i8 + %3 = icmp ugt i8 %2, %arg1 + %4 = select i1 %3, i8 %2, i8 %arg1 + ret i8 %4 +} + +; The pass perform the transform, but a uxtb will still be inserted to handle +; the zext to the icmp. 
+; CHECK-COMMON-LABEL: icmp_i32_zext: +; CHECK-COMMON: sub +; CHECK-COMMON: uxtb +; CHECK-COMMON: cmp +define i8 @icmp_i32_zext(i8* %ptr) { +entry: + %gep = getelementptr inbounds i8, i8* %ptr, i32 0 + %0 = load i8, i8* %gep, align 1 + %1 = sub nuw nsw i8 %0, 1 + %conv44 = zext i8 %0 to i32 + br label %preheader + +preheader: + br label %body + +body: + %2 = phi i8 [ %1, %preheader ], [ %3, %if.end ] + %si.0274 = phi i32 [ %conv44, %preheader ], [ %inc, %if.end ] + %conv51266 = zext i8 %2 to i32 + %cmp52267 = icmp eq i32 %si.0274, %conv51266 + br i1 %cmp52267, label %if.end, label %exit + +if.end: + %inc = add i32 %si.0274, 1 + %gep1 = getelementptr inbounds i8, i8* %ptr, i32 %inc + %3 = load i8, i8* %gep1, align 1 + br label %body + +exit: + ret i8 %2 +} + +; Won't don't handle sext +; CHECK-COMMON-LABEL: icmp_sext_zext_store_i8_i16 +; CHECK-COMMON: ldrb +; CHECK-COMMON: ldrsh +define i32 @icmp_sext_zext_store_i8_i16() { +entry: + %0 = load i8, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @d_uch, i32 0, i32 2), align 1 + %conv = zext i8 %0 to i16 + store i16 %conv, i16* @sh1, align 2 + %conv1 = zext i8 %0 to i32 + %1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @d_sh, i32 0, i32 2), align 2 + %conv2 = sext i16 %1 to i32 + %cmp = icmp eq i32 %conv1, %conv2 + %conv3 = zext i1 %cmp to i32 + ret i32 %conv3 +} + +; CHECK-COMMON-LABEL: or_icmp_ugt: +; CHECK-COMMON: ldrb +; CHECK-COMMON: sub.w +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp.w +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp +define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) { +entry: + %0 = load i8, i8* %ptr + %1 = zext i8 %0 to i32 + %mul = shl nuw nsw i32 %1, 1 + %add0 = add nuw nsw i32 %mul, 6 + %cmp0 = icmp ne i32 %arg, %add0 + %add1 = add i8 %0, -1 + %cmp1 = icmp ugt i8 %add1, 3 + %or = or i1 %cmp0, %cmp1 + ret i1 %or +} + +; CHECK-COMMON-LABEL: icmp_switch_trunc: +; CHECK-COMMON-NOT: uxt +define i16 @icmp_switch_trunc(i16 zeroext %arg) { +entry: + %conv = add nuw i16 %arg, 15 + %mul = 
mul nuw nsw i16 %conv, 3 + %trunc = trunc i16 %arg to i3 + switch i3 %trunc, label %default [ + i3 0, label %sw.bb + i3 1, label %sw.bb.i + ] + +sw.bb: + %cmp0 = icmp ult i16 %mul, 127 + %select = select i1 %cmp0, i16 %mul, i16 127 + br label %exit + +sw.bb.i: + %cmp1 = icmp ugt i16 %mul, 34 + %select.i = select i1 %cmp1, i16 %mul, i16 34 + br label %exit + +default: + br label %exit + +exit: + %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] + ret i16 %res +} + +; We currently only handle truncs as sinks, so a uxt will still be needed for +; the icmp ugt instruction. +; CHECK-COMMON-LABEL: urem_trunc_icmps +; CHECK-COMMON: cmp +; CHECK-COMMON: uxt +; CHECK-COMMON: cmp +define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { +entry: + %ptr = load i16*, i16** %in, align 4 + %ld = load i16, i16* %ptr, align 2 + %cmp.i = icmp eq i16 %ld, 0 + br i1 %cmp.i, label %exit, label %cond.false.i + +cond.false.i: + %rem = urem i16 5, %ld + %extract.t = trunc i16 %rem to i8 + br label %body + +body: + %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] + %cmp = icmp ugt i8 %cond.in.i.off0, 7 + %conv5 = zext i1 %cmp to i32 + store i32 %conv5, i32* %g, align 4 + %.pr = load i32, i32* %k, align 4 + %tobool13150 = icmp eq i32 %.pr, 0 + br i1 %tobool13150, label %for.inc, label %exit + +for.inc: + %add = add nuw i8 %cond.in.i.off0, 1 + br label %body + +exit: + ret void +} + +; CHECK-COMMON-LABEL: phi_feeding_switch +; CHECK-COMMON: ldrb +; CHECK-COMMON: uxtb +define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) { +entry: + %pre = load i8, i8* %memblock, align 1 + %conv = trunc i16 %arg to i8 + br label %header + +header: + %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ] + %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ] + %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ] + switch i8 %phi.0, label %default [ + i8 43, label %for.inc.i + i8 45, label %for.inc.i.i + ] + +for.inc.i: + %xor = xor i8 
%phi.1, 1 + br label %latch + +for.inc.i.i: + %and = and i8 %phi.1, 3 + br label %latch + +default: + %sub = sub i8 %phi.0, 1 + %cmp2 = icmp ugt i8 %sub, 4 + br i1 %cmp2, label %latch, label %exit + +latch: + %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ] + %count = add nuw i8 %phi.2, 1 + store i8 %count, i8* %store, align 1 + br label %header + +exit: + ret void +} + +; Check that %exp requires uxth in all cases, and will also be required to +; promote %1 for the call - unless we can generate a uadd16. +; CHECK-COMMON-LABEL: zext_load_sink_call: +; CHECK-COMMON: uxt +; uadd16 +; cmp +; CHECK-COMMON: uxt +define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) { +entry: + %0 = load i16, i16* %ptr, align 4 + %1 = add i16 %exp, 3 + %cmp = icmp eq i16 %0, %exp + br i1 %cmp, label %exit, label %if.then + +if.then: + %conv0 = zext i16 %0 to i32 + %conv1 = zext i16 %1 to i32 + %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1) + br label %exit + +exit: + %exitval = phi i32 [ %call, %if.then ], [ 0, %entry ] + ret i32 %exitval +} + +%class.ae = type { i8 } +%class.x = type { i8 } +%class.v = type { %class.q } +%class.q = type { i16 } + +; CHECK-COMMON-LABEL: trunc_i16_i9_switch +; CHECK-COMMON-NOT: uxt +define i32 @trunc_i16_i9_switch(%class.ae* %this) { +entry: + %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this) + %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call) + %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0 + %1 = load i16, i16* %0, align 2 + %2 = trunc i16 %1 to i9 + %trunc = and i9 %2, -64 + switch i9 %trunc, label %cleanup.fold.split [ + i9 0, label %cleanup + i9 -256, label %if.then7 + ] + +if.then7: + %3 = and i16 %1, 7 + %tobool = icmp eq i16 %3, 0 + %cond = select i1 %tobool, i32 2, i32 1 + br label %cleanup + +cleanup.fold.split: + br label %cleanup + +cleanup: + %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ] + ret i32 
%retval.0 +} + +; CHECK-COMMON-LABEL: bitcast_i16 +; CHECK-COMMON-NOT: uxt +define i16 @bitcast_i16(i16 zeroext %arg0, i16 zeroext %arg1) { +entry: + %cast = bitcast i16 12345 to i16 + %add = add nuw i16 %arg0, 1 + %cmp = icmp ule i16 %add, %cast + %res = select i1 %cmp, i16 %arg1, i16 32657 + ret i16 %res +} + +; CHECK-COMMON-LABEL: bitcast_i8 +; CHECK-COMMON-NOT: uxt +define i8 @bitcast_i8(i8 zeroext %arg0, i8 zeroext %arg1) { +entry: + %cast = bitcast i8 127 to i8 + %mul = shl nuw i8 %arg0, 1 + %cmp = icmp uge i8 %mul, %arg1 + %res = select i1 %cmp, i8 %cast, i8 128 + ret i8 %res +} + +; CHECK-COMMON-LABEL: bitcast_i16_minus +; CHECK-COMMON-NOT: uxt +define i16 @bitcast_i16_minus(i16 zeroext %arg0, i16 zeroext %arg1) { +entry: + %cast = bitcast i16 -12345 to i16 + %xor = xor i16 %arg0, 7 + %cmp = icmp eq i16 %xor, %arg1 + %res = select i1 %cmp, i16 %cast, i16 32657 + ret i16 %res +} + +; CHECK-COMMON-LABEL: bitcast_i8_minus +; CHECK-COMMON-NOT: uxt +define i8 @bitcast_i8_minus(i8 zeroext %arg0, i8 zeroext %arg1) { +entry: + %cast = bitcast i8 -127 to i8 + %and = and i8 %arg0, 3 + %cmp = icmp ne i8 %and, %arg1 + %res = select i1 %cmp, i8 %cast, i8 128 + ret i8 %res +} + +declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr +declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr +declare i32 @dummy(i32, i32) + +@d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1 +@sh1 = hidden local_unnamed_addr global i16 0, align 2 +@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2 Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-icmps.ll @@ -0,0 +1,312 @@ +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv7em %s 
-arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; CHECK-COMMON-LABEL: test_ult_254_inc_imm: +; CHECK-DSP: adds r0, #1 +; CHECK-DSP-NEXT: uxtb r1, r0 +; CHECK-DSP-NEXT: movs r0, #47 +; CHECK-DSP-NEXT: cmp r1, #254 +; CHECK-DSP-NEXT: it lo +; CHECK-DSP-NEXT: movlo r0, #35 + +; CHECK-DSP-IMM: movs r1, #1 +; CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1 +; CHECK-DSP-IMM-NEXT: movs r0, #47 +; CHECK-DSP-IMM-NEXT: cmp r1, #254 +; CHECK-DSP-IMM-NEXT: it lo +; CHECK-DSP-IMM-NEXT: movlo r0, #35 +define i32 @test_ult_254_inc_imm(i8 zeroext %x) { +entry: + %add = add i8 %x, 1 + %cmp = icmp ult i8 %add, 254 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_slt_254_inc_imm +; CHECK-COMMON: adds +; CHECK-COMMON: sxtb +define i32 @test_slt_254_inc_imm(i8 signext %x) { +entry: + %add = add i8 %x, 1 + %cmp = icmp slt i8 %add, 254 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_ult_254_inc_var: +; CHECK-NODSP: add r0, r1 +; CHECK-NODSP-NEXT: uxtb r1, r0 +; CHECK-NODSP-NEXT: movs r0, #47 +; CHECK-NODSP-NEXT: cmp r1, #254 +; CHECK-NODSP-NEXT: it lo +; CHECK-NODSP-NEXT: movlo r0, #35 + +; CHECK-DSP: uadd8 r1, r0, r1 +; CHECK-DSP-NEXT: movs r0, #47 +; CHECK-DSP-NEXT: cmp r1, #254 +; CHECK-DSP-NEXT: it lo +; CHECK-DSP-NEXT: movlo r0, #35 +define i32 @test_ult_254_inc_var(i8 zeroext %x, i8 zeroext %y) { +entry: + %add = add i8 %x, %y + %cmp = icmp ult i8 %add, 254 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_sle_254_inc_var +; CHECK-COMMON: add +; CHECK-COMMON: sxtb +; CHECK-COMMON: cmp +define i32 @test_sle_254_inc_var(i8 %x, i8 %y) { +entry: + %add = add i8 %x, %y + %cmp = icmp sle i8 %add, 254 + %res = select 
i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_ugt_1_dec_imm: +; CHECK-COMMON: subs r1, r0, #1 +; CHECK-COMMON-NEXT: movs r0, #47 +; CHECK-COMMON-NEXT: cmp r1, #1 +; CHECK-COMMON-NEXT: it hi +; CHECK-COMMON-NEXT: movhi r0, #35 +define i32 @test_ugt_1_dec_imm(i8 zeroext %x) { +entry: + %add = add i8 %x, -1 + %cmp = icmp ugt i8 %add, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_sgt_1_dec_imm +; CHECK-COMMON: subs +; CHECK-COMMON: sxtb +; CHECK-COMMON: cmp +define i32 @test_sgt_1_dec_imm(i8 %x) { +entry: + %add = add i8 %x, -1 + %cmp = icmp sgt i8 %add, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_ugt_1_dec_var: +; CHECK-NODSP: subs r0, r0, r1 +; CHECK-NODSP-NEXT: uxtb r1, r0 +; CHECK-NODSP-NEXT: movs r0, #47 +; CHECK-NODSP-NEXT: cmp r1, #1 +; CHECK-NODSP-NEXT: it hi +; CHECK-NODSP-NEXT: movhi r0, #35 + +; CHECK-DSP: usub8 r1, r0, r1 +; CHECK-DSP-NEXT: movs r0, #47 +; CHECK-DSP-NEXT: cmp r1, #1 +; CHECK-DSP-NEXT: it hi +; CHECK-DSP-NEXT: movhi r0, #35 +define i32 @test_ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) { +entry: + %sub = sub i8 %x, %y + %cmp = icmp ugt i8 %sub, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: test_sge_1_dec_var +; CHECK-COMMON: sub +; CHECK-COMMON: sxtb +; CHECK-COMMON: cmp +define i32 @test_sge_1_dec_var(i8 %x, i8 %y) { +entry: + %sub = sub i8 %x, %y + %cmp = icmp sge i8 %sub, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: dsp_imm1: +; CHECK-DSP: eors r1, r0 +; CHECK-DSP-NEXT: and r0, r0, #7 +; CHECK-DSP-NEXT: subs r0, r0, r1 +; CHECK-DSP-NEXT: adds r0, #1 +; CHECK-DSP-NEXT: uxtb r1, r0 +; CHECK-DSP-NEXT: movs r0, #47 +; CHECK-DSP-NEXT: cmp r1, #254 +; CHECK-DSP-NEXT: it lo +; CHECK-DSP-NEXT: movlo r0, #35 + +; CHECK-DSP-IMM: eors r1, r0 +; CHECK-DSP-IMM-NEXT: and r0, r0, #7 +; CHECK-DSP-IMM-NEXT: usub8 r0, r0, r1 +; CHECK-DSP-IMM-NEXT: movs r1, #1 +; 
CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1 +; CHECK-DSP-IMM-NEXT: movs r0, #47 +; CHECK-DSP-IMM-NEXT: cmp r1, #254 +; CHECK-DSP-IMM-NEXT: it lo +; CHECK-DSP-IMM-NEXT: movlo r0, #35 +define i32 @dsp_imm1(i8 zeroext %x, i8 zeroext %y) { +entry: + %xor = xor i8 %x, %y + %and = and i8 %x, 7 + %sub = sub i8 %and, %xor + %add = add i8 %sub, 1 + %cmp = icmp ult i8 %add, 254 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: dsp_var: +; CHECK-COMMON: eors r1, r0 +; CHECK-COMMON: and r2, r0, #7 +; CHECK-NODSP: subs r1, r2, r1 +; CHECK-NODSP: add.w r0, r1, r0, lsl #1 +; CHECK-NODSP: uxtb r1, r0 +; CHECK-DSP: usub8 r1, r2, r1 +; CHECK-DSP: lsls r0, r0, #1 +; CHECK-DSP: uadd8 r1, r1, r0 +; CHECK-DSP-NOT: uxt +; CHECK-COMMON: movs r0, #47 +; CHECK-COMMON: cmp r1, #254 +; CHECK-COMMON: it lo +; CHECK-COMMON: movlo r0, #35 +define i32 @dsp_var(i8 zeroext %x, i8 zeroext %y) { + %xor = xor i8 %x, %y + %and = and i8 %x, 7 + %sub = sub i8 %and, %xor + %mul = shl nuw i8 %x, 1 + %add = add i8 %sub, %mul + %cmp = icmp ult i8 %add, 254 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: store_dsp_res +; CHECK-DSP: usub8 +; CHECK-DSP: strb +define void @store_dsp_res(i8* %in, i8* %out, i8 %compare) { + %first = getelementptr inbounds i8, i8* %in, i32 0 + %second = getelementptr inbounds i8, i8* %in, i32 1 + %ld0 = load i8, i8* %first + %ld1 = load i8, i8* %second + %xor = xor i8 %ld0, -1 + %cmp = icmp ult i8 %compare, %ld1 + %select = select i1 %cmp, i8 %compare, i8 %xor + %sub = sub i8 %ld0, %select + store i8 %sub, i8* %out, align 1 + ret void +} + +; CHECK-COMMON-LABEL: ugt_1_dec_imm: +; CHECK-COMMON: subs r1, r0, #1 +; CHECK-COMMON-NEXT: movs r0, #47 +; CHECK-COMMON-NEXT: cmp r1, #1 +; CHECK-COMMON-NEXT: it hi +; CHECK-COMMON-NEXT: movhi r0, #35 +define i32 @ugt_1_dec_imm(i8 zeroext %x) { +entry: + %add = add i8 %x, -1 + %cmp = icmp ugt i8 %add, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: 
ugt_1_dec_var: +; CHECK-NODSP: subs r0, r0, r1 +; CHECK-NODSP-NEXT: uxtb r1, r0 +; CHECK-NODSP-NEXT: movs r0, #47 +; CHECK-NODSP-NEXT: cmp r1, #1 +; CHECK-NODSP-NEXT: it hi +; CHECK-NODSP-NEXT: movhi r0, #35 + +; CHECK-DSP: usub8 r1, r0, r1 +; CHECK-DSP-NEXT: movs r0, #47 +; CHECK-DSP-NEXT: cmp r1, #1 +; CHECK-DSP-NEXT: it hi +; CHECK-DSP-NEXT: movhi r0, #35 +define i32 @ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) { +entry: + %sub = sub i8 %x, %y + %cmp = icmp ugt i8 %sub, 1 + %res = select i1 %cmp, i32 35, i32 47 + ret i32 %res +} + +; CHECK-COMMON-LABEL: icmp_eq_minus_one +; CHECK-COMMON: cmp r0, #255 +define i32 @icmp_eq_minus_one(i8* %ptr) { + %load = load i8, i8* %ptr, align 1 + %conv = zext i8 %load to i32 + %cmp = icmp eq i8 %load, -1 + %ret = select i1 %cmp, i32 %conv, i32 -1 + ret i32 %ret +} + +; CHECK-COMMON-LABEL: icmp_not +; CHECK-COMMON: movw r2, #65535 +; CHECK-COMMON: eors r2, r0 +; CHECK-COMMON: movs r0, #32 +; CHECK-COMMON: cmp r2, r1 +define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) { + %not = xor i16 %arg0, -1 + %cmp = icmp eq i16 %not, %arg1 + %res = select i1 %cmp, i32 16, i32 32 + ret i32 %res +} + +; CHECK-COMMON-LABEL: icmp_i1 +; CHECK-NOT: uxt +define i32 @icmp_i1(i1* %arg0, i1 zeroext %arg1, i32 %a, i32 %b) { +entry: + %load = load i1, i1* %arg0 + %not = xor i1 %load, 1 + %cmp = icmp eq i1 %arg1, %not + %res = select i1 %cmp, i32 %a, i32 %b + ret i32 %res +} + +; CHECK-COMMON-LABEL: icmp_i7 +; CHECK-COMMON: ldrb +; CHECK-COMMON: cmp +define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) { +entry: + %load = load i7, i7* %arg0 + %add = add nuw i7 %load, 1 + %cmp = icmp ult i7 %arg1, %add + %res = select i1 %cmp, i32 %a, i32 %b + ret i32 %res +} + +; CHECK-COMMON-LABEL: icmp_i15 +; CHECK-COMMON: movw [[MINUS_ONE:r[0-9]+]], #32767 +define i32 @icmp_i15(i15 zeroext %arg0, i15 zeroext %arg1) { + %xor = xor i15 %arg0, -1 + %cmp = icmp eq i15 %xor, %arg1 + %res = select i1 %cmp, i32 21, i32 42 + ret i32 %res +} + +; 
CHECK-COMMON-LABEL: icmp_minus_imm +; CHECK-NODSP: subs [[SUB:r[0-9]+]], +; CHECK-NODSP: uxtb [[UXT:r[0-9]+]], +; CHECK-NODSP: cmp [[UXT]], #251 + +; CHECK-DSP: subs [[SUB:r[0-9]+]], +; CHECK-DSP: uxtb [[UXT:r[0-9]+]], +; CHECK-DSP: cmp [[UXT]], #251 + +; CHECK-DSP-IMM: ldrb [[A:r[0-9]+]], +; CHECK-DSP-IMM: movs [[MINUS_7:r[0-9]+]], #249 +; CHECK-DSP-IMM: uadd8 [[RES:r[0-9]+]], [[A]], [[MINUS_7]] +; CHECK-DSP-IMM: cmp [[RES]], #251 +define i32 @icmp_minus_imm(i8* %a) { +entry: + %0 = load i8, i8* %a, align 1 + %add.i = add i8 %0, -7 + %cmp = icmp ugt i8 %add.i, -5 + %conv1 = zext i1 %cmp to i32 + ret i32 %conv1 +} + Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-overflow.ll @@ -0,0 +1,232 @@ +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s + +; CHECK-LABEL: overflow_add +; CHECK: add +; CHECK: uxth +; CHECK: cmp +define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) { + %add = add i16 %a, %b + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +; CHECK-LABEL: overflow_sub +; CHECK: sub +; CHECK: uxth +; CHECK: cmp +define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) { + %add = sub i16 %a, %b + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +; CHECK-LABEL: overflow_mul +; CHECK: mul +; CHECK: uxth +; CHECK: cmp +define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) { + %add = mul i16 %a, %b + %or = or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +; CHECK-LABEL: overflow_shl +; CHECK: lsl +; CHECK: uxth +; CHECK: cmp +define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) { + %add = shl i16 %a, %b + %or = 
or i16 %add, 1 + %cmp = icmp ugt i16 %or, 1024 + %res = select i1 %cmp, i16 2, i16 5 + ret i16 %res +} + +; CHECK-LABEL: overflow_add_no_consts: +; CHECK: add r0, r1 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], r2 +; CHECK: movhi r0, #8 +define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) { + %add = add i8 %a, %b + %cmp = icmp ugt i8 %add, %limit + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: overflow_add_const_limit: +; CHECK: add r0, r1 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #128 +; CHECK: movhi r0, #8 +define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) { + %add = add i8 %a, %b + %cmp = icmp ugt i8 %add, 128 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: overflow_add_positive_const_limit: +; CHECK: adds r0, #1 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #128 +; CHECK: movhi r0, #8 +define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { + %add = add i8 %a, 1 + %cmp = icmp ugt i8 %add, 128 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: unsafe_add_underflow: +; CHECK: subs r0, #2 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #255 +; CHECK: moveq r0, #8 +define i32 @unsafe_add_underflow(i8 zeroext %a) { + %add = add i8 %a, -2 + %cmp = icmp ugt i8 %add, 254 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: safe_add_underflow: +; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1 +; CHECK-NOT: uxtb +; CHECK: cmp [[MINUS_1]], #254 +; CHECK: movhi r0, #8 +define i32 @safe_add_underflow(i8 zeroext %a) { + %add = add i8 %a, -1 + %cmp = icmp ugt i8 %add, 254 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: safe_add_underflow_neg: +; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #2 +; CHECK-NOT: uxtb +; CHECK: cmp [[MINUS_1]], #251 +; CHECK: movlo r0, #8 +define i32 @safe_add_underflow_neg(i8 zeroext %a) { + %add = add i8 %a, -2 + %cmp = icmp ule i8 
%add, -6 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: overflow_sub_negative_const_limit: +; CHECK: adds r0, #1 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #128 +; CHECK: movhi r0, #8 +define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { + %sub = sub i8 %a, -1 + %cmp = icmp ugt i8 %sub, 128 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: unsafe_sub_underflow: +; CHECK: subs r0, #6 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #250 +; CHECK: movhi r0, #8 +define i32 @unsafe_sub_underflow(i8 zeroext %a) { + %sub = sub i8 %a, 6 + %cmp = icmp ugt i8 %sub, 250 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: safe_sub_underflow: +; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1 +; CHECK-NOT: uxtb +; CHECK: cmp [[MINUS_1]], #255 +; CHECK: movlo r0, #8 +define i32 @safe_sub_underflow(i8 zeroext %a) { + %sub = sub i8 %a, 1 + %cmp = icmp ule i8 %sub, 254 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: safe_sub_underflow_neg +; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #4 +; CHECK-NOT: uxtb +; CHECK: cmp [[MINUS_1]], #250 +; CHECK: movhi r0, #8 +define i32 @safe_sub_underflow_neg(i8 zeroext %a) { + %sub = sub i8 %a, 4 + %cmp = icmp uge i8 %sub, -5 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK-LABEL: unsafe_sub_underflow_neg +; CHECK: subs r0, #4 +; CHECK: uxtb [[EXT:r[0-9]+]], r0 +; CHECK: cmp [[EXT]], #253 +; CHECK: movlo r0, #8 +define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) { + %sub = sub i8 %a, 4 + %cmp = icmp ult i8 %sub, -3 + %res = select i1 %cmp, i32 8, i32 16 + ret i32 %res +} + +; CHECK: rsb.w [[RSUB:r[0-9]+]], r0, #248 +; CHECK-NOT: uxt +; CHECK: cmp [[RSUB]], #252 +define i32 @safe_sub_imm_var(i8* %b) { +entry: + %0 = load i8, i8* %b, align 1 + %sub = sub nuw nsw i8 -8, %0 + %cmp = icmp ugt i8 %sub, 252 + %conv4 = zext i1 %cmp to i32 + ret i32 %conv4 +} + +; CHECK-LABEL: safe_sub_var_imm +; CHECK: add.w 
[[ADD:r[0-9]+]], r0, #8 +; CHECK-NOT: uxt +; CHECK: cmp [[ADD]], #252 +define i32 @safe_sub_var_imm(i8* %b) { +entry: + %0 = load i8, i8* %b, align 1 + %sub = sub nuw nsw i8 %0, -8 + %cmp = icmp ugt i8 %sub, 252 + %conv4 = zext i1 %cmp to i32 + ret i32 %conv4 +} + +; CHECK-LABEL: safe_add_imm_var +; CHECK: add.w [[ADD:r[0-9]+]], r0, #129 +; CHECK-NOT: uxt +; CHECK: cmp [[ADD]], #127 +define i32 @safe_add_imm_var(i8* %b) { +entry: + %0 = load i8, i8* %b, align 1 + %add = add nuw nsw i8 -127, %0 + %cmp = icmp ugt i8 %add, 127 + %conv4 = zext i1 %cmp to i32 + ret i32 %conv4 +} + +; CHECK-LABEL: safe_add_var_imm +; CHECK: sub.w [[SUB:r[0-9]+]], r0, #127 +; CHECK-NOT: uxt +; CHECK: cmp [[SUB]], #127 +define i32 @safe_add_var_imm(i8* %b) { +entry: + %0 = load i8, i8* %b, align 1 + %add = add nuw nsw i8 %0, -127 + %cmp = icmp ugt i8 %add, 127 + %conv4 = zext i1 %cmp to i32 + ret i32 %conv4 +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-phis-ret.ll @@ -0,0 +1,174 @@ +; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; Test that ARMCodeGenPrepare can handle: +; - loops +; - call operands +; - call return values +; - ret instructions +; We use nuw on the arithmetic instructions to avoid complications. 
+ +; Check that the arguments are extended but then nothing else is. +; This also ensures that the pass can handle loops. +; CHECK-COMMON-LABEL: phi_feeding_phi_args +; CHECK-COMMON: uxtb +; CHECK-COMMON: uxtb +; CHECK-NOT: uxtb +define void @phi_feeding_phi_args(i8 %a, i8 %b) { +entry: + %0 = icmp ugt i8 %a, %b + br i1 %0, label %preheader, label %empty + +empty: + br label %preheader + +preheader: + %1 = phi i8 [ %a, %entry ], [ %b, %empty ] + br label %loop + +loop: + %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 254 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = sub nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = shl nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp eq i8 %inc2, 255 + br i1 %cmp1, label %exit, label %loop + +exit: + ret void +} + +; Same as above, but as the args are zeroext, we shouldn't see any uxts. +; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args +; CHECK-COMMON-NOT: uxt +define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) { +entry: + %0 = icmp ugt i8 %a, %b + br i1 %0, label %preheader, label %empty + +empty: + br label %preheader + +preheader: + %1 = phi i8 [ %a, %entry ], [ %b, %empty ] + br label %loop + +loop: + %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 254 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = sub nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = shl nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp eq i8 %inc2, 255 + br i1 %cmp1, label %exit, label %loop + +exit: + ret void +} + +; Just check that phis also work with i16s. 
+; CHECK-COMMON-LABEL: phi_i16: +; CHECK-COMMON-NOT: uxt +define void @phi_i16() { +entry: + br label %loop + +loop: + %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i16 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i16 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i16 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp ult i16 %inc2, 253 + br i1 %cmp1, label %loop, label %exit + +exit: + ret void +} + +; CHECK-COMMON-LABEL: ret_i8 +; CHECK-COMMON-NOT: uxt +define i8 @ret_i8() { +entry: + br label %loop + +loop: + %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp ult i8 %inc2, 253 + br i1 %cmp1, label %exit, label %loop + +exit: + ret i8 %inc2 +} + +; CHECK-COMMON-LABEL: phi_multiple_undefs +; CHECK-COMMON-NOT: uxt +define i16 @phi_multiple_undefs(i16 zeroext %arg) { +entry: + br label %loop + +loop: + %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i16 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i16 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i16 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] + %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ] + %cmp1 = icmp ult i16 %inc2, 253 + br i1 %cmp1, label %loop, label %exit + +exit: + ret i16 %unrelated +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-pointers.ll @@ -0,0 +1,135 @@ +; RUN: llc -mtriple=thumbv8 
-arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s + +; CHECK-LABEL: phi_pointers +; CHECK-NOT: uxt +define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { +entry: + %add = add nuw i8 %M, 1 + %and = and i8 %add, 1 + %cmp = icmp ugt i8 %add, %N + %base = select i1 %cmp, i16* %a, i16* %b + %other = select i1 %cmp, i16* %b, i16* %b + br label %loop + +loop: + %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ] + %idx = phi i8 [ %and, %entry ], [ %inc, %loop ] + %load = load i16, i16* %ptr, align 2 + %inc = add nuw nsw i8 %idx, 1 + %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc + %cond = icmp eq i16* %gep, %other + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: phi_pointers_null +; CHECK-NOT: uxt +define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { +entry: + %add = add nuw i8 %M, 1 + %and = and i8 %add, 1 + %cmp = icmp ugt i8 %add, %N + %base = select i1 %cmp, i16* %a, i16* %b + %other = select i1 %cmp, i16* %b, i16* %b + %cmp.1 = icmp eq i16* %base, %other + br i1 %cmp.1, label %fail, label %loop + +fail: + br label %loop + +loop: + %ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ] + %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ] + %undef = icmp eq i16* %ptr, undef + br i1 %undef, label %exit, label %if.then + +if.then: + %load = load i16, i16* %ptr, align 2 + %inc = add nuw nsw i8 %idx, 1 + %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc + %cond = icmp eq i16* %gep, %other + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +declare i8 @do_something_with_ptr(i8, i16*) + +; CHECK-LABEL: call_pointer +; CHECK-NOT: uxt +define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) { + %or = or i8 %x, %y + %shr = lshr i8 %or, 1 + %add = add nuw i8 %shr, 2 + %cmp = icmp ne i8 %add, 0 + %ptr = select i1 %cmp, i16* %a, i16* %b + %call = tail call zeroext 
i8 @do_something_with_ptr(i8 %shr, i16* %ptr) + ret i8 %call +} + +; CHECK-LABEL: pointer_to_pointer +; CHECK-NOT: uxt +define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) { +entry: + %addr = load i16*, i16** %arg + %val = load i16, i16* %addr + %add = add nuw i16 %val, 7 + %cmp = icmp ult i16 %add, 256 + %res = select i1 %cmp, i16 128, i16 255 + ret i16 %res +} + +; CHECK-LABEL: gep_2d_array +; CHECK-NOT: uxt +define i8 @gep_2d_array(i8** %a, i8 zeroext %arg) { +entry: + %arrayidx.us = getelementptr inbounds i8*, i8** %a, i32 0 + %0 = load i8*, i8** %arrayidx.us, align 4 + %1 = load i8, i8* %0, align 1 + %sub = sub nuw i8 %1, 1 + %cmp = icmp ult i8 %sub, %arg + %res = select i1 %cmp, i8 27, i8 54 + ret i8 %res +} + +; CHECK-LABEL: gep_2d_array_loop +; CHECK-NOT: uxt +define void @gep_2d_array_loop(i16** nocapture readonly %a, i16** nocapture readonly %b, i32 %N) { +entry: + %cmp30 = icmp eq i32 %N, 0 + br i1 %cmp30, label %for.cond.cleanup, label %for.cond1.preheader.us + +for.cond1.preheader.us: + %y.031.us = phi i32 [ %inc13.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ] + br label %for.body4.us + +for.body4.us: + %x.029.us = phi i32 [ 0, %for.cond1.preheader.us ], [ %inc.us, %for.body4.us ] + %arrayidx.us = getelementptr inbounds i16*, i16** %a, i32 %x.029.us + %0 = load i16*, i16** %arrayidx.us, align 4 + %arrayidx5.us = getelementptr inbounds i16, i16* %0, i32 %y.031.us + %1 = load i16, i16* %arrayidx5.us, align 2 + %dec.us = add nuw i16 %1, -1 + %cmp6.us = icmp ult i16 %dec.us, 16383 + %shl.us = shl nuw i16 %dec.us, 2 + %spec.select.us = select i1 %cmp6.us, i16 %shl.us, i16 %dec.us + %arrayidx10.us = getelementptr inbounds i16*, i16** %b, i32 %x.029.us + %2 = load i16*, i16** %arrayidx10.us, align 4 + %arrayidx11.us = getelementptr inbounds i16, i16* %2, i32 %y.031.us + store i16 %spec.select.us, i16* %arrayidx11.us, align 2 + %inc.us = add nuw i32 %x.029.us, 1 + %exitcond = icmp eq i32 %inc.us, %N + br i1 %exitcond, label 
%for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us + +for.cond1.for.cond.cleanup3_crit_edge.us: + %inc13.us = add nuw i32 %y.031.us, 1 + %exitcond32 = icmp eq i32 %inc13.us, %N + br i1 %exitcond32, label %for.cond.cleanup, label %for.cond1.preheader.us + +for.cond.cleanup: + ret void +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed-icmps.ll @@ -0,0 +1,109 @@ +; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; CHECK-COMMON-LABEL: eq_sgt +; CHECK-NODSP: add +; CHECK-NODSP: uxtb +; CHECK-NODSP: sxtb +; CHECK-NODSP: cmp +; CHECK-NODSP: sub +; CHECK-NODSP: sxtb +; CHECK-NODSP: cmp + +; CHECK-DSP: add +; CHECK-DSP: uxtb +; CHECK-DSP: cmp +; CHECK-DSP: sxtb +; CHECK-DSP: sub +; CHECK-DSP: sxtb +; CHECK-DSP: cmp + +; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]], +; CHECK-DSP-IMM: cmp [[ADD]], +; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]] +; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]], +; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]] +; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]] +define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) { +entry: + %load0 = load i8, i8* %x, align 1 + %load1 = load i8, i8* %y, align 1 + %add = add i8 %load0, %z + %sub = sub i8 %load1, 1 + %cmp = icmp eq i8 %add, 200 + %cmp1 = icmp sgt i8 %sub, %add + %res0 = select i1 %cmp, i8 35, i8 47 + %res1 = select i1 %cmp1, i8 %res0, i8 %sub + ret i8 
%res1 +} + +; CHECK-COMMON-LABEL: ugt_slt +; CHECK-NODSP: sub +; CHECK-NODSP: sxth +; CHECK-NODSP: uxth +; CHECK-NODSP: add +; CHECK-NODSP: sxth +; CHECK-NODSP: cmp +; CHECK-NODSP: cmp + +; CHECK-DSP: sub +; CHECK-DSP: sxth +; CHECK-DSP: add +; CHECK-DSP: uxth +; CHECK-DSP: sxth +; CHECK-DSP: cmp +; CHECK-DSP: cmp + +; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2 +; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]], +; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]] +; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]] +; CHECK-DSP-IMM-NOT: uxt +; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1 +; CHECK-DSP-IMM: usub16 [[SUB:r[0-9]+]], r1, [[ONE]] +; CHECK-DSP-IMM: cmp [[SUB]], r2 +define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) { +entry: + %load0 = load i16, i16* %x, align 1 + %add = add i16 %load0, %z + %sub = sub i16 %y, 1 + %cmp = icmp slt i16 %add, %z + %cmp1 = icmp ugt i16 %sub, %z + %res0 = select i1 %cmp, i16 35, i16 -1 + %res1 = select i1 %cmp1, i16 %res0, i16 0 + ret i16 %res1 +} + +; CHECK-COMMON-LABEL: urem_trunc_icmps +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]], +; CHECK-COMMON: cmp [[SEXT]], #7 +define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { +entry: + %ptr = load i16*, i16** %in, align 4 + %ld = load i16, i16* %ptr, align 2 + %cmp.i = icmp eq i16 %ld, 0 + br i1 %cmp.i, label %exit, label %cond.false.i + +cond.false.i: + %rem = urem i16 5, %ld + %extract.t = trunc i16 %rem to i8 + br label %body + +body: + %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] + %cmp = icmp sgt i8 %cond.in.i.off0, 7 + %conv5 = zext i1 %cmp to i32 + store i32 %conv5, i32* %g, align 4 + %.pr = load i32, i32* %k, align 4 + %tobool13150 = icmp eq i32 %.pr, 0 + br i1 %tobool13150, label %for.inc, label %exit + +for.inc: + %add = add nuw i8 %cond.in.i.off0, 1 + br label %body + +exit: + ret void +} Index: llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed.ll =================================================================== --- 
llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed.ll +++ llvm/trunk/test/CodeGen/ARM/CGP/arm-cgp-signed.ll @@ -0,0 +1,45 @@ +; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=thumbv7 %s -arm-disable-cgp=false -o - | FileCheck %s +; RUN: llc -mtriple=armv8 %s -arm-disable-cgp=false -o - | FileCheck %s + +; Test to check that ARMCodeGenPrepare doesn't optimise away sign extends. +; CHECK-LABEL: test_signed_load: +; CHECK: uxth +define i16 @test_signed_load(i16* %ptr) { + %load = load i16, i16* %ptr + %conv0 = zext i16 %load to i32 + %conv1 = sext i16 %load to i32 + %cmp = icmp eq i32 %conv0, %conv1 + %conv2 = zext i1 %cmp to i16 + ret i16 %conv2 +} + +; Don't allow sign bit generating opcodes. +; CHECK-LABEL: test_ashr: +; CHECK: sxth +define i16 @test_ashr(i16 zeroext %arg) { + %ashr = ashr i16 %arg, 1 + %cmp = icmp eq i16 %ashr, 0 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +; CHECK-LABEL: test_sdiv: +; CHECK: sxth +define i16 @test_sdiv(i16 zeroext %arg) { + %sdiv = sdiv i16 %arg, 2 + %cmp = icmp ne i16 %sdiv, 0 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +; CHECK-LABEL: test_srem +; CHECK: sxth +define i16 @test_srem(i16 zeroext %arg) { + %srem = srem i16 %arg, 4 + %cmp = icmp ne i16 %srem, 0 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll @@ -1,182 +0,0 @@ -; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s -; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s - -; Check that the pass doesn't try to promote the immediate parameters. 
-; CHECK-LABEL: call_with_imms -; CHECK-NOT: uxt -define i8 @call_with_imms(i8* %arg) { - %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0) - %cmp = icmp eq i8 %call, 0 - %res = select i1 %cmp, i8 %call, i8 1 - ret i8 %res -} - -; Test that the call result is still extended. -; CHECK-LABEL: test_call: -; CHECK: bl -; CHECK-NEXT: sxtb r1, r0 -define i16 @test_call(i8 zeroext %arg) { - %call = call i8 @dummy_i8(i8 %arg) - %cmp = icmp ult i8 %call, 128 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; Test that the transformation bails when it finds that i16 is larger than i8. -; TODO: We should be able to remove the uxtb in these cases. -; CHECK-LABEL: promote_i8_sink_i16_1 -; CHECK: bl dummy_i8 -; CHECK: add{{.*}} r0, #1 -; CHECK: uxtb r0, r0 -; CHECK: cmp r0 -define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %conv = zext i8 %add to i16 - %cmp = icmp ne i16 %conv, %arg1 - %sel = select i1 %cmp, i16 %arg1, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -; CHECK-LABEL: promote_i8_sink_i16_2 -; CHECK: bl dummy_i8 -; CHECK: add{{.*}} r0, #1 -; CHECK-NOT: uxt -; CHECK: cmp r0 -define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %cmp = icmp ne i8 %add, %arg1 - %conv = zext i8 %arg1 to i16 - %sel = select i1 %cmp, i16 %conv, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -@uc = global i8 42, align 1 -@LL = global i64 0, align 8 - -; CHECK-LABEL: zext_i64 -; CHECK: ldrb -; CHECK: strd -define void @zext_i64() { -entry: - %0 = load i8, i8* @uc, align 1 - %conv = zext i8 %0 to i64 - store i64 %conv, i64* @LL, align 8 - %cmp = icmp eq i8 %0, 42 - %conv1 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* 
@assert to i32 (i32)*)(i32 %conv1) - ret void -} - -@a = global i16* null, align 4 -@b = global i32 0, align 4 - -; CHECK-LABEL: constexpr -; CHECK: uxth -define i32 @constexpr() { -entry: - store i32 ptrtoint (i32* @b to i32), i32* @b, align 4 - %0 = load i16*, i16** @a, align 4 - %1 = load i16, i16* %0, align 2 - %or = or i16 %1, ptrtoint (i32* @b to i16) - store i16 %or, i16* %0, align 2 - %cmp = icmp ne i16 %or, 4 - %conv3 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2 - ret i32 undef -} - -; The call to safe_lshift_func takes two parameters, but they're the same value -; just one is zext. We do support zext now, so the transformation should -; trigger and we don't want see uxtb here. -; CHECK-LABEL: call_zext_i8_i32 -; CHECK-NOT: uxt -define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) { -for.cond8.preheader: - %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef) - %tobool219 = icmp eq i8 %call217, 0 - br i1 %tobool219, label %for.end411, label %for.cond273.preheader - -for.cond273.preheader: ; preds = %for.cond8.preheader - %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ] - %conv218.le = zext i8 %call217.lcssa to i32 - %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le) - unreachable - -for.end411: ; preds = %for.cond8.preheader - %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4) - unreachable -} - -%struct.anon = type { i32 } - -@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_82 = hidden local_unnamed_addr global i32 0, align 4 - -; Test that the transform bails on finding %conv4, a trunc -; CHECK-LABEL: call_return_pointer -; CHECK: sxth -; CHECK: uxt -define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 { -entry: - %conv1 = zext i8 %p_13 to i16 - %call 
= tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef) - %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4 - %conv2 = trunc i32 %0 to i16 - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ] - %tobool = icmp eq i8 %p_13.addr.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2) - %conv4 = trunc i16 %call3 to i8 - br label %for.cond.backedge -} - -; Transform will bail because of the zext -; Check that d.sroa.0.0.be is promoted passed directly into the tail call. -; CHECK-LABEL: check_zext_phi_call_arg -; CHECK: uxt -define i32 @check_zext_phi_call_arg() { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ] - %tobool = icmp eq i16 %d.sroa.0.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32 - %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2 - br label %for.cond.backedge -} - -declare i32 @assert(...) -declare i8 @dummy_i8(i8) -declare i8 @dummy2(i8*, i8, i8) -declare i16 @dummy3(i16) - -declare dso_local i32 @e(...) local_unnamed_addr #1 -declare dso_local zeroext i16 @f(...) 
local_unnamed_addr #1 - -declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66) -declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2) -declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64) -declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32) -declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext) Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-casts.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-casts.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-casts.ll @@ -1,340 +0,0 @@ -; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv7-linux-android %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; Transform will fail because the trunc is not a sink. 
-; CHECK-COMMON-LABEL: dsp_trunc -; CHECK-COMMON: add [[ADD:[^ ]+]], -; CHECK-DSP-NEXT: ldrh r1, [r3] -; CHECK-DSP-NEXT: ldrh r2, [r2] -; CHECK-DSP-NEXT: subs r1, r1, [[ADD]] -; CHECK-DSP-NEXT: add r0, r2 -; CHECK-DSP-NEXT: uxth r3, r1 -; CHECK-DSP-NEXT: uxth r2, r0 -; CHECK-DSP-NEXT: cmp r2, r3 - -; With DSP-IMM, we could have: -; movs r1, #0 -; uxth r0, r0 -; usub16 r1, r1, r0 -; ldrh r0, [r2] -; ldrh r3, [r3] -; usub16 r0, r0, r1 -; uadd16 r1, r3, r1 -; cmp r0, r1 -define i16 @dsp_trunc(i32 %arg0, i32 %arg1, i16* %gep0, i16* %gep1) { -entry: - %add0 = add i32 %arg0, %arg1 - %conv0 = trunc i32 %add0 to i16 - %sub0 = sub i16 0, %conv0 - %load0 = load i16, i16* %gep0, align 2 - %load1 = load i16, i16* %gep1, align 2 - %sub1 = sub i16 %load0, %sub0 - %add1 = add i16 %load1, %sub0 - %cmp = icmp ult i16 %sub1, %add1 - %res = select i1 %cmp, i16 %add1, i16 %sub1 - ret i16 %res -} - -; CHECK-COMMON-LABEL: trunc_i16_i8 -; CHECK-COMMON: ldrh -; CHECK-COMMON: uxtb -; CHECK-COMMON: cmp -define i8 @trunc_i16_i8(i16* %ptr, i16 zeroext %arg0, i8 zeroext %arg1) { -entry: - %0 = load i16, i16* %ptr - %1 = add i16 %0, %arg0 - %2 = trunc i16 %1 to i8 - %3 = icmp ugt i8 %2, %arg1 - %4 = select i1 %3, i8 %2, i8 %arg1 - ret i8 %4 -} - -; The pass perform the transform, but a uxtb will still be inserted to handle -; the zext to the icmp. 
-; CHECK-COMMON-LABEL: icmp_i32_zext: -; CHECK-COMMON: sub -; CHECK-COMMON: uxtb -; CHECK-COMMON: cmp -define i8 @icmp_i32_zext(i8* %ptr) { -entry: - %gep = getelementptr inbounds i8, i8* %ptr, i32 0 - %0 = load i8, i8* %gep, align 1 - %1 = sub nuw nsw i8 %0, 1 - %conv44 = zext i8 %0 to i32 - br label %preheader - -preheader: - br label %body - -body: - %2 = phi i8 [ %1, %preheader ], [ %3, %if.end ] - %si.0274 = phi i32 [ %conv44, %preheader ], [ %inc, %if.end ] - %conv51266 = zext i8 %2 to i32 - %cmp52267 = icmp eq i32 %si.0274, %conv51266 - br i1 %cmp52267, label %if.end, label %exit - -if.end: - %inc = add i32 %si.0274, 1 - %gep1 = getelementptr inbounds i8, i8* %ptr, i32 %inc - %3 = load i8, i8* %gep1, align 1 - br label %body - -exit: - ret i8 %2 -} - -; Won't don't handle sext -; CHECK-COMMON-LABEL: icmp_sext_zext_store_i8_i16 -; CHECK-COMMON: ldrb -; CHECK-COMMON: ldrsh -define i32 @icmp_sext_zext_store_i8_i16() { -entry: - %0 = load i8, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @d_uch, i32 0, i32 2), align 1 - %conv = zext i8 %0 to i16 - store i16 %conv, i16* @sh1, align 2 - %conv1 = zext i8 %0 to i32 - %1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @d_sh, i32 0, i32 2), align 2 - %conv2 = sext i16 %1 to i32 - %cmp = icmp eq i32 %conv1, %conv2 - %conv3 = zext i1 %cmp to i32 - ret i32 %conv3 -} - -; CHECK-COMMON-LABEL: or_icmp_ugt: -; CHECK-COMMON: ldrb -; CHECK-COMMON: sub.w -; CHECK-COMMON-NOT: uxt -; CHECK-COMMON: cmp.w -; CHECK-COMMON-NOT: uxt -; CHECK-COMMON: cmp -define i1 @or_icmp_ugt(i32 %arg, i8* %ptr) { -entry: - %0 = load i8, i8* %ptr - %1 = zext i8 %0 to i32 - %mul = shl nuw nsw i32 %1, 1 - %add0 = add nuw nsw i32 %mul, 6 - %cmp0 = icmp ne i32 %arg, %add0 - %add1 = add i8 %0, -1 - %cmp1 = icmp ugt i8 %add1, 3 - %or = or i1 %cmp0, %cmp1 - ret i1 %or -} - -; CHECK-COMMON-LABEL: icmp_switch_trunc: -; CHECK-COMMON-NOT: uxt -define i16 @icmp_switch_trunc(i16 zeroext %arg) { -entry: - %conv = add nuw i16 %arg, 15 - %mul = 
mul nuw nsw i16 %conv, 3 - %trunc = trunc i16 %arg to i3 - switch i3 %trunc, label %default [ - i3 0, label %sw.bb - i3 1, label %sw.bb.i - ] - -sw.bb: - %cmp0 = icmp ult i16 %mul, 127 - %select = select i1 %cmp0, i16 %mul, i16 127 - br label %exit - -sw.bb.i: - %cmp1 = icmp ugt i16 %mul, 34 - %select.i = select i1 %cmp1, i16 %mul, i16 34 - br label %exit - -default: - br label %exit - -exit: - %res = phi i16 [ %select, %sw.bb ], [ %select.i, %sw.bb.i ], [ %mul, %default ] - ret i16 %res -} - -; We currently only handle truncs as sinks, so a uxt will still be needed for -; the icmp ugt instruction. -; CHECK-COMMON-LABEL: urem_trunc_icmps -; CHECK-COMMON: cmp -; CHECK-COMMON: uxt -; CHECK-COMMON: cmp -define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { -entry: - %ptr = load i16*, i16** %in, align 4 - %ld = load i16, i16* %ptr, align 2 - %cmp.i = icmp eq i16 %ld, 0 - br i1 %cmp.i, label %exit, label %cond.false.i - -cond.false.i: - %rem = urem i16 5, %ld - %extract.t = trunc i16 %rem to i8 - br label %body - -body: - %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] - %cmp = icmp ugt i8 %cond.in.i.off0, 7 - %conv5 = zext i1 %cmp to i32 - store i32 %conv5, i32* %g, align 4 - %.pr = load i32, i32* %k, align 4 - %tobool13150 = icmp eq i32 %.pr, 0 - br i1 %tobool13150, label %for.inc, label %exit - -for.inc: - %add = add nuw i8 %cond.in.i.off0, 1 - br label %body - -exit: - ret void -} - -; CHECK-COMMON-LABEL: phi_feeding_switch -; CHECK-COMMON: ldrb -; CHECK-COMMON: uxtb -define void @phi_feeding_switch(i8* %memblock, i8* %store, i16 %arg) { -entry: - %pre = load i8, i8* %memblock, align 1 - %conv = trunc i16 %arg to i8 - br label %header - -header: - %phi.0 = phi i8 [ %pre, %entry ], [ %count, %latch ] - %phi.1 = phi i8 [ %conv, %entry ], [ %phi.3, %latch ] - %phi.2 = phi i8 [ 0, %entry], [ %count, %latch ] - switch i8 %phi.0, label %default [ - i8 43, label %for.inc.i - i8 45, label %for.inc.i.i - ] - -for.inc.i: - %xor = xor i8 
%phi.1, 1 - br label %latch - -for.inc.i.i: - %and = and i8 %phi.1, 3 - br label %latch - -default: - %sub = sub i8 %phi.0, 1 - %cmp2 = icmp ugt i8 %sub, 4 - br i1 %cmp2, label %latch, label %exit - -latch: - %phi.3 = phi i8 [ %xor, %for.inc.i ], [ %and, %for.inc.i.i ], [ %phi.2, %default ] - %count = add nuw i8 %phi.2, 1 - store i8 %count, i8* %store, align 1 - br label %header - -exit: - ret void -} - -; Check that %exp requires uxth in all cases, and will also be required to -; promote %1 for the call - unless we can generate a uadd16. -; CHECK-COMMON-LABEL: zext_load_sink_call: -; CHECK-COMMON: uxt -; uadd16 -; cmp -; CHECK-COMMON: uxt -define i32 @zext_load_sink_call(i16* %ptr, i16 %exp) { -entry: - %0 = load i16, i16* %ptr, align 4 - %1 = add i16 %exp, 3 - %cmp = icmp eq i16 %0, %exp - br i1 %cmp, label %exit, label %if.then - -if.then: - %conv0 = zext i16 %0 to i32 - %conv1 = zext i16 %1 to i32 - %call = tail call arm_aapcs_vfpcc i32 @dummy(i32 %conv0, i32 %conv1) - br label %exit - -exit: - %exitval = phi i32 [ %call, %if.then ], [ 0, %entry ] - ret i32 %exitval -} - -%class.ae = type { i8 } -%class.x = type { i8 } -%class.v = type { %class.q } -%class.q = type { i16 } - -; CHECK-COMMON-LABEL: trunc_i16_i9_switch -; CHECK-COMMON-NOT: uxt -define i32 @trunc_i16_i9_switch(%class.ae* %this) { -entry: - %call = tail call %class.x* @_ZNK2ae2afEv(%class.ae* %this) - %call2 = tail call %class.v* @_ZN1x2acEv(%class.x* %call) - %0 = getelementptr inbounds %class.v, %class.v* %call2, i32 0, i32 0, i32 0 - %1 = load i16, i16* %0, align 2 - %2 = trunc i16 %1 to i9 - %trunc = and i9 %2, -64 - switch i9 %trunc, label %cleanup.fold.split [ - i9 0, label %cleanup - i9 -256, label %if.then7 - ] - -if.then7: - %3 = and i16 %1, 7 - %tobool = icmp eq i16 %3, 0 - %cond = select i1 %tobool, i32 2, i32 1 - br label %cleanup - -cleanup.fold.split: - br label %cleanup - -cleanup: - %retval.0 = phi i32 [ %cond, %if.then7 ], [ 0, %entry ], [ 2, %cleanup.fold.split ] - ret i32 
%retval.0 -} - -; CHECK-COMMON-LABEL: bitcast_i16 -; CHECK-COMMON-NOT: uxt -define i16 @bitcast_i16(i16 zeroext %arg0, i16 zeroext %arg1) { -entry: - %cast = bitcast i16 12345 to i16 - %add = add nuw i16 %arg0, 1 - %cmp = icmp ule i16 %add, %cast - %res = select i1 %cmp, i16 %arg1, i16 32657 - ret i16 %res -} - -; CHECK-COMMON-LABEL: bitcast_i8 -; CHECK-COMMON-NOT: uxt -define i8 @bitcast_i8(i8 zeroext %arg0, i8 zeroext %arg1) { -entry: - %cast = bitcast i8 127 to i8 - %mul = shl nuw i8 %arg0, 1 - %cmp = icmp uge i8 %mul, %arg1 - %res = select i1 %cmp, i8 %cast, i8 128 - ret i8 %res -} - -; CHECK-COMMON-LABEL: bitcast_i16_minus -; CHECK-COMMON-NOT: uxt -define i16 @bitcast_i16_minus(i16 zeroext %arg0, i16 zeroext %arg1) { -entry: - %cast = bitcast i16 -12345 to i16 - %xor = xor i16 %arg0, 7 - %cmp = icmp eq i16 %xor, %arg1 - %res = select i1 %cmp, i16 %cast, i16 32657 - ret i16 %res -} - -; CHECK-COMMON-LABEL: bitcast_i8_minus -; CHECK-COMMON-NOT: uxt -define i8 @bitcast_i8_minus(i8 zeroext %arg0, i8 zeroext %arg1) { -entry: - %cast = bitcast i8 -127 to i8 - %and = and i8 %arg0, 3 - %cmp = icmp ne i8 %and, %arg1 - %res = select i1 %cmp, i8 %cast, i8 128 - ret i8 %res -} - -declare %class.x* @_ZNK2ae2afEv(%class.ae*) local_unnamed_addr -declare %class.v* @_ZN1x2acEv(%class.x*) local_unnamed_addr -declare i32 @dummy(i32, i32) - -@d_uch = hidden local_unnamed_addr global [16 x i8] zeroinitializer, align 1 -@sh1 = hidden local_unnamed_addr global i16 0, align 2 -@d_sh = hidden local_unnamed_addr global [16 x i16] zeroinitializer, align 2 Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-icmps.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-icmps.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-icmps.ll @@ -1,312 +0,0 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv7em %s 
-arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; CHECK-COMMON-LABEL: test_ult_254_inc_imm: -; CHECK-DSP: adds r0, #1 -; CHECK-DSP-NEXT: uxtb r1, r0 -; CHECK-DSP-NEXT: movs r0, #47 -; CHECK-DSP-NEXT: cmp r1, #254 -; CHECK-DSP-NEXT: it lo -; CHECK-DSP-NEXT: movlo r0, #35 - -; CHECK-DSP-IMM: movs r1, #1 -; CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1 -; CHECK-DSP-IMM-NEXT: movs r0, #47 -; CHECK-DSP-IMM-NEXT: cmp r1, #254 -; CHECK-DSP-IMM-NEXT: it lo -; CHECK-DSP-IMM-NEXT: movlo r0, #35 -define i32 @test_ult_254_inc_imm(i8 zeroext %x) { -entry: - %add = add i8 %x, 1 - %cmp = icmp ult i8 %add, 254 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_slt_254_inc_imm -; CHECK-COMMON: adds -; CHECK-COMMON: sxtb -define i32 @test_slt_254_inc_imm(i8 signext %x) { -entry: - %add = add i8 %x, 1 - %cmp = icmp slt i8 %add, 254 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_ult_254_inc_var: -; CHECK-NODSP: add r0, r1 -; CHECK-NODSP-NEXT: uxtb r1, r0 -; CHECK-NODSP-NEXT: movs r0, #47 -; CHECK-NODSP-NEXT: cmp r1, #254 -; CHECK-NODSP-NEXT: it lo -; CHECK-NODSP-NEXT: movlo r0, #35 - -; CHECK-DSP: uadd8 r1, r0, r1 -; CHECK-DSP-NEXT: movs r0, #47 -; CHECK-DSP-NEXT: cmp r1, #254 -; CHECK-DSP-NEXT: it lo -; CHECK-DSP-NEXT: movlo r0, #35 -define i32 @test_ult_254_inc_var(i8 zeroext %x, i8 zeroext %y) { -entry: - %add = add i8 %x, %y - %cmp = icmp ult i8 %add, 254 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_sle_254_inc_var -; CHECK-COMMON: add -; CHECK-COMMON: sxtb -; CHECK-COMMON: cmp -define i32 @test_sle_254_inc_var(i8 %x, i8 %y) { -entry: - %add = add i8 %x, %y - %cmp = icmp sle i8 %add, 254 - %res = select 
i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_ugt_1_dec_imm: -; CHECK-COMMON: subs r1, r0, #1 -; CHECK-COMMON-NEXT: movs r0, #47 -; CHECK-COMMON-NEXT: cmp r1, #1 -; CHECK-COMMON-NEXT: it hi -; CHECK-COMMON-NEXT: movhi r0, #35 -define i32 @test_ugt_1_dec_imm(i8 zeroext %x) { -entry: - %add = add i8 %x, -1 - %cmp = icmp ugt i8 %add, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_sgt_1_dec_imm -; CHECK-COMMON: subs -; CHECK-COMMON: sxtb -; CHECK-COMMON: cmp -define i32 @test_sgt_1_dec_imm(i8 %x) { -entry: - %add = add i8 %x, -1 - %cmp = icmp sgt i8 %add, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_ugt_1_dec_var: -; CHECK-NODSP: subs r0, r0, r1 -; CHECK-NODSP-NEXT: uxtb r1, r0 -; CHECK-NODSP-NEXT: movs r0, #47 -; CHECK-NODSP-NEXT: cmp r1, #1 -; CHECK-NODSP-NEXT: it hi -; CHECK-NODSP-NEXT: movhi r0, #35 - -; CHECK-DSP: usub8 r1, r0, r1 -; CHECK-DSP-NEXT: movs r0, #47 -; CHECK-DSP-NEXT: cmp r1, #1 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r0, #35 -define i32 @test_ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) { -entry: - %sub = sub i8 %x, %y - %cmp = icmp ugt i8 %sub, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: test_sge_1_dec_var -; CHECK-COMMON: sub -; CHECK-COMMON: sxtb -; CHECK-COMMON: cmp -define i32 @test_sge_1_dec_var(i8 %x, i8 %y) { -entry: - %sub = sub i8 %x, %y - %cmp = icmp sge i8 %sub, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: dsp_imm1: -; CHECK-DSP: eors r1, r0 -; CHECK-DSP-NEXT: and r0, r0, #7 -; CHECK-DSP-NEXT: subs r0, r0, r1 -; CHECK-DSP-NEXT: adds r0, #1 -; CHECK-DSP-NEXT: uxtb r1, r0 -; CHECK-DSP-NEXT: movs r0, #47 -; CHECK-DSP-NEXT: cmp r1, #254 -; CHECK-DSP-NEXT: it lo -; CHECK-DSP-NEXT: movlo r0, #35 - -; CHECK-DSP-IMM: eors r1, r0 -; CHECK-DSP-IMM-NEXT: and r0, r0, #7 -; CHECK-DSP-IMM-NEXT: usub8 r0, r0, r1 -; CHECK-DSP-IMM-NEXT: movs r1, #1 -; 
CHECK-DSP-IMM-NEXT: uadd8 r1, r0, r1 -; CHECK-DSP-IMM-NEXT: movs r0, #47 -; CHECK-DSP-IMM-NEXT: cmp r1, #254 -; CHECK-DSP-IMM-NEXT: it lo -; CHECK-DSP-IMM-NEXT: movlo r0, #35 -define i32 @dsp_imm1(i8 zeroext %x, i8 zeroext %y) { -entry: - %xor = xor i8 %x, %y - %and = and i8 %x, 7 - %sub = sub i8 %and, %xor - %add = add i8 %sub, 1 - %cmp = icmp ult i8 %add, 254 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: dsp_var: -; CHECK-COMMON: eors r1, r0 -; CHECK-COMMON: and r2, r0, #7 -; CHECK-NODSP: subs r1, r2, r1 -; CHECK-NODSP: add.w r0, r1, r0, lsl #1 -; CHECK-NODSP: uxtb r1, r0 -; CHECK-DSP: usub8 r1, r2, r1 -; CHECK-DSP: lsls r0, r0, #1 -; CHECK-DSP: uadd8 r1, r1, r0 -; CHECK-DSP-NOT: uxt -; CHECK-COMMON: movs r0, #47 -; CHECK-COMMON: cmp r1, #254 -; CHECK-COMMON: it lo -; CHECK-COMMON: movlo r0, #35 -define i32 @dsp_var(i8 zeroext %x, i8 zeroext %y) { - %xor = xor i8 %x, %y - %and = and i8 %x, 7 - %sub = sub i8 %and, %xor - %mul = shl nuw i8 %x, 1 - %add = add i8 %sub, %mul - %cmp = icmp ult i8 %add, 254 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: store_dsp_res -; CHECK-DSP: usub8 -; CHECK-DSP: strb -define void @store_dsp_res(i8* %in, i8* %out, i8 %compare) { - %first = getelementptr inbounds i8, i8* %in, i32 0 - %second = getelementptr inbounds i8, i8* %in, i32 1 - %ld0 = load i8, i8* %first - %ld1 = load i8, i8* %second - %xor = xor i8 %ld0, -1 - %cmp = icmp ult i8 %compare, %ld1 - %select = select i1 %cmp, i8 %compare, i8 %xor - %sub = sub i8 %ld0, %select - store i8 %sub, i8* %out, align 1 - ret void -} - -; CHECK-COMMON-LABEL: ugt_1_dec_imm: -; CHECK-COMMON: subs r1, r0, #1 -; CHECK-COMMON-NEXT: movs r0, #47 -; CHECK-COMMON-NEXT: cmp r1, #1 -; CHECK-COMMON-NEXT: it hi -; CHECK-COMMON-NEXT: movhi r0, #35 -define i32 @ugt_1_dec_imm(i8 zeroext %x) { -entry: - %add = add i8 %x, -1 - %cmp = icmp ugt i8 %add, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: 
ugt_1_dec_var: -; CHECK-NODSP: subs r0, r0, r1 -; CHECK-NODSP-NEXT: uxtb r1, r0 -; CHECK-NODSP-NEXT: movs r0, #47 -; CHECK-NODSP-NEXT: cmp r1, #1 -; CHECK-NODSP-NEXT: it hi -; CHECK-NODSP-NEXT: movhi r0, #35 - -; CHECK-DSP: usub8 r1, r0, r1 -; CHECK-DSP-NEXT: movs r0, #47 -; CHECK-DSP-NEXT: cmp r1, #1 -; CHECK-DSP-NEXT: it hi -; CHECK-DSP-NEXT: movhi r0, #35 -define i32 @ugt_1_dec_var(i8 zeroext %x, i8 zeroext %y) { -entry: - %sub = sub i8 %x, %y - %cmp = icmp ugt i8 %sub, 1 - %res = select i1 %cmp, i32 35, i32 47 - ret i32 %res -} - -; CHECK-COMMON-LABEL: icmp_eq_minus_one -; CHECK-COMMON: cmp r0, #255 -define i32 @icmp_eq_minus_one(i8* %ptr) { - %load = load i8, i8* %ptr, align 1 - %conv = zext i8 %load to i32 - %cmp = icmp eq i8 %load, -1 - %ret = select i1 %cmp, i32 %conv, i32 -1 - ret i32 %ret -} - -; CHECK-COMMON-LABEL: icmp_not -; CHECK-COMMON: movw r2, #65535 -; CHECK-COMMON: eors r2, r0 -; CHECK-COMMON: movs r0, #32 -; CHECK-COMMON: cmp r2, r1 -define i32 @icmp_not(i16 zeroext %arg0, i16 zeroext %arg1) { - %not = xor i16 %arg0, -1 - %cmp = icmp eq i16 %not, %arg1 - %res = select i1 %cmp, i32 16, i32 32 - ret i32 %res -} - -; CHECK-COMMON-LABEL: icmp_i1 -; CHECK-NOT: uxt -define i32 @icmp_i1(i1* %arg0, i1 zeroext %arg1, i32 %a, i32 %b) { -entry: - %load = load i1, i1* %arg0 - %not = xor i1 %load, 1 - %cmp = icmp eq i1 %arg1, %not - %res = select i1 %cmp, i32 %a, i32 %b - ret i32 %res -} - -; CHECK-COMMON-LABEL: icmp_i7 -; CHECK-COMMON: ldrb -; CHECK-COMMON: cmp -define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) { -entry: - %load = load i7, i7* %arg0 - %add = add nuw i7 %load, 1 - %cmp = icmp ult i7 %arg1, %add - %res = select i1 %cmp, i32 %a, i32 %b - ret i32 %res -} - -; CHECK-COMMON-LABEL: icmp_i15 -; CHECK-COMMON: movw [[MINUS_ONE:r[0-9]+]], #32767 -define i32 @icmp_i15(i15 zeroext %arg0, i15 zeroext %arg1) { - %xor = xor i15 %arg0, -1 - %cmp = icmp eq i15 %xor, %arg1 - %res = select i1 %cmp, i32 21, i32 42 - ret i32 %res -} - -; 
CHECK-COMMON-LABEL: icmp_minus_imm -; CHECK-NODSP: subs [[SUB:r[0-9]+]], -; CHECK-NODSP: uxtb [[UXT:r[0-9]+]], -; CHECK-NODSP: cmp [[UXT]], #251 - -; CHECK-DSP: subs [[SUB:r[0-9]+]], -; CHECK-DSP: uxtb [[UXT:r[0-9]+]], -; CHECK-DSP: cmp [[UXT]], #251 - -; CHECK-DSP-IMM: ldrb [[A:r[0-9]+]], -; CHECK-DSP-IMM: movs [[MINUS_7:r[0-9]+]], #249 -; CHECK-DSP-IMM: uadd8 [[RES:r[0-9]+]], [[A]], [[MINUS_7]] -; CHECK-DSP-IMM: cmp [[RES]], #251 -define i32 @icmp_minus_imm(i8* %a) { -entry: - %0 = load i8, i8* %a, align 1 - %add.i = add i8 %0, -7 - %cmp = icmp ugt i8 %add.i, -5 - %conv1 = zext i1 %cmp to i32 - ret i32 %conv1 -} - Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-overflow.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-overflow.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-overflow.ll @@ -1,180 +0,0 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 %s -arm-disable-cgp=false -o - | FileCheck %s - -; CHECK: overflow_add -; CHECK: add -; CHECK: uxth -; CHECK: cmp -define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) { - %add = add i16 %a, %b - %or = or i16 %add, 1 - %cmp = icmp ugt i16 %or, 1024 - %res = select i1 %cmp, i16 2, i16 5 - ret i16 %res -} - -; CHECK-LABEL: overflow_sub -; CHECK: sub -; CHECK: uxth -; CHECK: cmp -define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) { - %add = sub i16 %a, %b - %or = or i16 %add, 1 - %cmp = icmp ugt i16 %or, 1024 - %res = select i1 %cmp, i16 2, i16 5 - ret i16 %res -} - -; CHECK-LABEL: overflow_mul -; CHECK: mul -; CHECK: uxth -; CHECK: cmp -define zeroext i16 @overflow_mul(i16 zeroext %a, i16 zeroext %b) { - %add = mul i16 %a, %b - %or = or i16 %add, 1 - %cmp = icmp ugt i16 %or, 1024 - %res = select i1 %cmp, i16 2, i16 5 - ret i16 %res -} - -; CHECK-LABEL: overflow_shl -; CHECK-COMMON: lsl -; CHECK-COMMON: uxth -; CHECK-COMMON: cmp -define zeroext i16 @overflow_shl(i16 zeroext %a, i16 zeroext %b) { - %add = shl i16 %a, %b - %or = or i16 %add, 
1 - %cmp = icmp ugt i16 %or, 1024 - %res = select i1 %cmp, i16 2, i16 5 - ret i16 %res -} - -; CHECK-LABEL: overflow_add_no_consts: -; CHECK: add r0, r1 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], r2 -; CHECK: movhi r0, #8 -define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) { - %add = add i8 %a, %b - %cmp = icmp ugt i8 %add, %limit - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: overflow_add_const_limit: -; CHECK: add r0, r1 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #128 -; CHECK: movhi r0, #8 -define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) { - %add = add i8 %a, %b - %cmp = icmp ugt i8 %add, 128 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: overflow_add_positive_const_limit: -; CHECK: adds r0, #1 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #128 -; CHECK: movhi r0, #8 -define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { - %add = add i8 %a, 1 - %cmp = icmp ugt i8 %add, 128 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: unsafe_add_underflow: -; CHECK: subs r0, #2 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #255 -; CHECK: moveq r0, #8 -define i32 @unsafe_add_underflow(i8 zeroext %a) { - %add = add i8 %a, -2 - %cmp = icmp ugt i8 %add, 254 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: safe_add_underflow: -; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1 -; CHECK-NOT: uxtb -; CHECK: cmp [[MINUS_1]], #254 -; CHECK: movhi r0, #8 -define i32 @safe_add_underflow(i8 zeroext %a) { - %add = add i8 %a, -1 - %cmp = icmp ugt i8 %add, 254 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: safe_add_underflow_neg: -; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #2 -; CHECK-NOT: uxtb -; CHECK: cmp [[MINUS_1]], #251 -; CHECK: movlo r0, #8 -define i32 @safe_add_underflow_neg(i8 zeroext %a) { - %add = add i8 %a, -2 - %cmp = icmp ule i8 %add, -6 - 
%res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: overflow_sub_negative_const_limit: -; CHECK: adds r0, #1 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #128 -; CHECK: movhi r0, #8 -define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { - %sub = sub i8 %a, -1 - %cmp = icmp ugt i8 %sub, 128 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: unsafe_sub_underflow: -; CHECK: subs r0, #6 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #250 -; CHECK: movhi r0, #8 -define i32 @unsafe_sub_underflow(i8 zeroext %a) { - %sub = sub i8 %a, 6 - %cmp = icmp ugt i8 %sub, 250 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: safe_sub_underflow: -; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #1 -; CHECK-NOT: uxtb -; CHECK: cmp [[MINUS_1]], #255 -; CHECK: movlo r0, #8 -define i32 @safe_sub_underflow(i8 zeroext %a) { - %sub = sub i8 %a, 1 - %cmp = icmp ule i8 %sub, 254 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK-LABEL: safe_sub_underflow_neg -; CHECK: subs [[MINUS_1:r[0-9]+]], r0, #4 -; CHECK-NOT: uxtb -; CHECK: cmp [[MINUS_1]], #250 -; CHECK: movhi r0, #8 -define i32 @safe_sub_underflow_neg(i8 zeroext %a) { - %sub = sub i8 %a, 4 - %cmp = icmp uge i8 %sub, -5 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} - -; CHECK: subs r0, #4 -; CHECK: uxtb [[EXT:r[0-9]+]], r0 -; CHECK: cmp [[EXT]], #253 -; CHECK: movlo r0, #8 -define i32 @unsafe_sub_underflow_neg(i8 zeroext %a) { - %sub = sub i8 %a, 4 - %cmp = icmp ult i8 %sub, -3 - %res = select i1 %cmp, i32 8, i32 16 - ret i32 %res -} Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll @@ -1,174 +0,0 @@ -; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc 
-mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; Test that ARMCodeGenPrepare can handle: -; - loops -; - call operands -; - call return values -; - ret instructions -; We use nuw on the arithmetic instructions to avoid complications. - -; Check that the arguments are extended but then nothing else is. -; This also ensures that the pass can handle loops. -; CHECK-COMMON-LABEL: phi_feeding_phi_args -; CHECK-COMMON: uxtb -; CHECK-COMMON: uxtb -; CHECK-NOT: uxtb -define void @phi_feeding_phi_args(i8 %a, i8 %b) { -entry: - %0 = icmp ugt i8 %a, %b - br i1 %0, label %preheader, label %empty - -empty: - br label %preheader - -preheader: - %1 = phi i8 [ %a, %entry ], [ %b, %empty ] - br label %loop - -loop: - %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 254 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = sub nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = shl nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp eq i8 %inc2, 255 - br i1 %cmp1, label %exit, label %loop - -exit: - ret void -} - -; Same as above, but as the args are zeroext, we shouldn't see any uxts. 
-; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args -; CHECK-COMMON-NOT: uxt -define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) { -entry: - %0 = icmp ugt i8 %a, %b - br i1 %0, label %preheader, label %empty - -empty: - br label %preheader - -preheader: - %1 = phi i8 [ %a, %entry ], [ %b, %empty ] - br label %loop - -loop: - %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 254 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = sub nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = shl nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp eq i8 %inc2, 255 - br i1 %cmp1, label %exit, label %loop - -exit: - ret void -} - -; Just check that phis also work with i16s. -; CHECK-COMMON-LABEL: phi_i16: -; CHECK-COMMON-NOT: uxt -define void @phi_i16() { -entry: - br label %loop - -loop: - %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ] - %cmp = icmp ult i16 %val, 128 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = add nuw i16 %val, 2 - br label %if.end - -if.else: - %inc1 = add nuw i16 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp ult i16 %inc2, 253 - br i1 %cmp1, label %loop, label %exit - -exit: - ret void -} - -; CHECK-COMMON-LABEL: ret_i8 -; CHECK-COMMON-NOT: uxt -define i8 @ret_i8() { -entry: - br label %loop - -loop: - %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 128 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = add nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = add nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp ult i8 %inc2, 253 - br i1 %cmp1, label %exit, label %loop - -exit: - ret i8 %inc2 -} - -; CHECK-COMMON-LABEL: phi_multiple_undefs -; CHECK-COMMON-NOT: uxt -define i16 @phi_multiple_undefs(i16 zeroext %arg) { -entry: - br 
label %loop - -loop: - %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ] - %cmp = icmp ult i16 %val, 128 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = add nuw i16 %val, 2 - br label %if.end - -if.else: - %inc1 = add nuw i16 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] - %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ] - %cmp1 = icmp ult i16 %inc2, 253 - br i1 %cmp1, label %loop, label %exit - -exit: - ret i16 %unrelated -} Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll @@ -1,135 +0,0 @@ -; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s -; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s - -; CHECK-LABEL: phi_pointers -; CHECK-NOT: uxt -define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { -entry: - %add = add nuw i8 %M, 1 - %and = and i8 %add, 1 - %cmp = icmp ugt i8 %add, %N - %base = select i1 %cmp, i16* %a, i16* %b - %other = select i1 %cmp, i16* %b, i16* %b - br label %loop - -loop: - %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ] - %idx = phi i8 [ %and, %entry ], [ %inc, %loop ] - %load = load i16, i16* %ptr, align 2 - %inc = add nuw nsw i8 %idx, 1 - %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc - %cond = icmp eq i16* %gep, %other - br i1 %cond, label %exit, label %loop - -exit: - ret void -} - -; CHECK-LABEL: phi_pointers_null -; CHECK-NOT: uxt -define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { -entry: - %add = add nuw i8 %M, 1 - %and = and i8 %add, 1 - %cmp = icmp ugt i8 %add, %N - %base = select i1 %cmp, i16* %a, i16* %b - %other = select i1 %cmp, i16* %b, i16* %b - %cmp.1 = icmp eq i16* %base, %other - br i1 %cmp.1, label %fail, label %loop - -fail: - br label %loop - -loop: - %ptr = phi 
i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ] - %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ] - %undef = icmp eq i16* %ptr, undef - br i1 %undef, label %exit, label %if.then - -if.then: - %load = load i16, i16* %ptr, align 2 - %inc = add nuw nsw i8 %idx, 1 - %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc - %cond = icmp eq i16* %gep, %other - br i1 %cond, label %exit, label %loop - -exit: - ret void -} - -declare i8 @do_something_with_ptr(i8, i16*) - -; CHECK-LABEL: call_pointer -; CHECK-NOT: uxt -define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) { - %or = or i8 %x, %y - %shr = lshr i8 %or, 1 - %add = add nuw i8 %shr, 2 - %cmp = icmp ne i8 %add, 0 - %ptr = select i1 %cmp, i16* %a, i16* %b - %call = tail call zeroext i8 @do_something_with_ptr(i8 %shr, i16* %ptr) - ret i8 %call -} - -; CHECK-LABEL: pointer_to_pointer -; CHECK-NOT: uxt -define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) { -entry: - %addr = load i16*, i16** %arg - %val = load i16, i16* %addr - %add = add nuw i16 %val, 7 - %cmp = icmp ult i16 %add, 256 - %res = select i1 %cmp, i16 128, i16 255 - ret i16 %res -} - -; CHECK-LABEL: gep_2d_array -; CHECK-NOT: uxt -define i8 @gep_2d_array(i8** %a, i8 zeroext %arg) { -entry: - %arrayidx.us = getelementptr inbounds i8*, i8** %a, i32 0 - %0 = load i8*, i8** %arrayidx.us, align 4 - %1 = load i8, i8* %0, align 1 - %sub = sub nuw i8 %1, 1 - %cmp = icmp ult i8 %sub, %arg - %res = select i1 %cmp, i8 27, i8 54 - ret i8 %res -} - -; CHECK-LABEL: gep_2d_array_loop -; CHECK-NOT: uxt -define void @gep_2d_array_loop(i16** nocapture readonly %a, i16** nocapture readonly %b, i32 %N) { -entry: - %cmp30 = icmp eq i32 %N, 0 - br i1 %cmp30, label %for.cond.cleanup, label %for.cond1.preheader.us - -for.cond1.preheader.us: - %y.031.us = phi i32 [ %inc13.us, %for.cond1.for.cond.cleanup3_crit_edge.us ], [ 0, %entry ] - br label %for.body4.us - -for.body4.us: - %x.029.us = phi i32 [ 0, %for.cond1.preheader.us 
], [ %inc.us, %for.body4.us ] - %arrayidx.us = getelementptr inbounds i16*, i16** %a, i32 %x.029.us - %0 = load i16*, i16** %arrayidx.us, align 4 - %arrayidx5.us = getelementptr inbounds i16, i16* %0, i32 %y.031.us - %1 = load i16, i16* %arrayidx5.us, align 2 - %dec.us = add nuw i16 %1, -1 - %cmp6.us = icmp ult i16 %dec.us, 16383 - %shl.us = shl nuw i16 %dec.us, 2 - %spec.select.us = select i1 %cmp6.us, i16 %shl.us, i16 %dec.us - %arrayidx10.us = getelementptr inbounds i16*, i16** %b, i32 %x.029.us - %2 = load i16*, i16** %arrayidx10.us, align 4 - %arrayidx11.us = getelementptr inbounds i16, i16* %2, i32 %y.031.us - store i16 %spec.select.us, i16* %arrayidx11.us, align 2 - %inc.us = add nuw i32 %x.029.us, 1 - %exitcond = icmp eq i32 %inc.us, %N - br i1 %exitcond, label %for.cond1.for.cond.cleanup3_crit_edge.us, label %for.body4.us - -for.cond1.for.cond.cleanup3_crit_edge.us: - %inc13.us = add nuw i32 %y.031.us, 1 - %exitcond32 = icmp eq i32 %inc13.us, %N - br i1 %exitcond32, label %for.cond.cleanup, label %for.cond1.preheader.us - -for.cond.cleanup: - ret void -} Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-signed-icmps.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-signed-icmps.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-signed-icmps.ll @@ -1,109 +0,0 @@ -; RUN: llc -mtriple=thumbv8m.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; CHECK-COMMON-LABEL: eq_sgt -; CHECK-NODSP: add -; CHECK-NODSP: uxtb -; CHECK-NODSP: sxtb -; CHECK-NODSP: cmp -; CHECK-NODSP: sub -; 
CHECK-NODSP: sxtb -; CHECK-NODSP: cmp - -; CHECK-DSP: add -; CHECK-DSP: uxtb -; CHECK-DSP: cmp -; CHECK-DSP: sxtb -; CHECK-DSP: sub -; CHECK-DSP: sxtb -; CHECK-DSP: cmp - -; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]], -; CHECK-DSP-IMM: cmp [[ADD]], -; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]] -; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]], -; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]] -; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]] -define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) { -entry: - %load0 = load i8, i8* %x, align 1 - %load1 = load i8, i8* %y, align 1 - %add = add i8 %load0, %z - %sub = sub i8 %load1, 1 - %cmp = icmp eq i8 %add, 200 - %cmp1 = icmp sgt i8 %sub, %add - %res0 = select i1 %cmp, i8 35, i8 47 - %res1 = select i1 %cmp1, i8 %res0, i8 %sub - ret i8 %res1 -} - -; CHECK-COMMON-LABEL: ugt_slt -; CHECK-NODSP: sub -; CHECK-NODSP: sxth -; CHECK-NODSP: uxth -; CHECK-NODSP: add -; CHECK-NODSP: sxth -; CHECK-NODSP: cmp -; CHECK-NODSP: cmp - -; CHECK-DSP: sub -; CHECK-DSP: sxth -; CHECK-DSP: add -; CHECK-DSP: uxth -; CHECK-DSP: sxth -; CHECK-DSP: cmp -; CHECK-DSP: cmp - -; CHECK-DSP-IMM: sxth [[ARG:r[0-9]+]], r2 -; CHECK-DSP-IMM: uadd16 [[ADD:r[0-9]+]], -; CHECK-DSP-IMM: sxth.w [[SEXT:r[0-9]+]], [[ADD]] -; CHECK-DSP-IMM: cmp [[SEXT]], [[ARG]] -; CHECK-DSP-IMM-NOT: uxt -; CHECK-DSP-IMM: movs [[ONE:r[0-9]+]], #1 -; CHECK-DSP-IMM: usub16 [[SUB:r[0-9]+]], r1, [[ONE]] -; CHECK-DSP-IMM: cmp [[SUB]], r2 -define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) { -entry: - %load0 = load i16, i16* %x, align 1 - %add = add i16 %load0, %z - %sub = sub i16 %y, 1 - %cmp = icmp slt i16 %add, %z - %cmp1 = icmp ugt i16 %sub, %z - %res0 = select i1 %cmp, i16 35, i16 -1 - %res1 = select i1 %cmp1, i16 %res0, i16 0 - ret i16 %res1 -} - -; CHECK-COMMON-LABEL: urem_trunc_icmps -; CHECK-COMMON-NOT: uxt -; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]], -; CHECK-COMMON: cmp [[SEXT]], #7 -define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { -entry: - %ptr = load i16*, i16** %in, align 4 - 
%ld = load i16, i16* %ptr, align 2 - %cmp.i = icmp eq i16 %ld, 0 - br i1 %cmp.i, label %exit, label %cond.false.i - -cond.false.i: - %rem = urem i16 5, %ld - %extract.t = trunc i16 %rem to i8 - br label %body - -body: - %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] - %cmp = icmp sgt i8 %cond.in.i.off0, 7 - %conv5 = zext i1 %cmp to i32 - store i32 %conv5, i32* %g, align 4 - %.pr = load i32, i32* %k, align 4 - %tobool13150 = icmp eq i32 %.pr, 0 - br i1 %tobool13150, label %for.inc, label %exit - -for.inc: - %add = add nuw i8 %cond.in.i.off0, 1 - br label %body - -exit: - ret void -} Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-signed.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-signed.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-signed.ll @@ -1,45 +0,0 @@ -; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s -; RUN: llc -mtriple=thumbv7 %s -arm-disable-cgp=false -o - | FileCheck %s -; RUN: llc -mtriple=armv8 %s -arm-disable-cgp=false -o - | FileCheck %s - -; Test to check that ARMCodeGenPrepare doesn't optimised away sign extends. -; CHECK-LABEL: test_signed_load: -; CHECK: uxth -define i16 @test_signed_load(i16* %ptr) { - %load = load i16, i16* %ptr - %conv0 = zext i16 %load to i32 - %conv1 = sext i16 %load to i32 - %cmp = icmp eq i32 %conv0, %conv1 - %conv2 = zext i1 %cmp to i16 - ret i16 %conv2 -} - -; Don't allow sign bit generating opcodes. 
-; CHECK-LABEL: test_ashr: -; CHECK: sxth -define i16 @test_ashr(i16 zeroext %arg) { - %ashr = ashr i16 %arg, 1 - %cmp = icmp eq i16 %ashr, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: test_sdiv: -; CHECK: sxth -define i16 @test_sdiv(i16 zeroext %arg) { - %sdiv = sdiv i16 %arg, 2 - %cmp = icmp ne i16 %sdiv, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; CHECK-LABEL: test_srem -; CHECK: sxth -define i16 @test_srem(i16 zeroext %arg) { - %srem = srem i16 %arg, 4 - %cmp = icmp ne i16 %srem, 0 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} -