Index: llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ llvm/trunk/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -127,7 +127,9 @@ static bool isSupportedType(Value *V) { LLVM_DEBUG(dbgs() << "ARM CGP: isSupportedType: " << *V << "\n"); Type *Ty = V->getType(); - if (Ty->isVoidTy()) + + // Allow voids and pointers, these won't be promoted. + if (Ty->isVoidTy() || Ty->isPointerTy()) return true; if (auto *Ld = dyn_cast(V)) @@ -150,6 +152,8 @@ /// Many arguments will have the zeroext attribute too, so those would be free /// too. static bool isSource(Value *V) { + if (!isa(V->getType())) + return false; // TODO Allow truncs and zext to be sources. if (isa(V)) return true; @@ -222,8 +226,10 @@ } static bool shouldPromote(Value *V) { - if (!isa(V->getType()) || isSink(V)) + if (!isa(V->getType()) || isSink(V)) { + LLVM_DEBUG(dbgs() << "ARM CGP: Don't need to promote: " << *V << "\n"); return false; + } if (isSource(V)) return true; @@ -369,21 +375,19 @@ if (Leaves.count(V)) continue; - if (!isa(V)) - continue; - auto *I = cast(V); if (Roots.count(I)) continue; - for (auto &U : I->operands()) { - if ((U->getType() == ExtTy) || !isSupportedType(&*U)) + for (unsigned i = 0, e = I->getNumOperands(); i < e; ++i) { + Value *Op = I->getOperand(i); + if ((Op->getType() == ExtTy) || !isa(Op->getType())) continue; - if (auto *Const = dyn_cast(&*U)) + if (auto *Const = dyn_cast(Op)) FixConst(Const, I); - else if (isa(&*U)) - U->mutateType(ExtTy); + else if (isa(Op)) + I->setOperand(i, UndefValue::get(ExtTy)); } if (shouldPromote(I)) { @@ -398,9 +402,6 @@ if (Leaves.count(V)) continue; - if (!isa(V)) - continue; - if (!shouldPromote(V) || isPromotedResultSafe(V)) continue; @@ -424,6 +425,9 @@ for (unsigned i = 0; i < I->getNumOperands(); ++i) { Value *V = I->getOperand(i); + if (!isa(V->getType())) + continue; + if (Promoted.count(V) || NewInsts.count(V)) { if (auto 
*Op = dyn_cast(V)) { @@ -466,7 +470,7 @@ return true; // Non-instruction values that we can handle. - if (isa(V) || isa(V)) + if ((isa(V) && !isa(V)) || isa(V)) return isSupportedType(V); if (isa(V) || isa(V) || isa(V) || @@ -558,10 +562,6 @@ if (CurrentVisited.count(V)) return true; - // Ignore pointer value that aren't instructions. - if (!isa(V) && isa(V->getType())) - return true; - if (!isSupportedValue(V) || (shouldPromote(V) && !isLegalToPromote(V))) { LLVM_DEBUG(dbgs() << "ARM CGP: Can't handle: " << *V << "\n"); return false; @@ -578,6 +578,7 @@ if (CurrentVisited.count(V)) continue; + // Ignore non-instructions, other than arguments. if (!isa(V) && !isSource(V)) continue; @@ -620,6 +621,17 @@ for (auto *I : CurrentVisited) I->dump(); ); + unsigned ToPromote = 0; + for (auto *V : CurrentVisited) { + if (Leaves.count(V)) + continue; + if (Roots.count(cast(V))) + continue; + ++ToPromote; + } + + if (ToPromote < 2) + return false; Promoter->Mutate(OrigTy, CurrentVisited, Leaves, Roots); return true; Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-calls.ll @@ -0,0 +1,184 @@ +; RUN: llc -mtriple=thumbv8 -arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s + +; Check that the pass doesn't try to promote the immediate parameters. +; CHECK-COMMON-LABEL: call_with_imms +; CHECK-COMMON-NOT: uxt +define i8 @call_with_imms(i8* %arg) { + %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0) + %cmp = icmp eq i8 %call, 0 + %res = select i1 %cmp, i8 %call, i8 1 + ret i8 %res +} + +; Test that the call result is still extended. 
+; CHECK-COMMON-LABEL: test_call: +; CHECK-COMMON: bl +; CHECK-COMMONNEXT: sxtb r1, r0 +define i16 @test_call(i8 zeroext %arg) { + %call = call i8 @dummy_i8(i8 %arg) + %cmp = icmp ult i8 %call, 128 + %conv = zext i1 %cmp to i16 + ret i16 %conv +} + +; Test that the transformation bails when it finds that i16 is larger than i8. +; TODO: We should be able to remove the uxtb in these cases. +; CHECK-LABEL: promote_i8_sink_i16_1 +; CHECK-COMMON: bl dummy_i8 +; CHECK-COMMON: adds r0, #1 +; CHECK-COMMON: uxtb r0, r0 +; CHECK-COMMON: cmp r0 +define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { + %call = tail call zeroext i8 @dummy_i8(i8 %arg0) + %add = add nuw i8 %call, 1 + %conv = zext i8 %add to i16 + %cmp = icmp ne i16 %conv, %arg1 + %sel = select i1 %cmp, i16 %arg1, i16 %arg2 + %res = tail call zeroext i16 @dummy3(i16 %sel) + ret i16 %res +} + +; CHECK-COMMON-LABEL: promote_i8_sink_i16_2 +; CHECK-COMMON: bl dummy_i8 +; CHECK-COMMON: adds r0, #1 +; CHECK-COMMON: uxtb r0, r0 +; CHECK-COMMON: cmp r0 +define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) { + %call = tail call zeroext i8 @dummy_i8(i8 %arg0) + %add = add nuw i8 %call, 1 + %cmp = icmp ne i8 %add, %arg1 + %conv = zext i8 %arg1 to i16 + %sel = select i1 %cmp, i16 %conv, i16 %arg2 + %res = tail call zeroext i16 @dummy3(i16 %sel) + ret i16 %res +} + +@uc = global i8 42, align 1 +@LL = global i64 0, align 8 + +; CHECK-COMMON-LABEL: zext_i64 +; CHECK-COMMON: ldrb +; CHECK-COMMON: strd +define void @zext_i64() { +entry: + %0 = load i8, i8* @uc, align 1 + %conv = zext i8 %0 to i64 + store i64 %conv, i64* @LL, align 8 + %cmp = icmp eq i8 %0, 42 + %conv1 = zext i1 %cmp to i32 + %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1) + ret void +} + +@a = global i16* null, align 4 +@b = global i32 0, align 4 + +; CHECK-COMMON-LABEL: constexpr +; CHECK-COMMON: uxth +define i32 @constexpr() { +entry: + store i32 ptrtoint (i32* 
@b to i32), i32* @b, align 4 + %0 = load i16*, i16** @a, align 4 + %1 = load i16, i16* %0, align 2 + %or = or i16 %1, ptrtoint (i32* @b to i16) + store i16 %or, i16* %0, align 2 + %cmp = icmp ne i16 %or, 4 + %conv3 = zext i1 %cmp to i32 + %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2 + ret i32 undef +} + +; The call to safe_lshift_func takes two parameters, but they're the same value just one is zext. +; The transform won't happen because of the zext. +; CHECK-COMMON-LABEL: call_zext_i8_i32 +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: cmp +; CHECK-COMMON: uxtb +define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) { +for.cond8.preheader: + %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef) + %tobool219 = icmp eq i8 %call217, 0 + br i1 %tobool219, label %for.end411, label %for.cond273.preheader + +for.cond273.preheader: ; preds = %for.cond8.preheader + %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ] + %conv218.le = zext i8 %call217.lcssa to i32 + %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le) + unreachable + +for.end411: ; preds = %for.cond8.preheader + %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4) + unreachable +} + +%struct.anon = type { i32 } + +@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 +@g_82 = hidden local_unnamed_addr global i32 0, align 4 + +; Test that the transform bails on finding %conv4, a trunc +; CHECK-COMMON-LABEL: call_return_pointer +; CHECK-COMMON: sxth +; CHECK-COMMON-NOT: uxt +define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 { +entry: + %conv1 = zext i8 %p_13 to i16 + %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef) + %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 
0), align 4 + %conv2 = trunc i32 %0 to i16 + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ] + %tobool = icmp eq i8 %p_13.addr.0, 0 + br i1 %tobool, label %for.cond.backedge, label %if.then + +for.cond.backedge: ; preds = %for.cond, %if.then + %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ] + br label %for.cond + +if.then: ; preds = %for.cond + %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2) + %conv4 = trunc i16 %call3 to i8 + br label %for.cond.backedge +} + +; Transform will bail because of the zext +; Check that d.sroa.0.0.be is promoted passed directly into the tail call. +; CHECK-COMMON-LABEL: check_zext_phi_call_arg +; CHECK-COMMON: uxt +; CHECK-COMMON: uxt +define i32 @check_zext_phi_call_arg() { +entry: + br label %for.cond + +for.cond: ; preds = %for.cond.backedge, %entry + %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ] + %tobool = icmp eq i16 %d.sroa.0.0, 0 + br i1 %tobool, label %for.cond.backedge, label %if.then + +for.cond.backedge: ; preds = %for.cond, %if.then + %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ] + br label %for.cond + +if.then: ; preds = %for.cond + %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32 + %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2 + br label %for.cond.backedge +} + +declare i32 @assert(...) +declare i8 @dummy_i8(i8) +declare i8 @dummy2(i8*, i8, i8) +declare i16 @dummy3(i16) + +declare dso_local i32 @e(...) local_unnamed_addr #1 +declare dso_local zeroext i16 @f(...) 
local_unnamed_addr #1 + +declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66) +declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2) +declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64) +declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32) +declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext) Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-calls-ret.ll @@ -1,323 +0,0 @@ -; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP -; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP -; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM - -; Test that ARMCodeGenPrepare can handle: -; - loops -; - call operands -; - call return values -; - ret instructions -; We use nuw on the arithmetic instructions to avoid complications. - -; Check that the arguments are extended but then nothing else is. -; This also ensures that the pass can handle loops. 
-; CHECK-COMMON-LABEL: phi_feeding_phi_args -; CHECK-COMMON: uxtb -; CHECK-COMMON: uxtb -; CHECK-NOT: uxtb -define void @phi_feeding_phi_args(i8 %a, i8 %b) { -entry: - %0 = icmp ugt i8 %a, %b - br i1 %0, label %preheader, label %empty - -empty: - br label %preheader - -preheader: - %1 = phi i8 [ %a, %entry ], [ %b, %empty ] - br label %loop - -loop: - %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 254 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = sub nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = shl nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp eq i8 %inc2, 255 - br i1 %cmp1, label %exit, label %loop - -exit: - ret void -} - -; Same as above, but as the args are zeroext, we shouldn't see any uxts. -; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args -; CHECK-COMMON-NOT: uxt -define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) { -entry: - %0 = icmp ugt i8 %a, %b - br i1 %0, label %preheader, label %empty - -empty: - br label %preheader - -preheader: - %1 = phi i8 [ %a, %entry ], [ %b, %empty ] - br label %loop - -loop: - %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 254 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = sub nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = shl nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp eq i8 %inc2, 255 - br i1 %cmp1, label %exit, label %loop - -exit: - ret void -} - -; Just check that phis also work with i16s. 
-; CHECK-COMMON-LABEL: phi_i16: -; CHECK-COMMON-NOT: uxt -define void @phi_i16() { -entry: - br label %loop - -loop: - %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ] - %cmp = icmp ult i16 %val, 128 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = add nuw i16 %val, 2 - br label %if.end - -if.else: - %inc1 = add nuw i16 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp ult i16 %inc2, 253 - br i1 %cmp1, label %loop, label %exit - -exit: - ret void -} - -; CHECK-COMMON-LABEL: ret_i8 -; CHECK-COMMON-NOT: uxt -define i8 @ret_i8() { -entry: - br label %loop - -loop: - %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ] - %cmp = icmp ult i8 %val, 128 - br i1 %cmp, label %if.then, label %if.else - -if.then: - %inc = add nuw i8 %val, 2 - br label %if.end - -if.else: - %inc1 = add nuw i8 %val, 1 - br label %if.end - -if.end: - %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] - %cmp1 = icmp ult i8 %inc2, 253 - br i1 %cmp1, label %exit, label %loop - -exit: - ret i8 %inc2 -} - -; Check that the pass doesn't try to promote the immediate parameters. -; CHECK-COMMON-LABEL: call_with_imms -; CHECK-COMMON-NOT: uxt -define i8 @call_with_imms(i8* %arg) { - %call = tail call arm_aapcs_vfpcc zeroext i8 @dummy2(i8* nonnull %arg, i8 zeroext 0, i8 zeroext 0) - %cmp = icmp eq i8 %call, 0 - %res = select i1 %cmp, i8 %call, i8 1 - ret i8 %res -} - -; Test that the call result is still extended. -; CHECK-COMMON-LABEL: test_call: -; CHECK-COMMON: bl -; CHECK-COMMONNEXT: sxtb r1, r0 -define i16 @test_call(i8 zeroext %arg) { - %call = call i8 @dummy_i8(i8 %arg) - %cmp = icmp ult i8 %call, 128 - %conv = zext i1 %cmp to i16 - ret i16 %conv -} - -; Test that the transformation bails when it finds that i16 is larger than i8. -; TODO: We should be able to remove the uxtb in these cases. 
-; CHECK-LABEL: promote_i8_sink_i16_1 -; CHECK-COMMON: bl dummy_i8 -; CHECK-COMMON: adds r0, #1 -; CHECK-COMMON: uxtb r0, r0 -; CHECK-COMMON: cmp r0 -define i16 @promote_i8_sink_i16_1(i8 zeroext %arg0, i16 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %conv = zext i8 %add to i16 - %cmp = icmp ne i16 %conv, %arg1 - %sel = select i1 %cmp, i16 %arg1, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -; CHECK-COMMON-LABEL: promote_i8_sink_i16_2 -; CHECK-COMMON: bl dummy_i8 -; CHECK-COMMON: adds r0, #1 -; CHECK-COMMON: uxtb r0, r0 -; CHECK-COMMON: cmp r0 -define i16 @promote_i8_sink_i16_2(i8 zeroext %arg0, i8 zeroext %arg1, i16 zeroext %arg2) { - %call = tail call zeroext i8 @dummy_i8(i8 %arg0) - %add = add nuw i8 %call, 1 - %cmp = icmp ne i8 %add, %arg1 - %conv = zext i8 %arg1 to i16 - %sel = select i1 %cmp, i16 %conv, i16 %arg2 - %res = tail call zeroext i16 @dummy3(i16 %sel) - ret i16 %res -} - -@uc = global i8 42, align 1 -@LL = global i64 0, align 8 - -; CHECK-COMMON-LABEL: zext_i64 -; CHECK-COMMON: ldrb -; CHECK-COMMON: strd -define void @zext_i64() { -entry: - %0 = load i8, i8* @uc, align 1 - %conv = zext i8 %0 to i64 - store i64 %conv, i64* @LL, align 8 - %cmp = icmp eq i8 %0, 42 - %conv1 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* @assert to i32 (i32)*)(i32 %conv1) - ret void -} - -@a = global i16* null, align 4 -@b = global i32 0, align 4 - -; CHECK-COMMON-LABEL: constexpr -; CHECK-COMMON: uxth -define i32 @constexpr() { -entry: - store i32 ptrtoint (i32* @b to i32), i32* @b, align 4 - %0 = load i16*, i16** @a, align 4 - %1 = load i16, i16* %0, align 2 - %or = or i16 %1, ptrtoint (i32* @b to i16) - store i16 %or, i16* %0, align 2 - %cmp = icmp ne i16 %or, 4 - %conv3 = zext i1 %cmp to i32 - %call = tail call i32 bitcast (i32 (...)* @e to i32 (i32)*)(i32 %conv3) #2 - ret i32 undef -} - -; Transform will bail because of the zext -; Check that 
d.sroa.0.0.be is promoted passed directly into the tail call. -; CHECK-COMMON-LABEL: check_zext_phi_call_arg -; CHECK-COMMON: uxt -define i32 @check_zext_phi_call_arg() { -entry: - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %d.sroa.0.0 = phi i16 [ 30, %entry ], [ %d.sroa.0.0.be, %for.cond.backedge ] - %tobool = icmp eq i16 %d.sroa.0.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %d.sroa.0.0.be = phi i16 [ %call, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %d.sroa.0.0.insert.ext = zext i16 %d.sroa.0.0 to i32 - %call = tail call zeroext i16 bitcast (i16 (...)* @f to i16 (i32)*)(i32 %d.sroa.0.0.insert.ext) #2 - br label %for.cond.backedge -} - - -; The call to safe_lshift_func takes two parameters, but they're the same value just one is zext. -; CHECK-COMMON-LABEL: call_zext_i8_i32 -define fastcc i32 @call_zext_i8_i32(i32 %p_45, i8 zeroext %p_46) { -for.cond8.preheader: - %call217 = call fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 zeroext undef) - %tobool219 = icmp eq i8 %call217, 0 - br i1 %tobool219, label %for.end411, label %for.cond273.preheader - -for.cond273.preheader: ; preds = %for.cond8.preheader - %call217.lcssa = phi i8 [ %call217, %for.cond8.preheader ] - %conv218.le = zext i8 %call217.lcssa to i32 - %call346 = call fastcc zeroext i8 @safe_lshift_func(i8 zeroext %call217.lcssa, i32 %conv218.le) - unreachable - -for.end411: ; preds = %for.cond8.preheader - %call452 = call fastcc i64 @safe_sub_func_int64_t_s_s(i64 undef, i64 4) - unreachable -} - -%struct.anon = type { i32 } - -@g_57 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_893 = hidden local_unnamed_addr global %struct.anon zeroinitializer, align 4 -@g_82 = hidden local_unnamed_addr global i32 0, align 4 - -; Test that the transform bails on finding a call which returns a i16** -; CHECK-COMMON-LABEL: call_return_pointer -; 
CHECK-COMMON: sxth -; CHECK-COMMON-NOT: uxt -define hidden i32 @call_return_pointer(i8 zeroext %p_13) local_unnamed_addr #0 { -entry: - %conv1 = zext i8 %p_13 to i16 - %call = tail call i16** @func_62(i8 zeroext undef, i32 undef, i16 signext %conv1, i32* undef) - %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @g_893, i32 0, i32 0), align 4 - %conv2 = trunc i32 %0 to i16 - br label %for.cond - -for.cond: ; preds = %for.cond.backedge, %entry - %p_13.addr.0 = phi i8 [ %p_13, %entry ], [ %p_13.addr.0.be, %for.cond.backedge ] - %tobool = icmp eq i8 %p_13.addr.0, 0 - br i1 %tobool, label %for.cond.backedge, label %if.then - -for.cond.backedge: ; preds = %for.cond, %if.then - %p_13.addr.0.be = phi i8 [ %conv4, %if.then ], [ 0, %for.cond ] - br label %for.cond - -if.then: ; preds = %for.cond - %call3 = tail call fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %conv2) - %conv4 = trunc i16 %call3 to i8 - br label %for.cond.backedge -} - -declare noalias i16** @func_62(i8 zeroext %p_63, i32 %p_64, i16 signext %p_65, i32* nocapture readnone %p_66) -declare fastcc signext i16 @safe_sub_func_int16_t_s_s(i16 signext %si2) -declare dso_local fastcc i64 @safe_sub_func_int64_t_s_s(i64, i64) -declare dso_local fastcc zeroext i8 @safe_lshift_func(i8 zeroext, i32) -declare dso_local fastcc zeroext i8 @safe_mul_func_uint8_t_u_u(i8 returned zeroext) - -declare dso_local i32 @e(...) local_unnamed_addr #1 -declare dso_local zeroext i16 @f(...) local_unnamed_addr #1 - -declare i8 @dummy_i8(i8) -declare i8 @dummy2(i8*, i8, i8) -declare i16 @dummy3(i16) -declare i32 @assert(...) 
Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-phis-ret.ll @@ -0,0 +1,174 @@ +; RUN: llc -mtriple=thumbv7m -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv8m.main -arm-disable-cgp=false -arm-enable-scalar-dsp=true -mcpu=cortex-m33 %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; Test that ARMCodeGenPrepare can handle: +; - loops +; - phi nodes, including phis that feed other phis +; - undef incoming phi values +; - ret instructions +; We use nuw on the arithmetic instructions to avoid complications. + +; Check that the arguments are extended but then nothing else is. +; This also ensures that the pass can handle loops. 
+; CHECK-COMMON-LABEL: phi_feeding_phi_args +; CHECK-COMMON: uxtb +; CHECK-COMMON: uxtb +; CHECK-NOT: uxtb +define void @phi_feeding_phi_args(i8 %a, i8 %b) { +entry: + %0 = icmp ugt i8 %a, %b + br i1 %0, label %preheader, label %empty + +empty: + br label %preheader + +preheader: + %1 = phi i8 [ %a, %entry ], [ %b, %empty ] + br label %loop + +loop: + %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 254 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = sub nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = shl nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp eq i8 %inc2, 255 + br i1 %cmp1, label %exit, label %loop + +exit: + ret void +} + +; Same as above, but as the args are zeroext, we shouldn't see any uxts. +; CHECK-COMMON-LABEL: phi_feeding_phi_zeroext_args +; CHECK-COMMON-NOT: uxt +define void @phi_feeding_phi_zeroext_args(i8 zeroext %a, i8 zeroext %b) { +entry: + %0 = icmp ugt i8 %a, %b + br i1 %0, label %preheader, label %empty + +empty: + br label %preheader + +preheader: + %1 = phi i8 [ %a, %entry ], [ %b, %empty ] + br label %loop + +loop: + %val = phi i8 [ %1, %preheader ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 254 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = sub nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = shl nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp eq i8 %inc2, 255 + br i1 %cmp1, label %exit, label %loop + +exit: + ret void +} + +; Just check that phis also work with i16s. 
+; CHECK-COMMON-LABEL: phi_i16: +; CHECK-COMMON-NOT: uxt +define void @phi_i16() { +entry: + br label %loop + +loop: + %val = phi i16 [ 0, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i16 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i16 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i16 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp ult i16 %inc2, 253 + br i1 %cmp1, label %loop, label %exit + +exit: + ret void +} + +; CHECK-COMMON-LABEL: ret_i8 +; CHECK-COMMON-NOT: uxt +define i8 @ret_i8() { +entry: + br label %loop + +loop: + %val = phi i8 [ 0, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i8 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i8 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i8 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i8 [ %inc, %if.then], [ %inc1, %if.else ] + %cmp1 = icmp ult i8 %inc2, 253 + br i1 %cmp1, label %exit, label %loop + +exit: + ret i8 %inc2 +} + +; CHECK-COMMON-LABEL: phi_multiple_undefs +; CHECK-COMMON-NOT: uxt +define i16 @phi_multiple_undefs(i16 zeroext %arg) { +entry: + br label %loop + +loop: + %val = phi i16 [ undef, %entry ], [ %inc2, %if.end ] + %cmp = icmp ult i16 %val, 128 + br i1 %cmp, label %if.then, label %if.else + +if.then: + %inc = add nuw i16 %val, 2 + br label %if.end + +if.else: + %inc1 = add nuw i16 %val, 1 + br label %if.end + +if.end: + %inc2 = phi i16 [ %inc, %if.then], [ %inc1, %if.else ] + %unrelated = phi i16 [ undef, %if.then ], [ %arg, %if.else ] + %cmp1 = icmp ult i16 %inc2, 253 + br i1 %cmp1, label %loop, label %exit + +exit: + ret i16 %unrelated +} Index: llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll +++ llvm/trunk/test/CodeGen/ARM/arm-cgp-pointers.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=thumbv8 
-arm-disable-cgp=false %s -o - | FileCheck %s +; RUN: llc -mtriple=armv8 -arm-disable-cgp=false %s -o - | FileCheck %s + +; CHECK-LABEL: phi_pointers +; CHECK-NOT: uxt +define void @phi_pointers(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { +entry: + %add = add nuw i8 %M, 1 + %and = and i8 %add, 1 + %cmp = icmp ugt i8 %add, %N + %base = select i1 %cmp, i16* %a, i16* %b + %other = select i1 %cmp, i16* %b, i16* %b + br label %loop + +loop: + %ptr = phi i16* [ %base, %entry ], [ %gep, %loop ] + %idx = phi i8 [ %and, %entry ], [ %inc, %loop ] + %load = load i16, i16* %ptr, align 2 + %inc = add nuw nsw i8 %idx, 1 + %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc + %cond = icmp eq i16* %gep, %other + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; CHECK-LABEL: phi_pointers_null +; CHECK-NOT: uxt +define void @phi_pointers_null(i16* %a, i16* %b, i8 zeroext %M, i8 zeroext %N) { +entry: + %add = add nuw i8 %M, 1 + %and = and i8 %add, 1 + %cmp = icmp ugt i8 %add, %N + %base = select i1 %cmp, i16* %a, i16* %b + %other = select i1 %cmp, i16* %b, i16* %b + %cmp.1 = icmp eq i16* %base, %other + br i1 %cmp.1, label %fail, label %loop + +fail: + br label %loop + +loop: + %ptr = phi i16* [ %base, %entry ], [ null, %fail ], [ %gep, %if.then ] + %idx = phi i8 [ %and, %entry ], [ 0, %fail ], [ %inc, %if.then ] + %undef = icmp eq i16* %ptr, undef + br i1 %undef, label %exit, label %if.then + +if.then: + %load = load i16, i16* %ptr, align 2 + %inc = add nuw nsw i8 %idx, 1 + %gep = getelementptr inbounds i16, i16* %ptr, i8 %inc + %cond = icmp eq i16* %gep, %other + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +declare i8 @do_something_with_ptr(i8, i16*) + +; CHECK-LABEL: call_pointer +; CHECK-NOT: uxt +define i8 @call_pointer(i8 zeroext %x, i8 zeroext %y, i16* %a, i16* %b) { + %or = or i8 %x, %y + %shr = lshr i8 %or, 1 + %add = add nuw i8 %shr, 2 + %cmp = icmp ne i8 %add, 0 + %ptr = select i1 %cmp, i16* %a, i16* %b + %call = tail call zeroext 
i8 @do_something_with_ptr(i8 %shr, i16* %ptr) + ret i8 %call +} + +; CHECK-LABEL: pointer_to_pointer +; CHECK-NOT: uxt +define i16 @pointer_to_pointer(i16** %arg, i16 zeroext %limit) { +entry: + %addr = load i16*, i16** %arg + %val = load i16, i16* %addr + %add = add nuw i16 %val, 7 + %cmp = icmp ult i16 %add, 256 + %res = select i1 %cmp, i16 128, i16 255 + ret i16 %res +}