Patch summary (free text ahead of the first "Index:" header).

CodeGenPrepare.cpp: the "ScaledReg is narrower than IntPtrTy" bail-out is
hoisted to an early `return false`, placed ahead of the BaseGV handling.
The later "add the scale" branch no longer erases ResultIndex and returns;
it asserts that only the truncating case can reach it and emits the trunc.

sink-addrmode.ll: adds @test8, a regression test whose stated purpose is to
make sure there is no infinite loop after a failed sink.

NOTE(review): leading indentation inside the hunks was reconstructed from a
whitespace-collapsed copy; confirm it against the target tree before applying.

Index: lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- lib/CodeGen/CodeGenPrepare.cpp
+++ lib/CodeGen/CodeGenPrepare.cpp
@@ -4390,6 +4390,20 @@
       AddrMode.Scale = 0;
     }
 
+    // It is only safe to sign extend the BaseReg if we know that the math
+    // required to create it did not overflow before we extend it. Since
+    // the original IR value was tossed in favor of a constant back when
+    // the AddrMode was created we need to bail out gracefully if widths
+    // do not match instead of extending it.
+    //
+    // (See below for code to add the scale.)
+    if (AddrMode.Scale) {
+      Type *ScaledRegTy = AddrMode.ScaledReg->getType();
+      if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
+          cast<IntegerType>(ScaledRegTy)->getBitWidth())
+        return false;
+    }
+
     if (AddrMode.BaseGV) {
       if (ResultPtr)
         return false;
@@ -4438,19 +4452,11 @@
         Value *V = AddrMode.ScaledReg;
         if (V->getType() == IntPtrTy) {
           // done.
-        } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
-                   cast<IntegerType>(V->getType())->getBitWidth()) {
-          V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
         } else {
-          // It is only safe to sign extend the BaseReg if we know that the math
-          // required to create it did not overflow before we extend it. Since
-          // the original IR value was tossed in favor of a constant back when
-          // the AddrMode was created we need to bail out gracefully if widths
-          // do not match instead of extending it.
-          Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
-          if (I && (ResultIndex != AddrMode.BaseReg))
-            I->eraseFromParent();
-          return false;
+          assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
+                 cast<IntegerType>(V->getType())->getBitWidth() &&
+                 "We can't transform if ScaledReg is too narrow");
+          V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
         }
 
         if (AddrMode.Scale != 1)
Index: test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
===================================================================
--- test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
+++ test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -4,6 +4,8 @@
 "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-unknown-linux-gnu"
 
+@x = external global [1 x [2 x <4 x float>]]
+
 ; Can we sink single addressing mode computation to use?
 define void @test1(i1 %cond, i64* %base) {
 ; CHECK-LABEL: @test1
@@ -194,3 +196,25 @@
 
 
 declare void @slowpath(i32, i32*)
+
+; Make sure we don't end up in an infinite loop after we fail to sink.
+; CHECK-LABEL: define void @test8
+; CHECK: %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+define void @test8() {
+allocas:
+  %aFOO_load = load float*, float** undef
+  %aFOO_load_ptr2int = ptrtoint float* %aFOO_load to i64
+  %aFOO_load_ptr2int_broadcast_init = insertelement <4 x i64> undef, i64 %aFOO_load_ptr2int, i32 0
+  %aFOO_load_ptr2int_2void = inttoptr i64 %aFOO_load_ptr2int to i8*
+  %ptr = getelementptr i8, i8* %aFOO_load_ptr2int_2void, i32 undef
+  br label %load.i145
+
+load.i145:
+  %ptr.i143 = bitcast i8* %ptr to <4 x float>*
+  %valall.i144 = load <4 x float>, <4 x float>* %ptr.i143, align 4
+  %x_offset = getelementptr [1 x [2 x <4 x float>]], [1 x [2 x <4 x float>]]* @x, i32 0, i64 0
+  br label %pl_loop.i.i122
+
+pl_loop.i.i122:
+  br label %pl_loop.i.i122
+}