Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -151,6 +151,14 @@
   if (!AI.hasOneUse()) {
     // New is the allocation instruction, pointer typed. AI is the original
     // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
+
+    // Scan to the end of the allocation instructions, to avoid inserting
+    // in the middle of a block of allocas.
+    BasicBlock::iterator It(New);
+    while (isa<AllocaInst>(*It))
+      ++It;
+    Builder.SetInsertPoint(&*It);
+
     Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
     replaceInstUsesWith(AI, NewCast);
     eraseInstFromFunction(AI);
Index: llvm/test/Transforms/InstCombine/alloca.ll
===================================================================
--- llvm/test/Transforms/InstCombine/alloca.ll
+++ llvm/test/Transforms/InstCombine/alloca.ll
@@ -177,3 +177,33 @@
   call void (...) @use(i32* nonnull @int) [ "blah"(i32* %y) ]
   ret void
 }
+
+%struct.pluto = type { [4 x float] }
+%struct.bar = type { [4 x i32*] }
+
+; Make sure we put the TMPCAST bitcast after the allocas not between.
+define void @test12(<2 x float> %arg) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca { <2 x float>, <2 x float> }, align 8
+; CHECK-NEXT:    [[TMP:%.*]] = alloca [[STRUCT_BAR:%.*]], align 8
+; CHECK-NEXT:    [[TMPCAST:%.*]] = bitcast { <2 x float>, <2 x float> }* [[TMP1]] to %struct.pluto*
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* [[TMP1]], i64 0, i32 0
+; CHECK-NEXT:    store <2 x float> [[ARG:%.*]], <2 x float>* [[TMP5]], align 8
+; CHECK-NEXT:    call void @snork(%struct.bar* nonnull [[TMP]], %struct.pluto* nonnull [[TMPCAST]])
+; CHECK-NEXT:    ret void
+bb:
+  %tmp1 = alloca %struct.pluto, align 4
+  %tmp = alloca %struct.bar, align 8
+  %tmp2 = bitcast %struct.pluto* %tmp1 to i8*
+  %tmp3 = getelementptr inbounds %struct.pluto, %struct.pluto* %tmp1, i32 0, i32 0
+  %tmp4 = bitcast [4 x float]* %tmp3 to { <2 x float>, <2 x float> }*
+  %tmp5 = getelementptr inbounds { <2 x float>, <2 x float> }, { <2 x float>, <2 x float> }* %tmp4, i32 0, i32 0
+  store <2 x float> %arg, <2 x float>* %tmp5, align 4
+  call void @snork(%struct.bar* %tmp, %struct.pluto* %tmp1)
+  ret void
+}
+
+declare { <2 x float>, <2 x float> } @zot(<2 x float>)
+
+declare void @snork(%struct.bar* sret, %struct.pluto*)