Index: lib/Transforms/Scalar/Scalarizer.cpp =================================================================== --- lib/Transforms/Scalar/Scalarizer.cpp +++ lib/Transforms/Scalar/Scalarizer.cpp @@ -227,10 +227,15 @@ if (!Idx) break; unsigned J = Idx->getZExtValue(); - CV[J] = Insert->getOperand(1); V = Insert->getOperand(0); - if (I == J) + if (I == J) { + CV[J] = Insert->getOperand(1); return CV[J]; + } else if (!CV[J]) { + // Only cache the first entry we find for each index. This prevents us + // going back too far up the chain. + CV[J] = Insert->getOperand(1); + } } CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I), V->getName() + ".i" + Twine(I)); Index: test/Transforms/Scalarizer/cache-bug.ll =================================================================== --- /dev/null +++ test/Transforms/Scalarizer/cache-bug.ll @@ -0,0 +1,48 @@ +; RUN: opt -scalarizer -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + + +; Check that vector element 1 is scalarized correctly from a chain of +; insertelement instructions +define void @func(i32 %x) { +; CHECK-LABEL: @func( +; CHECK: entry: +; CHECK: %vecinit = insertelement <2 x i32> zeroinitializer, i32 %x, i32 1 +; CHECK: %inc = add i32 %x, 1 +; CHECK: %0 = insertelement <2 x i32> %vecinit, i32 %inc, i32 1 +; CHECK: br label %loop +; CHECK: loop: +; CHECK: %pos.i0 = phi i32 [ 0, %entry ], [ %pos.i01, %loop ] +; CHECK: %pos.i1 = phi i32 [ %inc, %entry ], [ %inc.pos.y, %loop ] +; CHECK: %i = phi i32 [ 0, %entry ], [ %new.i, %loop ] +; CHECK: %pos.upto0 = insertelement <2 x i32> undef, i32 %pos.i0, i32 0 +; CHECK: %pos = insertelement <2 x i32> %pos.upto0, i32 %pos.i1, i32 1 +; CHECK: %pos.y = extractelement <2 x i32> %pos, i32 1 +; CHECK: %inc.pos.y = add i32 %pos.y, 1 +; CHECK: %new.pos.y = insertelement <2 x i32> %pos, i32 %inc.pos.y, i32 1 +; CHECK: %pos.i01 = extractelement <2 x i32> %pos, i32 0 +; CHECK: %new.i = add i32 %i, 1 +; CHECK: %cmp2 = icmp slt i32 %new.i, 1 +; CHECK: br i1 %cmp2, label %loop, label %exit +; CHECK: exit: +; CHECK: ret void + +entry: + %vecinit = insertelement <2 x i32> , i32 %x, i32 1 + %inc = add i32 %x, 1 + %0 = insertelement <2 x i32> %vecinit, i32 %inc, i32 1 + br label %loop + +loop: + %pos = phi <2 x i32> [ %0, %entry ], [ %new.pos.y, %loop ] + %i = phi i32 [ 0, %entry ], [ %new.i, %loop ] + %pos.y = extractelement <2 x i32> %pos, i32 1 + %inc.pos.y = add i32 %pos.y, 1 + %new.pos.y = insertelement <2 x i32> %pos, i32 %inc.pos.y, i32 1 + %new.i = add i32 %i, 1 + %cmp2 = icmp slt i32 %new.i, 1 + br i1 %cmp2, label %loop, label %exit + +exit: + ret void +}