Scalarizer: handle vector GEPs whose base pointer or indices are scalar.
A scalar operand is splatted to a NumElems-wide vector before being scattered.
Adds the regression test test/Transforms/Scalarizer/vector-gep.ll.
NOTE(review): the initializer of @vec was lost when this patch was extracted;
zeroinitializer is substituted below -- confirm against the upstream test.

Index: llvm/trunk/lib/Transforms/Scalar/Scalarizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Scalar/Scalarizer.cpp
+++ llvm/trunk/lib/Transforms/Scalar/Scalarizer.cpp
@@ -520,12 +520,25 @@
   unsigned NumElems = VT->getNumElements();
   unsigned NumIndices = GEPI.getNumIndices();
 
-  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
+  // The base pointer might be scalar even if it's a vector GEP. In those cases,
+  // splat the pointer into a vector value, and scatter that vector.
+  Value *Op0 = GEPI.getOperand(0);
+  if (!Op0->getType()->isVectorTy())
+    Op0 = Builder.CreateVectorSplat(NumElems, Op0);
+  Scatterer Base = scatter(&GEPI, Op0);
 
   SmallVector<Scatterer, 8> Ops;
   Ops.resize(NumIndices);
-  for (unsigned I = 0; I < NumIndices; ++I)
-    Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
+  for (unsigned I = 0; I < NumIndices; ++I) {
+    Value *Op = GEPI.getOperand(I + 1);
+
+    // The indices might be scalars even if it's a vector GEP. In those cases,
+    // splat the scalar into a vector value, and scatter that vector.
+    if (!Op->getType()->isVectorTy())
+      Op = Builder.CreateVectorSplat(NumElems, Op);
+
+    Ops[I] = scatter(&GEPI, Op);
+  }
 
   ValueVector Res;
   Res.resize(NumElems);
Index: llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll
===================================================================
--- llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll
+++ llvm/trunk/test/Transforms/Scalarizer/vector-gep.ll
@@ -0,0 +1,122 @@
+; RUN: opt -S -scalarizer %s | FileCheck %s
+
+; Check that the scalarizer can handle vector GEPs with scalar indices
+
+@vec = global <4 x i16*> zeroinitializer
+@index = global i16 1
+@ptr = global [4 x i16] [i16 1, i16 2, i16 3, i16 4]
+@ptrptr = global i16* null
+
+; constant index
+define void @test1() {
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %1 = getelementptr i16, <4 x i16*> %0, i16 1
+
+  ret void
+}
+
+;CHECK-LABEL: @test1
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: getelementptr i16, i16* %[[I0]], i16 1
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: getelementptr i16, i16* %[[I1]], i16 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: getelementptr i16, i16* %[[I2]], i16 1
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: getelementptr i16, i16* %[[I3]], i16 1
+
+; non-constant index
+define void @test2() {
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %index = load i16, i16* @index
+  %1 = getelementptr i16, <4 x i16*> %0, i16 %index
+
+  ret void
+}
+
+;CHECK-LABEL: @test2
+;CHECK: %0 = load <4 x i16*>, <4 x i16*>* @vec
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: %index = load i16, i16* @index
+;CHECK: %.splatinsert = insertelement <4 x i16> undef, i16 %index, i32 0
+;CHECK: %.splat = shufflevector <4 x i16> %.splatinsert, <4 x i16> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0]] = extractelement <4 x i16> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %[[I0]], i16 %.splat[[I0]]
+;CHECK: %.splat[[I1]] = extractelement <4 x i16> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %[[I1]], i16 %.splat[[I1]]
+;CHECK: %.splat[[I2]] = extractelement <4 x i16> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %[[I2]], i16 %.splat[[I2]]
+;CHECK: %.splat[[I3]] = extractelement <4 x i16> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %[[I3]], i16 %.splat[[I3]]
+
+
+; Check that the scalarizer can handle vector GEPs with scalar pointer
+
+; constant pointer
+define void @test3() {
+bb:
+  %0 = bitcast [4 x i16]* @ptr to i16*
+  %1 = getelementptr i16, i16* %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+
+  ret void
+}
+
+;CHECK-LABEL: @test3
+;CHECK: %0 = bitcast [4 x i16]* @ptr to i16*
+;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0
+;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0
+;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %.splat[[I1]], i16 1
+;CHECK: %.splat[[I2:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %.splat[[I2]], i16 2
+;CHECK: %.splat[[I3:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %.splat[[I3]], i16 3
+
+; non-constant pointer
+define void @test4() {
+bb:
+  %0 = load i16*, i16** @ptrptr
+  %1 = getelementptr i16, i16* %0, <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+
+  ret void
+}
+
+;CHECK-LABEL: @test4
+;CHECK: %0 = load i16*, i16** @ptrptr
+;CHECK: %.splatinsert = insertelement <4 x i16*> undef, i16* %0, i32 0
+;CHECK: %.splat = shufflevector <4 x i16*> %.splatinsert, <4 x i16*> undef, <4 x i32> zeroinitializer
+;CHECK: %.splat[[I0:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 0
+;CHECK: getelementptr i16, i16* %.splat[[I0]], i16 0
+;CHECK: %.splat[[I1:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 1
+;CHECK: getelementptr i16, i16* %.splat[[I1]], i16 1
+;CHECK: %.splat[[I2:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 2
+;CHECK: getelementptr i16, i16* %.splat[[I2]], i16 2
+;CHECK: %.splat[[I3:.i[0-9]*]] = extractelement <4 x i16*> %.splat, i32 3
+;CHECK: getelementptr i16, i16* %.splat[[I3]], i16 3
+
+; constant index, inbounds
+define void @test5() {
+bb:
+  %0 = load <4 x i16*>, <4 x i16*>* @vec
+  %1 = getelementptr inbounds i16, <4 x i16*> %0, i16 1
+
+  ret void
+}
+
+;CHECK-LABEL: @test5
+;CHECK: %[[I0:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 0
+;CHECK: getelementptr inbounds i16, i16* %[[I0]], i16 1
+;CHECK: %[[I1:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 1
+;CHECK: getelementptr inbounds i16, i16* %[[I1]], i16 1
+;CHECK: %[[I2:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 2
+;CHECK: getelementptr inbounds i16, i16* %[[I2]], i16 1
+;CHECK: %[[I3:.i[0-9]*]] = extractelement <4 x i16*> %0, i32 3
+;CHECK: getelementptr inbounds i16, i16* %[[I3]], i16 1
+