diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp --- a/llvm/lib/IR/ConstantFold.cpp +++ b/llvm/lib/IR/ConstantFold.cpp @@ -792,13 +792,36 @@ if (isa(Val) || isa(Idx)) return UndefValue::get(Val->getType()->getVectorElementType()); - if (ConstantInt *CIdx = dyn_cast(Idx)) { - // ee({w,x,y,z}, wrong_value) -> undef - if (CIdx->uge(Val->getType()->getVectorNumElements())) - return UndefValue::get(Val->getType()->getVectorElementType()); - return Val->getAggregateElement(CIdx->getZExtValue()); + auto *CIdx = dyn_cast(Idx); + if (!CIdx) + return nullptr; + + // ee({w,x,y,z}, wrong_value) -> undef + if (CIdx->uge(Val->getType()->getVectorNumElements())) + return UndefValue::get(Val->getType()->getVectorElementType()); + + // ee (gep (ptr, idx0, ...), idx) -> gep (ee (ptr, idx), ee (idx0, idx), ...) + if (auto *CE = dyn_cast(Val)) { + if (CE->getOpcode() == Instruction::GetElementPtr) { + SmallVector Ops; + Ops.reserve(CE->getNumOperands()); + for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { + Constant *Op = CE->getOperand(i); + if (Op->getType()->isVectorTy()) { + Constant *ScalarOp = ConstantFoldExtractElementInstruction(Op, Idx); + if (!ScalarOp) + return nullptr; + Ops.push_back(ScalarOp); + } else + Ops.push_back(Op); + } + return CE->getWithOperands(Ops, CE->getType()->getVectorElementType(), + false, + Ops[0]->getType()->getPointerElementType()); + } } - return nullptr; + + return Val->getAggregateElement(CIdx); } Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val, diff --git a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll --- a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll +++ b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll @@ -9,7 +9,7 @@ define <2 x i16*> @test_gep() { ; CHECK-LABEL: @test_gep( -; CHECK-NEXT: ret <2 x i16*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)> +; CHECK-NEXT: ret <2 x i16*> ; %A = getelementptr [1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer %B = bitcast <2 x %rec8*> %A to <2 x i16*> diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -569,7 +569,7 @@ define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) { ; CHECK-LABEL: @gep_cvbase_w_cv_idx( -; CHECK-NEXT: ret i32* extractelement (<2 x i32*> getelementptr (i32, <2 x i32*> , <2 x i64> ), i32 1) +; CHECK-NEXT: ret i32* getelementptr inbounds (i32, i32* @GLOBAL, i64 1) ; %gep = getelementptr i32, <2 x i32*> , <2 x i64> %ee = extractelement <2 x i32*> %gep, i32 1 diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16*, i16** [[TMP2]], i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16** [[TMP3]] to <2 x i16*>* -; CHECK-NEXT: store <2 x i16*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8 +; CHECK-NEXT: store <2 x i16*> , <2 x i16*>* [[TMP4]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2 ; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0