Index: llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -913,10 +913,20 @@ // Since we're changing the parameter to the callsite, we need to make sure // that what would be the new parameter dominates the callsite. - // TODO: Support moving instructions like GEPs upwards. - if (Instruction *cpyDestInst = dyn_cast(cpyDest)) - if (!DT->dominates(cpyDestInst, C)) + auto DominatesCall = [this, C](Value *V) { + if (auto *I = dyn_cast(V)) + return DT->dominates(I, C); + return true; + }; + if (!DominatesCall(cpyDest)) { + // Support moving a constant index GEP before the call. + auto *GEP = dyn_cast(cpyDest); + if (GEP && GEP->hasAllConstantIndices() && + DominatesCall(GEP->getPointerOperand())) + GEP->moveBefore(C); + else return false; + } // In addition to knowing that the call does not access src in some // unexpected manner, for example via a global, which we deduce from Index: llvm/test/Transforms/MemCpyOpt/callslot.ll =================================================================== --- llvm/test/Transforms/MemCpyOpt/callslot.ll +++ llvm/test/Transforms/MemCpyOpt/callslot.ll @@ -150,9 +150,10 @@ ; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 1 ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 1 ; CHECK-NEXT: [[SRC_I8:%.*]] = bitcast [8 x i8]* [[SRC]] to i8* -; CHECK-NEXT: call void @accept_ptr(i8* [[SRC_I8]]) [[ATTR3]] ; CHECK-NEXT: [[DEST_I8:%.*]] = getelementptr [16 x i8], [16 x i8]* [[DEST]], i64 0, i64 8 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[DEST_I8]], i8* [[SRC_I8]], i64 8, i1 false) +; CHECK-NEXT: [[DEST_I81:%.*]] = bitcast i8* [[DEST_I8]] to [8 x i8]* +; CHECK-NEXT: [[DEST_I812:%.*]] = bitcast [8 x i8]* [[DEST_I81]] to i8* +; CHECK-NEXT: call void @accept_ptr(i8* [[DEST_I812]]) [[ATTR3]] ; CHECK-NEXT: ret void ; %dest = alloca [16 x i8]