diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -178,9 +178,11 @@ unsigned DstAddrSp = cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace(); - IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); - Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); - Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp); + auto *AI = dyn_cast<AllocaInst>(MI->getSource()); + auto *SrcTy = AI ? AI->getAllocatedType() + : IntegerType::get(MI->getContext(), Size << 3); + Type *NewSrcPtrTy = PointerType::get(SrcTy, SrcAddrSp); + Type *NewDstPtrTy = PointerType::get(SrcTy, DstAddrSp); // If the memcpy has metadata describing the members, see if we can get the // TBAA tag describing our copy. @@ -201,7 +203,7 @@ Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); - LoadInst *L = Builder.CreateLoad(IntType, Src); + LoadInst *L = Builder.CreateLoad(SrcTy, Src); // Alignment from the mem intrinsic will be better, so use it. 
L->setAlignment(*CopySrcAlign); if (CopyMD) diff --git a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll --- a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll +++ b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll @@ -71,4 +71,18 @@ ret void } +define void @test7(ptr %dest) { +; CHECK-LABEL: @test7( +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca half, align 2, addrspace(5) +; CHECK-NEXT: [[ALLOCA_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr +; CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[ALLOCA_ASCAST]], align 2 +; CHECK-NEXT: store half [[TMP1]], ptr [[DEST:%.*]], align 1 +; CHECK-NEXT: ret void +; + %alloca = alloca half, align 2, addrspace(5) + %alloca.ascast = addrspacecast ptr addrspace(5) %alloca to ptr + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr align 2 %alloca.ascast, i64 2, i1 false) + ret void +} + declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll --- a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll @@ -25,30 +25,33 @@ ; CHECK-NEXT: [[I9:%.*]] = sdiv i32 [[ARG:%.*]], 128 ; CHECK-NEXT: [[I10:%.*]] = shl nsw i32 [[I9]], 7 ; CHECK-NEXT: [[ARG_OFF:%.*]] = add i32 [[ARG]], 127 -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255 -; CHECK-NEXT: br i1 [[TMP0]], label [[BB12:%.*]], label [[BB13:%.*]] +; CHECK-NEXT: [[I11_NOT9:%.*]] = icmp ult i32 [[ARG_OFF]], 255 +; CHECK-NEXT: br i1 [[I11_NOT9]], label [[BB12:%.*]], label [[BB13:%.*]] ; CHECK: bb12.loopexit: -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64 -; CHECK-NEXT: [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 [[I3_SROA_8_0_INSERT_EXT]], 32 -; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64 -; 
CHECK-NEXT: [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or i64 [[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64 [[I3_SROA_0_4_INSERT_INSERT32:%.*]] to <2 x i32> ; CHECK-NEXT: br label [[BB12]] ; CHECK: bb12: -; CHECK-NEXT: [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], [[BB12_LOOPEXIT:%.*]] ], [ 180388626456, [[BB:%.*]] ] -; CHECK-NEXT: store i64 [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA5:![0-9]+]] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ [[TMP0]], [[BB12_LOOPEXIT:%.*]] ], [ <i32 24, i32 42>, [[BB:%.*]] ] +; CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[ARG1:%.*]], align 4, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: ret void ; CHECK: bb13: -; CHECK-NEXT: [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], [[BB13]] ], [ 42, [[BB]] ] -; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], [[BB13]] ], [ 24, [[BB]] ] -; CHECK-NEXT: [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], [[BB13]] ], [ 0, [[BB]] ] -; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]] -; CHECK-NEXT: [[I24:%.*]] = or i32 [[I4_05]], 1 -; CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]] -; CHECK-NEXT: [[I24_1:%.*]] = or i32 [[I4_05]], 2 -; CHECK-NEXT: [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]] -; CHECK-NEXT: [[I24_2:%.*]] = or i32 [[I4_05]], 3 -; CHECK-NEXT: [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]] -; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_05]], 4 +; CHECK-NEXT: [[I3_SROA_0_0:%.*]] = phi i64 [ [[I3_SROA_0_4_INSERT_INSERT32]], [[BB13]] ], [ 180388626456, [[BB]] ] +; CHECK-NEXT: [[I4_010:%.*]] = phi i32 [ [[I24_3:%.*]], [[BB13]] ], [ 0, [[BB]] ] +; CHECK-NEXT: [[I3_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[I3_SROA_0_0]] to i32 +; CHECK-NEXT: [[I21:%.*]] = mul nsw i32 [[I4_010]], [[I3_SROA_0_0_EXTRACT_TRUNC]] +; CHECK-NEXT: [[I24:%.*]] = or i32 [[I4_010]], 1 +; CHECK-NEXT: [[I3_SROA_0_4_EXTRACT_SHIFT:%.*]] = lshr i64 [[I3_SROA_0_0]], 32 +; CHECK-NEXT: [[I3_SROA_0_4_EXTRACT_TRUNC:%.*]] = trunc i64 [[I3_SROA_0_4_EXTRACT_SHIFT]] to i32 +; 
CHECK-NEXT: [[I21_1:%.*]] = mul nsw i32 [[I24]], [[I3_SROA_0_4_EXTRACT_TRUNC]] +; CHECK-NEXT: [[I24_1:%.*]] = or i32 [[I4_010]], 2 +; CHECK-NEXT: [[I21_2:%.*]] = mul nsw i32 [[I21]], [[I24_1]] +; CHECK-NEXT: [[I3_SROA_0_0_INSERT_EXT20:%.*]] = zext i32 [[I21_2]] to i64 +; CHECK-NEXT: [[I24_2:%.*]] = or i32 [[I4_010]], 3 +; CHECK-NEXT: [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]] +; CHECK-NEXT: [[I3_SROA_0_4_INSERT_EXT29:%.*]] = zext i32 [[I21_3]] to i64 +; CHECK-NEXT: [[I3_SROA_0_4_INSERT_SHIFT30:%.*]] = shl nuw i64 [[I3_SROA_0_4_INSERT_EXT29]], 32 +; CHECK-NEXT: [[I3_SROA_0_4_INSERT_INSERT32]] = or i64 [[I3_SROA_0_4_INSERT_SHIFT30]], [[I3_SROA_0_0_INSERT_EXT20]] +; CHECK-NEXT: [[I24_3]] = add nuw nsw i32 [[I4_010]], 4 ; CHECK-NEXT: [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]] ; CHECK-NEXT: br i1 [[I11_NOT_3]], label [[BB12_LOOPEXIT]], label [[BB13]], !llvm.loop [[LOOP8:![0-9]+]] ; diff --git a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll --- a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll +++ b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll @@ -6,9 +6,14 @@ define void @swap(ptr %p1, ptr %p2) { ; CHECK-LABEL: @swap( ; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 1 +; CHECK-NEXT: [[TMP_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i64 [[TMP1]], 32 +; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i64 [[TMP_SROA_2_0_EXTRACT_SHIFT]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[P2:%.*]], align 1 ; CHECK-NEXT: store i64 [[TMP2]], ptr [[P1]], align 1 -; CHECK-NEXT: store i64 [[TMP1]], ptr [[P2]], align 1 +; CHECK-NEXT: store i32 [[TMP_SROA_0_0_EXTRACT_TRUNC]], ptr [[P2]], align 1 +; CHECK-NEXT: [[P2_REPACK3:%.*]] = getelementptr inbounds [2 x i32], ptr [[P2]], i64 0, i64 1 +; CHECK-NEXT: store i32 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[P2_REPACK3]], align 1 ; CHECK-NEXT: ret void ; %tmp = 
alloca [2 x i32]