Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4779,9 +4779,10 @@ DAG.getMachineFunction()); if (VT == MVT::Other) { + EVT PointerVT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); + TLI.allowsMisalignedMemoryAccesses(PointerVT, DstAS, DstAlign)) { + VT = PointerVT; } else { switch (DstAlign & 7) { case 0: VT = MVT::i64; break; Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -12147,12 +12147,6 @@ } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } Index: test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- test/CodeGen/ARM/memcpy-inline.ll +++ test/CodeGen/ARM/memcpy-inline.ll @@ -35,6 +35,8 @@ ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0], [[INC]] ; CHECK: vld1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0] +; CHECK-T1-LABEL: t1: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str1, i64 0, i64 0), i64 31, i32 1, i1 false) ret void } @@ -51,6 +53,8 @@ ; CHECK: str [[REG2]], [r0] ; CHECK: vld1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r3] +; CHECK-T1-LABEL: t2: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str2, i64 0, i64 0), i64 36, i32 1, i1 false) ret void } @@ -62,6 +66,8 @@ ; CHECK: vst1.8 {d{{[0-9]+}}, d{{[0-9]+}}}, [r0]! ; CHECK: vldr d{{[0-9]+}}, [r1] ; CHECK: vst1.8 {d{{[0-9]+}}}, [r0] +; CHECK-T1-LABEL: t3: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str3, i64 0, i64 0), i64 24, i32 1, i1 false) ret void } @@ -72,6 +78,8 @@ ; CHECK: vld1.64 {[[REG3:d[0-9]+]], [[REG4:d[0-9]+]]}, [r1] ; CHECK: vst1.8 {[[REG3]], [[REG4]]}, [r0]! ; CHECK: strh [[REG5:r[0-9]+]], [r0] +; CHECK-T1-LABEL: t4: +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str4, i64 0, i64 0), i64 18, i32 1, i1 false) ret void } @@ -87,10 +95,7 @@ ; CHECK: movt [[REG7:r[0-9]+]], #22866 ; CHECK: str [[REG7]] ; CHECK-T1-LABEL: t5: -; CHECK-T1: movs [[TREG3:r[0-9]]], -; CHECK-T1: strb [[TREG3]], -; CHECK-T1: movs [[TREG4:r[0-9]]], -; CHECK-T1: strb [[TREG4]], +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) ret void } Index: test/CodeGen/ARM/memset-inline.ll =================================================================== --- test/CodeGen/ARM/memset-inline.ll +++ test/CodeGen/ARM/memset-inline.ll @@ -1,22 +1,36 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -pre-RA-sched=source -disable-post-ra | FileCheck %s -check-prefix=CHECK-7A +; RUN: llc < %s -mtriple=thumbv6m -pre-RA-sched=source -disable-post-ra -mattr=+strict-align | FileCheck %s -check-prefix=CHECK-6M define void @t1(i8* nocapture %c) nounwind optsize { entry: -; CHECK-LABEL: t1: -; CHECK: movs r1, #0 -; CHECK: strd r1, r1, [r0] -; CHECK: str r1, [r0, #8] +; CHECK-7A-LABEL: t1: +; CHECK-7A: movs r1, #0 +; CHECK-7A: strd r1, r1, [r0] +; CHECK-7A: str r1, [r0, #8] +; CHECK-6M-LABEL: t1: +; CHECK-6M: movs r1, #0 +; CHECK-6M: str r1, [r0] +; CHECK-6M: str r1, [r0, #4] +; CHECK-6M: str r1, [r0, #8] call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false) ret void } define void @t2() nounwind ssp { entry: -; CHECK-LABEL: t2: -; CHECK: vmov.i32 {{q[0-9]+}}, #0x0 -; CHECK: movs r1, #10 -; CHECK: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 -; CHECK: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] +; CHECK-7A-LABEL: t2: +; CHECK-7A: vmov.i32 {{q[0-9]+}}, #0x0 +; CHECK-7A: movs r1, #10 +; CHECK-7A: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2], r1 +; CHECK-7A: vst1.16 {d{{[0-9]+}}, d{{[0-9]+}}}, [r2] +; CHECK-6M-LABEL: t2: +; CHECK-6M: movs [[REG:r[0-9]+]], #0 +; CHECK-6M: str [[REG]], [sp, #20] +; CHECK-6M: str [[REG]], [sp, #16] +; CHECK-6M: str [[REG]], [sp, #12] +; CHECK-6M: str [[REG]], [sp, #8] +; CHECK-6M: str [[REG]], [sp, #4] +; CHECK-6M: str [[REG]], [sp] %buf = alloca [26 x i8], align 1 %0 = getelementptr inbounds [26 x i8], [26 x i8]* %buf, i32 0, i32 0 call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 26, i32 1, i1 false) @@ -24,6 +38,31 @@ ret void } +define void @t3(i8* %p) { +entry: +; CHECK-7A-LABEL: t3: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t3: +; CHECK-6M: strb [[REG:r[0-9]+]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + declare void @something(i8*) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind