Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4779,23 +4779,23 @@ DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); - } else { - switch (DstAlign & 7) { - case 0: VT = MVT::i64; break; - case 4: VT = MVT::i32; break; - case 2: VT = MVT::i16; break; - default: VT = MVT::i8; break; - } - } + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + // Find the largest legal integer type. MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. if (VT.bitsGT(LVT)) VT = LVT; } Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -12147,12 +12147,6 @@ } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } Index: llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll +++ llvm/trunk/test/CodeGen/ARM/memcpy-inline.ll @@ -95,10 +95,7 @@ ; CHECK: movt [[REG7:r[0-9]+]], #22866 ; CHECK: str [[REG7]] ; CHECK-T1-LABEL: t5: -; CHECK-T1: movs [[TREG3:r[0-9]]], -; CHECK-T1: strb [[TREG3]], -; CHECK-T1: movs [[TREG4:r[0-9]]], -; CHECK-T1: strb [[TREG4]], +; CHECK-T1: bl _memcpy tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %C, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str5, i64 0, i64 0), i64 7, i32 1, i1 false) ret void } Index: llvm/trunk/test/CodeGen/ARM/memset-inline.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/memset-inline.ll +++ llvm/trunk/test/CodeGen/ARM/memset-inline.ll @@ -38,6 +38,56 @@ ret void } +define void @t3(i8* %p) { +entry: +; CHECK-7A-LABEL: t3: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t3: +; CHECK-6M-NOT: muls +; CHECK-6M: strb [[REG:r[0-9]+]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], +; CHECK-6M: strb [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 1, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + +define void @t4(i8* %p) { +entry: +; CHECK-7A-LABEL: t4: +; CHECK-7A: muls [[REG:r[0-9]+]], +; CHECK-7A: str [[REG]], +; CHECK-6M-LABEL: t4: +; CHECK-6M: muls [[REG:r[0-9]+]], +; CHECK-6M: strh [[REG]], +; CHECK-6M: strh [[REG]], + br label %for.body + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %0 = trunc i32 %i to i8 + call void @llvm.memset.p0i8.i32(i8* %p, i8 %0, i32 4, i32 2, i1 false) + call void @something(i8* %p) + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp eq i32 %inc, 255 + br i1 %exitcond, label %for.end, label %for.body + +for.end: + ret void +} + declare void @something(i8*) nounwind declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind