Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -10448,11 +10448,6 @@ if (isa<ConstantSDNode>(U->getOperand(0)) || isa<ConstantSDNode>(U->getOperand(1))) return SDValue(); - - // Check that its not already using a shift. - if (U->getOperand(0).getOpcode() == ISD::SHL || - U->getOperand(1).getOpcode() == ISD::SHL) - return SDValue(); break; } } Index: test/CodeGen/ARM/load-combine-big-endian.ll =================================================================== --- test/CodeGen/ARM/load-combine-big-endian.ll +++ test/CodeGen/ARM/load-combine-big-endian.ll @@ -38,11 +38,13 @@ ; BSWAP is not supported by 32 bit target ; CHECK-LABEL: load_i32_by_i8_bswap: ; CHECK: ldr r0, [r0] -; CHECK: and -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: orr +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: and r2, r0, #65280 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 +; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap: @@ -159,17 +161,19 @@ ; CHECK-LABEL: load_i64_by_i8_bswap: ; CHECK: ldr{{.*}}r0 ; CHECK: ldr{{.*}}r0 -; CHECK: and -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK: mov pc, lr +; CHECK-NEXT: and r2, r0, #65280 +; CHECK-NEXT: and r3, r12, r0, lsr #8 +; CHECK-NEXT: orr r3, r3, r0, lsr #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 +; CHECK-NEXT: and r2, r12, r1, lsr #8 +; CHECK-NEXT: orr r0, r0, r3 +; CHECK-NEXT: and r3, r1, #65280 +; CHECK-NEXT: orr r2, r2, r1, lsr #24 +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r1, r1, r3, lsl #8 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: mov pc, lr ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap: ; 
CHECK-ARMv6: ldrd r2, r3, [r0] @@ -276,11 +280,11 @@ ; CHECK-LABEL: load_i32_by_i8_nonzero_offset: ; CHECK: ldr r0, [r0, #1] ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr @@ -317,11 +321,11 @@ ; CHECK-LABEL: load_i32_by_i8_neg_offset: ; CHECK: ldr r0, [r0, #-4] ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr @@ -426,11 +430,11 @@ ; CHECK-LABEL: load_i32_by_bswap_i16: ; CHECK: ldr r0, [r0] ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr @@ -480,12 +484,12 @@ ; CHECK-LABEL: load_i32_by_i8_base_offset_index: ; CHECK: add r0, r0, r1 ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 ; CHECK-NEXT: ldr r0, [r0, #12] +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr ; @@ -530,12 +534,12 @@ ; CHECK-LABEL: load_i32_by_i8_base_offset_index_2: ; CHECK: add 
r0, r0, r1 ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 ; CHECK-NEXT: ldr r0, [r0, #13] +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr ; Index: test/CodeGen/ARM/load-combine.ll =================================================================== --- test/CodeGen/ARM/load-combine.ll +++ test/CodeGen/ARM/load-combine.ll @@ -79,11 +79,13 @@ ; BSWAP is not supported by 32 bit target ; CHECK-LABEL: load_i32_by_i8_bswap: ; CHECK: ldr r0, [r0] -; CHECK: and -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: orr +; CHECK-NEXT: mov r1, #65280 +; CHECK-NEXT: and r2, r0, #65280 +; CHECK-NEXT: and r1, r1, r0, lsr #8 +; CHECK-NEXT: orr r1, r1, r0, lsr #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 +; CHECK-NEXT: orr r0, r0, r1 ; CHECK: mov pc, lr ; CHECK-ARMv6-LABEL: load_i32_by_i8_bswap: @@ -171,16 +173,18 @@ ; CHECK: ldr{{.*}}r0 ; CHECK: ldr{{.*}}r0 ; CHECK: and -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: and -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK-NEXT: orr -; CHECK: mov pc, lr +; CHECK-NEXT: and r3, r12, r0, lsr #8 +; CHECK-NEXT: orr r3, r3, r0, lsr #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 +; CHECK-NEXT: and r2, r12, r1, lsr #8 +; CHECK-NEXT: orr r0, r0, r3 +; CHECK-NEXT: and r3, r1, #65280 +; CHECK-NEXT: orr r2, r2, r1, lsr #24 +; CHECK-NEXT: lsl r1, r1, #24 +; CHECK-NEXT: orr r1, r1, r3, lsl #8 +; CHECK-NEXT: orr r1, r1, r2 +; CHECK-NEXT: mov pc, lr ; CHECK-ARMv6-LABEL: load_i64_by_i8_bswap: ; CHECK-ARMv6: ldrd r2, r3, [r0] @@ -300,11 +304,11 @@ ; CHECK-LABEL: load_i32_by_i8_nonzero_offset_bswap: ; CHECK: ldr r0, [r0, #1] ; CHECK-NEXT: mov r1, #65280 
-; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr @@ -341,11 +345,11 @@ ; CHECK-LABEL: load_i32_by_i8_neg_offset_bswap: ; CHECK: ldr r0, [r0, #-4] ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr @@ -384,11 +388,11 @@ ; CHECK-LABEL: load_i32_by_bswap_i16: ; CHECK: ldr r0, [r0] ; CHECK-NEXT: mov r1, #65280 -; CHECK-NEXT: mov r2, #16711680 +; CHECK-NEXT: and r2, r0, #65280 ; CHECK-NEXT: and r1, r1, r0, lsr #8 -; CHECK-NEXT: and r2, r2, r0, lsl #8 ; CHECK-NEXT: orr r1, r1, r0, lsr #24 -; CHECK-NEXT: orr r0, r2, r0, lsl #24 +; CHECK-NEXT: lsl r0, r0, #24 +; CHECK-NEXT: orr r0, r0, r2, lsl #8 ; CHECK-NEXT: orr r0, r0, r1 ; CHECK-NEXT: mov pc, lr Index: test/CodeGen/ARM/unfold-shifts.ll =================================================================== --- test/CodeGen/ARM/unfold-shifts.ll +++ test/CodeGen/ARM/unfold-shifts.ll @@ -1,15 +1,14 @@ -; RUN: llc -mtriple armv6t2 %s -o - | FileCheck %s -; RUN: llc -mtriple thumbv6t2 %s -o - | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc -mtriple armv7 %s -o - | FileCheck %s -; RUN: llc -mtriple thumbv7 %s -o - | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc -mtriple thumbv7m %s -o - | FileCheck %s --check-prefix=CHECK-T2 -; RUN: llc -mtriple thumbv8m.main %s -o - | FileCheck %s --check-prefix=CHECK-T2 +; RUN: llc -mtriple armv6t2 %s -o - | FileCheck %s --check-prefix=CHECK-A32 --check-prefix=CHECK +; RUN: llc -mtriple 
thumbv6t2 %s -o - | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK +; RUN: llc -mtriple armv7 %s -o - | FileCheck %s --check-prefix=CHECK-A32 --check-prefix=CHECK +; RUN: llc -mtriple thumbv7 %s -o - | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK +; RUN: llc -mtriple thumbv7m %s -o - | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK +; RUN: llc -mtriple thumbv8m.main %s -o - | FileCheck %s --check-prefix=CHECK-T2 --check-prefix=CHECK ; CHECK-LABEL: unfold1 ; CHECK-NOT: mov -; CHECK: orr r0, r0, #255 -; CHECK: add r0, r1, r0, lsl #1 -; CHECK-T2-NOT: mov +; CHECK-A32: orr r0, r0, #255 +; CHECK-A32: add r0, r1, r0, lsl #1 ; CHECK-T2: orr r0, r0, #255 ; CHECK-T2: add.w r0, r1, r0, lsl #1 define arm_aapcscc i32 @unfold1(i32 %a, i32 %b) { @@ -22,9 +21,8 @@ ; CHECK-LABEL: unfold2 ; CHECK-NOT: mov -; CHECK: orr r0, r0, #4080 -; CHECK: sub r0, r1, r0, lsl #2 -; CHECK-T2-NOT: mov +; CHECK-A32: orr r0, r0, #4080 +; CHECK-A32: sub r0, r1, r0, lsl #2 ; CHECK-T2: orr r0, r0, #4080 ; CHECK-T2: sub.w r0, r1, r0, lsl #2 define arm_aapcscc i32 @unfold2(i32 %a, i32 %b) { @@ -37,9 +35,8 @@ ; CHECK-LABEL: unfold3 ; CHECK-NOT: mov -; CHECK: orr r0, r0, #65280 -; CHECK: and r0, r1, r0, lsl #4 -; CHECK-T2-NOT: mov +; CHECK-A32: orr r0, r0, #65280 +; CHECK-A32: and r0, r1, r0, lsl #4 ; CHECK-T2: orr r0, r0, #65280 ; CHECK-T2: and.w r0, r1, r0, lsl #4 define arm_aapcscc i32 @unfold3(i32 %a, i32 %b) { @@ -52,9 +49,8 @@ ; CHECK-LABEL: unfold4 ; CHECK-NOT: mov -; CHECK: orr r0, r0, #1044480 -; CHECK: eor r0, r1, r0, lsl #5 -; CHECK-T2-NOT: mov +; CHECK-A32: orr r0, r0, #1044480 +; CHECK-A32: eor r0, r1, r0, lsl #5 ; CHECK-T2: orr r0, r0, #1044480 ; CHECK-T2: eor.w r0, r1, r0, lsl #5 define arm_aapcscc i32 @unfold4(i32 %a, i32 %b) { @@ -67,8 +63,8 @@ ; CHECK-LABEL: unfold5 ; CHECK-NOT: mov -; CHECK: add r0, r0, #496 -; CHECK: orr r0, r1, r0, lsl #6 +; CHECK-A32: add r0, r0, #496 +; CHECK-A32: orr r0, r1, r0, lsl #6 ; CHECK-T2: add.w r0, r0, #496 ; CHECK-T2: 
orr.w r0, r1, r0, lsl #6 define arm_aapcscc i32 @unfold5(i32 %a, i32 %b) { @@ -81,9 +77,8 @@ ; CHECK-LABEL: unfold6 ; CHECK-NOT: mov -; CHECK: add r0, r0, #7936 -; CHECK: and r0, r1, r0, lsl #8 -; CHECK-T2-NOT: mov +; CHECK-A32: add r0, r0, #7936 +; CHECK-A32: and r0, r1, r0, lsl #8 ; CHECK-T2: add.w r0, r0, #7936 ; CHECK-T2: and.w r0, r1, r0, lsl #8 define arm_aapcscc i32 @unfold6(i32 %a, i32 %b) { @@ -96,9 +91,8 @@ ; CHECK-LABEL: unfold7 ; CHECK-NOT: mov -; CHECK: and r0, r0, #256 -; CHECK: add r0, r1, r0, lsl #1 -; CHECK-T2-NOT: mov +; CHECK-A32: and r0, r0, #256 +; CHECK-A32: add r0, r1, r0, lsl #1 ; CHECK-T2: and r0, r0, #256 ; CHECK-T2: add.w r0, r1, r0, lsl #1 define arm_aapcscc i32 @unfold7(i32 %a, i32 %b) { @@ -111,9 +105,8 @@ ; CHECK-LABEL: unfold8 ; CHECK-NOT: mov -; CHECK: add r0, r0, #126976 -; CHECK: eor r0, r1, r0, lsl #9 -; CHECK-T2-NOT: mov +; CHECK-A32: add r0, r0, #126976 +; CHECK-A32: eor r0, r1, r0, lsl #9 ; CHECK-T2: add.w r0, r0, #126976 ; CHECK-T2: eor.w r0, r1, r0, lsl #9 define arm_aapcscc i32 @unfold8(i32 %a, i32 %b) { @@ -126,9 +119,8 @@ ; CHECK-LABEL: unfold9 ; CHECK-NOT: mov -; CHECK: eor r0, r0, #255 -; CHECK: add r0, r1, r0, lsl #1 -; CHECK-T2-NOT: mov +; CHECK-A32: eor r0, r0, #255 +; CHECK-A32: add r0, r1, r0, lsl #1 ; CHECK-T2: eor r0, r0, #255 ; CHECK-T2: add.w r0, r1, r0, lsl #1 define arm_aapcscc i32 @unfold9(i32 %a, i32 %b) { @@ -141,9 +133,8 @@ ; CHECK-LABEL: unfold10 ; CHECK-NOT: mov r2 -; CHECK: orr r2, r0, #4080 -; CHECK: cmp r1, r2, lsl #10 -; CHECK-T2-NOT: mov.w r2 +; CHECK-A32: orr r2, r0, #4080 +; CHECK-A32: cmp r1, r2, lsl #10 ; CHECK-T2: orr r2, r0, #4080 ; CHECK-T2: cmp.w r1, r2, lsl #10 define arm_aapcscc i32 @unfold10(i32 %a, i32 %b) { @@ -157,9 +148,8 @@ ; CHECK-LABEL: unfold11 ; CHECK-NOT: mov r2 -; CHECK: add r2, r0, #7936 -; CHECK: cmp r1, r2, lsl #11 -; CHECK-T2-NOT: mov.w r2 +; CHECK-A32: add r2, r0, #7936 +; CHECK-A32: cmp r1, r2, lsl #11 ; CHECK-T2: add.w r2, r0, #7936 ; CHECK-T2: cmp.w r1, r2, lsl #11 
define arm_aapcscc i32 @unfold11(i32 %a, i32 %b) { @@ -171,3 +161,53 @@ ret i32 %conv } +; CHECK-LABEL: using_shl +; CHECK-NOT: mov +; CHECK-A32: orr r0, r0, #255 +; CHECK-A32: lsl r0, r0, #1 +; CHECK-A32: add r0, r0, r1, lsl r2 +; CHECK-T2: orr r0, r0, #255 +; CHECK-T2: lsls r1, r2 +; CHECK-T2: add.w r0, r1, r0, lsl #1 +define arm_aapcscc i32 @using_shl(i32 %a, i32 %b, i32 %c) { +entry: + %lsl0 = shl i32 %a, 1 + %lsl1 = shl i32 %b, %c + %or = or i32 %lsl0, 510 + %add = add nsw i32 %or, %lsl1 + ret i32 %add +} + +; CHECK-LABEL: using_srl +; CHECK-NOT: mov +; CHECK-A32: add r0, r0, #255 +; CHECK-A32: lsl r0, r0, #1 +; CHECK-A32: orr r0, r0, r1, lsr r2 +; CHECK-T2: adds r0, #255 +; CHECK-T2: lsrs r1, r2 +; CHECK-T2: orr.w r0, r1, r0, lsl #1 +define arm_aapcscc i32 @using_srl(i32 %a, i32 %b, i32 %c) { +entry: + %lsl = shl i32 %a, 1 + %lsr = lshr i32 %b, %c + %add = add nsw i32 %lsl, 510 + %or = or i32 %add, %lsr + ret i32 %or +} + +; CHECK-LABEL: using_sra +; CHECK-NOT: mov +; CHECK-A32: orr r0, r0, #255 +; CHECK-A32: lsl r0, r0, #1 +; CHECK-A32: and r0, r0, r1, asr r2 +; CHECK-T2: orr r0, r0, #255 +; CHECK-T2: asrs r1, r2 +; CHECK-T2: and.w r0, r1, r0, lsl #1 +define arm_aapcscc i32 @using_sra(i32 %a, i32 %b, i32 %c) { +entry: + %lsl = shl i32 %a, 1 + %asr = ashr i32 %b, %c + %or = or i32 %lsl, 510 + %and = and i32 %or, %asr + ret i32 %and +}