Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3788,6 +3788,16 @@ if (LoadN->getNumValues() > 2) return false; + // Only allow byte offsets. + if (ShAmt % 8) + return false; + + // Ensure that this isn't going to produce an unsupported unaligned access. + if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), + ExtVT, LoadN->getAddressSpace(), + ShAmt / 8)) + return false; + // If the load that we're shrinking is an extload and we're not just // discarding the extension we can't simply shrink the load. Bail. // TODO: It would be possible to merge the extensions in some cases. @@ -8274,6 +8284,19 @@ // then the result of the shift+trunc is zero/undef (handled elsewhere). if (ShAmt >= cast(N0)->getMemoryVT().getSizeInBits()) return SDValue(); + + // If the SRL is only used by a masking AND, we may be able to adjust + // the ExtVT to make the AND redundant. + SDNode *Mask = *(N->use_begin()); + if (Mask->getOpcode() == ISD::AND && + isa(Mask->getOperand(1))) { + const APInt &ShiftMask = + cast(Mask->getOperand(1))->getAPIntValue(); + // Recompute the type. + if (ShiftMask.isMask() && (ShiftMask.countTrailingOnes() % 8 == 0)) + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + ShiftMask.countTrailingOnes()); + } } } Index: test/CodeGen/ARM/shift-combine.ll =================================================================== --- test/CodeGen/ARM/shift-combine.ll +++ test/CodeGen/ARM/shift-combine.ll @@ -217,10 +217,23 @@ ret i32 %conv } -; CHECK-LABEL: test_shift8_mask8 +; CHECK-LABEL: test_shift7_mask8 ; CHECK-BE: ldr r1, [r0] ; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #8, #8 +; CHECK-COMMON: ubfx r1, r1, #7, #8 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift7_mask8(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 7 + %and = and i32 %shl, 255 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_shift8_mask8 +; CHECK-BE: ldrb r1, [r0, #2] +; CHECK-COMMON: ldrb r1, [r0, #1] ; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) { entry: @@ -231,10 +244,40 @@ ret void } -; CHECK-LABEL: test_shift8_mask16 +; CHECK-LABEL: test_shift8_mask7 ; CHECK-BE: ldr r1, [r0] ; CHECK-COMMON: ldr r1, [r0] -; CHECK-COMMON: ubfx r1, r1, #8, #16 +; CHECK-COMMON: ubfx r1, r1, #8, #7 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift8_mask7(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 8 + %and = and i32 %shl, 127 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_shift9_mask8 +; CHECK-BE: ldr r1, [r0] +; CHECK-COMMON: ldr r1, [r0] +; CHECK-COMMON: ubfx r1, r1, #9, #8 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift9_mask8(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 9 + %and = and i32 %shl, 255 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_shift8_mask16 +; CHECK-ALIGN: ldr r1, [r0] +; CHECK-ALIGN: ubfx r1, r1, #8, #16 +; CHECK-BE: ldrh r1, [r0, #1] +; CHECK-ARM: ldrh r1, [r0, #1] +; CHECK-THUMB: ldrh.w r1, [r0, #1] ; CHECK-COMMON: str r1, [r0] define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) { entry: @@ -245,6 +288,33 @@ ret void } +; CHECK-LABEL: test_shift15_mask16 +; CHECK-COMMON: ldr r1, [r0] +; CHECK-COMMON: ubfx r1, r1, #15, #16 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift15_mask16(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 15 + %and = and i32 %shl, 65535 + store i32 %and, i32* %p, align 4 + ret void +} + +; CHECK-LABEL: test_shift16_mask15 +; CHECK-BE: ldrh r1, [r0] +; CHECK-COMMON: ldrh r1, [r0, #2] +; CHECK-COMMON: bfc r1, #15, #17 +; CHECK-COMMON: str r1, [r0] +define arm_aapcscc void @test_shift16_mask15(i32* nocapture %p) { +entry: + %0 = load i32, i32* %p, align 4 + %shl = lshr i32 %0, 16 + %and = and i32 %shl, 32767 + store i32 %and, i32* %p, align 4 + ret void +} + ; CHECK-LABEL: test_sext_shift8_mask8 ; CHECK-BE: ldrb r0, [r0] ; CHECK-COMMON: ldrb r0, [r0, #1] Index: test/CodeGen/X86/h-registers-1.ll =================================================================== --- test/CodeGen/X86/h-registers-1.ll +++ test/CodeGen/X86/h-registers-1.ll @@ -9,11 +9,12 @@ ; CHECK: foo: ; CHECK: movzbl %{{[abcd]}}h, %e ; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movq %rax, %r ; CHECK: movzbl %{{[abcd]}}h, %e ; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movq %r8, %r ; CHECK: movzbl %{{[abcd]}}h, %e -; CHECK: movzbl %{{[abcd]}}h, %e -; CHECK: movzbl %{{[abcd]}}h, %e +; CHECK: movq %r9, %r ; CHECK: movzbl %{{[abcd]}}h, %e ; CHECK: ret