Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3788,6 +3788,13 @@
   if (LoadN->getNumValues() > 2)
     return false;
 
+  // If a shift is used, an offset will be used by the load so ensure that
+  // this isn't going to produce an unsupported unaligned access.
+  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+                                       ExtVT, LoadN->getAddressSpace(),
+                                       ShAmt / 8))
+    return false;
+
   // If the load that we're shrinking is an extload and we're not just
   // discarding the extension we can't simply shrink the load. Bail.
   // TODO: It would be possible to merge the extensions in some cases.
@@ -8274,6 +8281,19 @@
       // then the result of the shift+trunc is zero/undef (handled elsewhere).
       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
         return SDValue();
+
+      // If the SRL is only used by a masking AND, we may be able to adjust
+      // the ExtVT to make the AND redundant.
+      SDNode *Mask = *(N->use_begin());
+      if (Mask->getOpcode() == ISD::AND &&
+          isa<ConstantSDNode>(Mask->getOperand(1))) {
+        const APInt &ShiftMask =
+          cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
+        // Recompute the type.
+        if (ShiftMask.isMask())
+          ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    ShiftMask.countTrailingOnes());
+      }
     }
   }
Index: test/CodeGen/ARM/shift-combine.ll
===================================================================
--- test/CodeGen/ARM/shift-combine.ll
+++ test/CodeGen/ARM/shift-combine.ll
@@ -218,9 +218,8 @@
 }
 
 ; CHECK-LABEL: test_shift8_mask8
-; CHECK-BE: ldr r1, [r0]
-; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #8
+; CHECK-BE: ldrb r1, [r0, #2]
+; CHECK-COMMON: ldrb r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask8(i32* nocapture %p) {
 entry:
@@ -232,9 +231,11 @@
 }
 
 ; CHECK-LABEL: test_shift8_mask16
-; CHECK-BE: ldr r1, [r0]
-; CHECK-COMMON: ldr r1, [r0]
-; CHECK-COMMON: ubfx r1, r1, #8, #16
+; CHECK-ALIGN: ldr r1, [r0]
+; CHECK-ALIGN: ubfx r1, r1, #8, #16
+; CHECK-BE: ldrh r1, [r0, #1]
+; CHECK-ARM: ldrh r1, [r0, #1]
+; CHECK-THUMB: ldrh.w r1, [r0, #1]
 ; CHECK-COMMON: str r1, [r0]
 define arm_aapcscc void @test_shift8_mask16(i32* nocapture %p) {
 entry:
Index: test/CodeGen/X86/2009-06-05-VZextByteShort.ll
===================================================================
--- test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -15,9 +15,8 @@
 
 define <8 x i16> @b(i32* %x1) nounwind {
 ; CHECK-LABEL: b:
-; CHECK: shrl %e[[R:.]]x
-; CHECK-NEXT: movzwl %[[R]]x, %e[[R]]x
-; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK: movzwl (%eax), %eax
+; CHECK-NEXT: movd %eax, %xmm0
 ; CHECK-NEXT: retl
 
 %x2 = load i32, i32* %x1
@@ -29,9 +28,8 @@
 
 define <8 x i8> @c(i32* %x1) nounwind {
 ; CHECK-LABEL: c:
-; CHECK: shrl %e[[R:.]]x
-; CHECK-NEXT: movzwl %[[R]]x, %e[[R]]x
-; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK: movzwl (%eax), %eax
+; CHECK-NEXT: movd %eax, %xmm0
 ; CHECK-NEXT: retl
 
 %x2 = load i32, i32* %x1
@@ -43,9 +41,8 @@
 
 define <16 x i8> @d(i32* %x1) nounwind {
 ; CHECK-LABEL: d:
-; CHECK: shrl %e[[R:.]]x
-; CHECK-NEXT: movzbl %[[R]]l, %e[[R]]x
-; CHECK-NEXT: movd %e[[R]]x, %xmm0
+; CHECK: movzbl (%eax), %eax
+; CHECK-NEXT: movd %eax, %xmm0
 ; CHECK-NEXT: retl
 
 %x2 = load i32, i32* %x1
Index: test/CodeGen/X86/h-registers-1.ll
===================================================================
--- test/CodeGen/X86/h-registers-1.ll
+++ test/CodeGen/X86/h-registers-1.ll
@@ -9,11 +9,12 @@
 ; CHECK: foo:
 ; CHECK: movzbl %{{[abcd]}}h, %e
 ; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movq %rax, %r
 ; CHECK: movzbl %{{[abcd]}}h, %e
 ; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movq %r8, %r
 ; CHECK: movzbl %{{[abcd]}}h, %e
-; CHECK: movzbl %{{[abcd]}}h, %e
-; CHECK: movzbl %{{[abcd]}}h, %e
+; CHECK: movq %r9, %r
 ; CHECK: movzbl %{{[abcd]}}h, %e
 ; CHECK: ret
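
For reference, a minimal IR sketch of the pattern the ReduceLoadWidth change narrows (the function name is illustrative, not taken from the patch's test files). The masking AND has an isMask() constant with eight trailing ones, so ExtVT is narrowed to i8 and the load+shift+mask sequence can lower to a single zero-extending byte load at offset 1 (little-endian) or offset 2 (big-endian), as the updated test_shift8_mask8 checks expect:

    ; (and (srl (load i32), 8), 255) -> narrow zextload i8 at a byte offset
    define i32 @load_shift_mask(i32* %p) {
    entry:
      %v = load i32, i32* %p, align 4
      %sh = lshr i32 %v, 8     ; SRL whose only use is the masking AND
      %m = and i32 %sh, 255    ; 0xFF is a mask, countTrailingOnes() == 8
      ret i32 %m
    }

With a 16-bit mask instead (and i32 %sh, 65535), the narrowed halfword load would sit at offset 1 and thus be unaligned; this is why the first hunk checks TLI.allowsMemoryAccess with the byte offset ShAmt / 8 and bails out when the target cannot handle the access, matching the CHECK-ALIGN lines in test_shift8_mask16 that still expect the ldr + ubfx sequence.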