Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4724,9 +4724,8 @@ if (SDValue Res = ReduceLoadWidth(N)) { LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(N); - CombineTo(LN0, Res, Res.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); return SDValue(N, 0); } } @@ -9486,18 +9485,15 @@ if (DAG.getDataLayout().isBigEndian()) ShAmt = AdjustBigEndianShift(ShAmt); - // We're using a shifted mask, so the load now has an offset. This means we - // now need to shift right the mask to match the new load and then shift - // right the result of the AND. - const APInt &Mask = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); - APInt ShiftedMask = Mask.lshr(ShAmt); - DAG.UpdateNodeOperands(N, Result, DAG.getConstant(ShiftedMask, DL, VT)); + // We're using a shifted mask, so the load now has an offset. This means + // that data has been loaded into the lower bytes than it would have been + // before, so we need to shl the loaded data into the correct position in the + // register. SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT); - SDValue Shifted = DAG.getNode(ISD::SHL, DL, VT, SDValue(N, 0), - ShiftC); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Shifted); - DAG.UpdateNodeOperands(Shifted.getNode(), SDValue(N, 0), ShiftC); + Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); } + // Return the new loaded value. 
return Result; } Index: llvm/trunk/test/CodeGen/ARM/and-load-combine.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/and-load-combine.ll +++ llvm/trunk/test/CodeGen/ARM/and-load-combine.ll @@ -1549,3 +1549,39 @@ %and = and i64 %1, -281474976710656 ret i64 %and } + +; ARM-LABEL: test27: +; ARM: @ %bb.0: +; ARM-NEXT: ldrb r1, [r0, #1] +; ARM-NEXT: lsl r1, r1, #16 +; ARM-NEXT: str r1, [r0] +; ARM-NEXT: bx lr +; +; ARMEB-LABEL: test27: +; ARMEB: @ %bb.0: +; ARMEB-NEXT: ldrb r1, [r0, #2] +; ARMEB-NEXT: lsl r1, r1, #16 +; ARMEB-NEXT: str r1, [r0] +; ARMEB-NEXT: bx lr +; +; THUMB1-LABEL: test27: +; THUMB1: @ %bb.0: +; THUMB1-NEXT: ldrb r1, [r0, #1] +; THUMB1-NEXT: lsls r1, r1, #16 +; THUMB1-NEXT: str r1, [r0] +; THUMB1-NEXT: bx lr +; +; THUMB2-LABEL: test27: +; THUMB2: @ %bb.0: +; THUMB2-NEXT: ldrb r1, [r0, #1] +; THUMB2-NEXT: lsls r1, r1, #16 +; THUMB2-NEXT: str r1, [r0] +; THUMB2-NEXT: bx lr +define void @test27(i32* nocapture %ptr) { +entry: + %0 = load i32, i32* %ptr, align 4 + %and = and i32 %0, 65280 + %shl = shl i32 %and, 8 + store i32 %shl, i32* %ptr, align 4 + ret void +}