Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5491,6 +5491,8 @@ // Some constants may need fixing up later if they are too large. if (auto *C = dyn_cast(Op)) { + if (Mask->getValueType(0) != C->getValueType(0)) + return false; if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) NodesWithConsts.insert(N); @@ -5524,9 +5526,9 @@ case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - EVT VT = Op.getOpcode() == ISD::AssertZext ? - cast(Op.getOperand(1))->getVT() : - Op.getOperand(0).getValueType(); + EVT VT = Op.getOpcode() == ISD::AssertZext + ? cast(Op.getOperand(1))->getVT() + : Op.getOperand(0).getValueType(); // We can accept extending nodes if the mask is wider or an equal // width to the original type. @@ -5534,6 +5536,7 @@ continue; break; } + case ISD::ANY_EXTEND: case ISD::OR: case ISD::XOR: case ISD::AND: @@ -5593,12 +5596,14 @@ // masking. if (FixupNode) { LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); - SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), - FixupNode->getValueType(0), - SDValue(FixupNode, 0), MaskOp); + SDValue MaskOpT = DAG.getZExtOrTrunc(MaskOp, SDLoc(FixupNode), + FixupNode->getValueType(0)); + SDValue And = + DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOpT); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); if (And.getOpcode() == ISD ::AND) - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOpT); } // Narrow any constants that need it. @@ -5607,10 +5612,12 @@ SDValue Op1 = LogicN->getOperand(1); if (isa(Op0)) - std::swap(Op0, Op1); + std::swap(Op0, Op1); - SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), - Op1, MaskOp); + SDValue MaskOpT = + DAG.getZExtOrTrunc(MaskOp, SDLoc(Op1), Op1.getValueType()); + SDValue And = + DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOpT); DAG.UpdateNodeOperands(LogicN, Op0, And); } @@ -5618,12 +5625,14 @@ // Create narrow loads. for (auto *Load : Loads) { LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); + SDValue MaskOpT = + DAG.getZExtOrTrunc(MaskOp, SDLoc(Load), Load->getValueType(0)); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), - SDValue(Load, 0), MaskOp); + SDValue(Load, 0), MaskOpT); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); if (And.getOpcode() == ISD ::AND) And = SDValue( - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOpT), 0); SDValue NewLoad = ReduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); Index: llvm/test/CodeGen/AArch64/combine-andintoload.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-andintoload.ll +++ llvm/test/CodeGen/AArch64/combine-andintoload.ll @@ -5,16 +5,14 @@ define i64 @load32_and16_and(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_and: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_and: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -26,16 +24,14 @@ define i64 @load32_and16_andr(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_andr: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_andr: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -47,16 +43,14 @@ define i64 @load32_and16_and_sext(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_and_sext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_and_sext: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = sext i32 %x to i64 @@ -68,16 +62,16 @@ define i64 @load32_and16_or(i32* %p, i64 %y) { ; CHECK-LABEL: load32_and16_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: orr w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_or: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: orr w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w9, w1, #0xffff +; CHECKBE-NEXT: orr w0, w9, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -170,16 +164,14 @@ define i64 @load16_and8(i16* %p, i64 %y) { ; CHECK-LABEL: load16_and8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load16_and8: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xff +; CHECKBE-NEXT: ldrb w8, [x0, #1] +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -232,15 +224,13 @@ ; CHECK-LABEL: load8_and16_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load8_and16_zext: ; CHECKBE: // %bb.0: ; CHECKBE-NEXT: ldrb w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xff +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i8, i8* %p, align 4 %xz = zext i8 %x to i64 @@ -296,16 +286,14 @@ define i64 @load16_and8_manyext(i16* %p, i32 %y) { ; CHECK-LABEL: load16_and8_manyext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: and w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: and w0, w1, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load16_and8_manyext: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0] -; CHECKBE-NEXT: and w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xff +; CHECKBE-NEXT: ldrb w8, [x0, #1] +; CHECKBE-NEXT: and w0, w1, w8 ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i32 @@ -318,18 +306,16 @@ define i64 @multiple_load(i16* %p, i32* %q) { ; CHECK-LABEL: multiple_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: multiple_load: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0] -; CHECKBE-NEXT: ldr w9, [x1] -; CHECKBE-NEXT: and w8, w9, w8 -; CHECKBE-NEXT: and x0, x8, #0xff +; CHECKBE-NEXT: ldrb w8, [x0, #1] +; CHECKBE-NEXT: ldrb w9, [x1, #3] +; CHECKBE-NEXT: and w0, w9, w8 ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -343,18 +329,16 @@ define i64 @multiple_load_or(i16* %p, i32* %q) { ; CHECK-LABEL: multiple_load_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldr w9, [x1] -; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: and x0, x8, #0xff +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: multiple_load_or: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldrh w8, [x0] -; CHECKBE-NEXT: ldr w9, [x1] -; CHECKBE-NEXT: orr w8, w9, w8 -; CHECKBE-NEXT: and x0, x8, #0xff +; CHECKBE-NEXT: ldrb w8, [x0, #1] +; CHECKBE-NEXT: ldrb w9, [x1, #3] +; CHECKBE-NEXT: orr w0, w9, w8 ; CHECKBE-NEXT: ret %x = load i16, i16* %p, align 4 %xz = zext i16 %x to i64 @@ -368,16 +352,16 @@ define i64 @load32_and16_zexty(i32* %p, i32 %y) { ; CHECK-LABEL: load32_and16_zexty: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: orr w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_zexty: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: orr w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w9, w1, #0xffff +; CHECKBE-NEXT: orr w0, w9, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 @@ -390,16 +374,16 @@ define i64 @load32_and16_sexty(i32* %p, i32 %y) { ; CHECK-LABEL: load32_and16_sexty: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] -; CHECK-NEXT: orr w8, w1, w8 -; CHECK-NEXT: and x0, x8, #0xffff +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; ; CHECKBE-LABEL: load32_and16_sexty: ; CHECKBE: // %bb.0: -; CHECKBE-NEXT: ldr w8, [x0] -; CHECKBE-NEXT: orr w8, w1, w8 -; CHECKBE-NEXT: and x0, x8, #0xffff +; CHECKBE-NEXT: ldrh w8, [x0, #2] +; CHECKBE-NEXT: and w9, w1, #0xffff +; CHECKBE-NEXT: orr w0, w9, w8 ; CHECKBE-NEXT: ret %x = load i32, i32* %p, align 4 %xz = zext i32 %x to i64 Index: llvm/test/CodeGen/X86/pr35763.ll =================================================================== --- llvm/test/CodeGen/X86/pr35763.ll +++ llvm/test/CodeGen/X86/pr35763.ll @@ -10,10 +10,10 @@ define dso_local void @PR35763() { ; CHECK-LABEL: PR35763: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl z(%rip), %eax -; CHECK-NEXT: orl z+2(%rip), %eax -; CHECK-NEXT: movzwl %ax, %eax -; CHECK-NEXT: movq %rax, tf_3_var_136(%rip) +; CHECK-NEXT: movzwl z(%rip), %eax +; CHECK-NEXT: movzwl z+2(%rip), %ecx +; CHECK-NEXT: orl %eax, %ecx +; CHECK-NEXT: movq %rcx, tf_3_var_136(%rip) ; CHECK-NEXT: movl z+6(%rip), %eax ; CHECK-NEXT: movzbl z+10(%rip), %ecx ; CHECK-NEXT: shlq $32, %rcx Index: llvm/test/CodeGen/X86/pr35765.ll =================================================================== --- llvm/test/CodeGen/X86/pr35765.ll +++ llvm/test/CodeGen/X86/pr35765.ll @@ -13,14 +13,13 @@ ; CHECK-NEXT: addb $-118, %cl ; CHECK-NEXT: movl $4, %eax ; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: movzwl x(%rip), %ecx -; CHECK-NEXT: movzwl s2(%rip), %edx -; CHECK-NEXT: notl %edx -; CHECK-NEXT: orl $63488, %edx # imm = 0xF800 -; CHECK-NEXT: movzwl %dx, %edx -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: xorl %eax, %edx -; CHECK-NEXT: movslq %edx, %rax +; CHECK-NEXT: movzwl s2(%rip), %ecx +; CHECK-NEXT: notl %ecx +; CHECK-NEXT: orl x(%rip), %ecx +; CHECK-NEXT: orl $63488, %ecx # imm = 0xF800 +; CHECK-NEXT: movzwl %cx, %ecx +; CHECK-NEXT: xorl %eax, %ecx +; CHECK-NEXT: movslq %ecx, %rax ; CHECK-NEXT: movq %rax, ll(%rip) ; CHECK-NEXT: retq entry: