Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -448,6 +448,7 @@
     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
+    SDValue foldRedundantShiftedMasks(SDNode *N);
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
     SDValue TransformFPLoadStorePair(SDNode *N);
@@ -4007,6 +4008,112 @@
   return false;
 }
 
+// Fold redundant masking operations of a shifted value. For example, in:
+//   x1 = (and x, 0x00FF)
+//   x2 = (and (shl x, 8), 0xFF00)
+// x2 can be rewritten as
+//   x2 = (shl x1, 8)   ; reuse the computation of x1
+SDValue DAGCombiner::foldRedundantShiftedMasks(SDNode *AND) {
+  const SDValue &SHIFT = AND->getOperand(0);
+  if ((SHIFT.getNumOperands() != 2) || (!SHIFT.hasOneUse()))
+    return SDValue();
+
+  const ConstantSDNode *ShiftAmount =
+      dyn_cast<ConstantSDNode>(SHIFT.getOperand(1));
+  if (!ShiftAmount)
+    return SDValue();
+
+  const ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(AND->getOperand(1));
+  if (!Mask)
+    return SDValue();
+
+  SDValue MASKED = SHIFT.getOperand(0);
+  SDNode *MaskedValue = MASKED.getNode();
+  unsigned N0Opcode = SHIFT.getOpcode();
+  for (SDNode *OtherUser : MaskedValue->uses()) {
+    if ((&(*OtherUser) == ShiftAmount) || (OtherUser->getOpcode() != ISD::AND))
+      continue;
+
+    ConstantSDNode *OtherMask =
+        dyn_cast<ConstantSDNode>(OtherUser->getOperand(1));
+
+    if (!OtherMask)
+      continue;
+
+    bool CanReduce = false;
+
+    const APInt &MaskValue = Mask->getAPIntValue();
+    const APInt &ShiftValue = ShiftAmount->getAPIntValue();
+    const APInt &OtherMaskValue = OtherMask->getAPIntValue();
+
+    KnownBits MaskedValueBits;
+    DAG.computeKnownBits(MASKED, MaskedValueBits);
+    KnownBits ShiftedValueBits;
+    DAG.computeKnownBits(SHIFT, ShiftedValueBits);
+
+    const APInt EffectiveOtherMask = OtherMaskValue & ~MaskedValueBits.Zero;
+    const APInt EffectiveMask = MaskValue & ~ShiftedValueBits.Zero;
+
+    LLVM_DEBUG(
+        dbgs() << "\tMasked value: "; MASKED.dump();
+        dbgs() << "\t\tMasked value zero bits: 0x"
+               << MaskedValueBits.Zero.toString(16, false)
+               << "\n\n\t\tApplied mask: 0x"
+               << OtherMaskValue.toString(16, false) << " : ";
+        OtherUser->dump();
+        dbgs() << "\t\tEffective mask: 0x"
+               << EffectiveOtherMask.toString(16, false)
+               << "\n\n\tShifted by: " << ShiftValue.getZExtValue() << " : ";
+        SHIFT.dump();
+        dbgs() << "\t\tAnd masked by: 0x" << MaskValue.toString(16, false)
+               << " : ";
+        AND->dump();
+        dbgs() << "\t\tEffective mask to shifted value: 0x"
+               << EffectiveMask.toString(16, false) << '\n';);
+
+    switch (N0Opcode) {
+    case ISD::SHL:
+      CanReduce = (EffectiveOtherMask.shl(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.lshr(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::SRA:
+      if (!MaskedValueBits.Zero.isSignBitSet()) {
+        CanReduce = (EffectiveOtherMask.ashr(ShiftValue) == EffectiveMask);
+        break;
+      } else // Same as SRL
+        N0Opcode = ISD::SRL;
+      /* fall-thru */
+    case ISD::SRL:
+      CanReduce = (EffectiveOtherMask.lshr(ShiftValue) == EffectiveMask) ||
+                  (EffectiveMask.shl(ShiftValue) == EffectiveOtherMask);
+      break;
+    case ISD::ROTL:
+      CanReduce = (EffectiveOtherMask.rotl(ShiftValue) == EffectiveMask);
+      break;
+    case ISD::ROTR:
+      CanReduce = (EffectiveOtherMask.rotr(ShiftValue) == EffectiveMask);
+      break;
+    // TODO:
+    // case ISD::SHL_PARTS:
+    // case ISD::SRA_PARTS:
+    // case ISD::SRL_PARTS:
+    default:
+      return SDValue();
+    }
+    if (CanReduce) {
+      LLVM_DEBUG(dbgs() << "\tCan replace it\n");
+
+      SDValue ShiftTheAND(OtherUser, 0);
+      const SDLoc DL(SHIFT);
+      EVT VT = AND->getValueType(0);
+      SDValue NewShift =
+          DAG.getNode(N0Opcode, DL, VT, ShiftTheAND, SHIFT.getOperand(1));
+      AddToWorklist(OtherUser);
+      return NewShift;
+    }
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4207,6 +4314,9 @@
        (N0.getOpcode() == ISD::ANY_EXTEND &&
         N0.getOperand(0).getOpcode() == ISD::LOAD))) {
     if (SDValue Res = ReduceLoadWidth(N)) {
+      // ReduceLoadWidth may now return a shift of a narrowed load; in that
+      // case there is no load result to update here, so just use it.
+      if (Res.getOpcode() == ISD::SHL)
+        return Res;
+
       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
                             ? cast<LoadSDNode>(N0.getOperand(0))
                             : cast<LoadSDNode>(N0);
@@ -4216,6 +4326,9 @@
     }
   }
 
+  if (SDValue r = foldRedundantShiftedMasks(N))
+    return r;
+
   if (Level >= AfterLegalizeTypes) {
     // Attempt to propagate the AND back up to the leaves which, if they're
     // loads, can be combined to narrow loads and the AND node can be removed.
@@ -6271,13 +6384,36 @@
   }
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
-  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
-    SDLoc DL(N);
-    SDValue Mask =
-        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
-    AddToWorklist(Mask.getNode());
-    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+  if ((N0.getOpcode() == ISD::SHL) &&
+      (isConstantOrConstantVector(N1, /* NoOpaques */ true))) {
+    bool CanFold = N0.getOperand(1) == N1;
+    if (!CanFold) {
+      const ConstantSDNode *CN0N1 =
+          dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      if (CN0N1 && N1C)
+        CanFold = CN0N1->getZExtValue() == N1C->getZExtValue();
+    }
+
+    if (CanFold) {
+      // fold (srl (shl X, c), c) -> X if the upper c bits of X are known to
+      // be 0.
+      // TODO: Add more instructions that produce known-zero upper bits,
+      // other than zext loads.
+      if (N1C) {
+        if (LoadSDNode *X = dyn_cast<LoadSDNode>(N0.getOperand(0))) {
+          const unsigned XSize = X->getValueSizeInBits(0);
+          const unsigned XMemSize = X->getMemOperand()->getSize() * 8;
+          if ((XSize > XMemSize) &&
+              ((XSize - XMemSize) >= N1C->getZExtValue()) &&
+              (X->getExtensionType() == ISD::LoadExtType::ZEXTLOAD))
+            return N0.getOperand(0);
+        }
+      }
+      SDLoc DL(N);
+      SDValue Mask =
+          DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
+      AddToWorklist(Mask.getNode());
+      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+    }
   }
 
   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -8492,6 +8628,9 @@
   if (VT.isVector())
     return SDValue();
 
+  unsigned ShAmt = 0;
+  unsigned ShLeftAmt = 0;
+
   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
   // extended to VT.
   if (Opc == ISD::SIGN_EXTEND_INREG) {
@@ -8519,15 +8658,65 @@
   } else if (Opc == ISD::AND) {
     // An AND with a constant mask is the same as a truncate + zero-extend.
     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
-    if (!AndC || !AndC->getAPIntValue().isMask())
+    if (!AndC)
       return SDValue();
 
-    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+    // TODO: Not only [shifted] masks should be accepted.
+    // (and ld.16 [M], 0x00AB) can be replaced by (and ld.8.zext16 [M], 0x00AB).
+    const APInt &MaskAPInt = AndC->getAPIntValue();
+    if (!(MaskAPInt.isMask() || MaskAPInt.isShiftedMask()))
+      return SDValue();
+
+    unsigned MaxBit = MaskAPInt.getBitWidth() - MaskAPInt.countLeadingZeros();
+    const unsigned MinBit = MaskAPInt.countTrailingZeros();
+    // Only accept masks whose bounds are multiples of 8 bits and whose width
+    // is a power of 2.
+    if (!MaxBit || (0 != ((MaxBit | MinBit) % 8)))
+      return SDValue();
+
+    unsigned ActiveBits = MaxBit - MinBit;
+    if (ActiveBits & (ActiveBits - 1))
+      return SDValue();
+
+    LLVM_DEBUG(dbgs() << "\tMask: 0x" << MaskAPInt.toString(16, false) << " : ";
+               AndC->dump();
+               dbgs() << "\t\tmaxActiveBit: " << MaxBit - 1
+                      << "\n\t\tminActiveBit: " << MinBit << '\n');
+
+    LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0);
     ExtType = ISD::ZEXTLOAD;
+    if (MinBit != 0) {
+      // If the source is not a load we do not know how to narrow it; give up.
+      if (LN0 == nullptr)
+        return SDValue();
+
+      const auto &mvt = LN0->getMemoryVT();
+      if (MinBit >= mvt.getSizeInBits()) {
+        // The AND only selects bits produced by the load extension, not the
+        // loaded data itself.
+        if (ISD::LoadExtType::ZEXTLOAD == LN0->getExtensionType()) {
+          // Only zero bits are selected, so fold to a constant zero.
+          return DAG.getConstant(0, SDLoc(N), AndC->getValueType(0));
+        }
+        // The mask selects sign-extension bits, which are not known here.
+        return SDValue();
+      }
+      if (MaxBit > mvt.getSizeInBits())
+        ExtType = LN0->getExtensionType();
+    }
+    // TODO: Accept SEXT if the target supports a combined load + shift, e.g.
+    // (and (ld.32bit.sext.from16 [M]), 0x00FFFF00) can be replaced by
+    // (and (shl (ld.32bit.sext.from8 [M+1]), 8), 0x00FFFF00).
+    if (ExtType != ISD::ZEXTLOAD)
+      return SDValue();
+
     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+    ShAmt = MinBit;
+    ShLeftAmt = MinBit;
+    LLVM_DEBUG(dbgs() << "\tCan replace load: "; LN0->dump();
+               dbgs() << "\tBy a load of width " << ActiveBits
+                      << " and with offset of " << ShAmt / 8 << '\n');
   }
 
-  unsigned ShAmt = 0;
   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
     SDValue SRL = N0;
     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
@@ -8578,7 +8767,6 @@
 
   // If the load is shifted left (and the result isn't shifted back right),
   // we can fold the truncate through the shift.
- unsigned ShLeftAmt = 0; if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { Index: test/CodeGen/ARM/2018_05_29_FoldRedundantMask.ll =================================================================== --- test/CodeGen/ARM/2018_05_29_FoldRedundantMask.ll +++ test/CodeGen/ARM/2018_05_29_FoldRedundantMask.ll @@ -0,0 +1,102 @@ +; RUN: llc -O3 -march=arm %s -o - | FileCheck %s + +define i16 @LSR8(i16* %a){ +entry: + %data16 = getelementptr inbounds i16, i16* %a, i64 0 + %0 = load i16, i16* %data16, align 2 + %and = and i16 %0, 65280 + %1 = lshr i16 %0, 8 + %and3 = and i16 %1, 255 + %or = or i16 %and3, %and + ret i16 %or +} + +; CHECK-LABEL: LSR8 +; CHECK: ldrb{{.*}} +; CHECK-NEXT: orr{{.*}}lsl #8 +; CHECK-NEXT: mov{{.*}} + +define i16 @LSR12A(i16* %a){ +entry: + %data16 = getelementptr inbounds i16, i16* %a, i64 0 + %0 = load i16, i16* %data16, align 2 + %and = and i16 %0, 61440 + %1 = lshr i16 %0, 12 + %and3 = and i16 %1, 15 + %or = or i16 %and3, %and + ret i16 %or +} +; CHECK-LABEL: LSR12A +; CHECK: ldr{{.*}} +; CHECK-NEXT: and{{.*}}#61440 +; CHECK-NEXT: orr{{.*}}lsr #12 +; CHECK-NEXT: mov{{.*}} + +define i16 @LSR12B(i16* %a){ +entry: + %data16 = getelementptr inbounds i16, i16* %a, i64 0 + %0 = load i16, i16* %data16, align 2 + %and = and i16 %0, 61440 + %1 = lshr i16 %0, 12 + %and3 = and i16 %1, 255 + %or = or i16 %and3, %and + ret i16 %or +} +; CHECK-LABEL: LSR12B +; CHECK: ldrh{{.*}} +; CHECK-NEXT: and{{.*}}#61440 +; CHECK-NEXT: orr{{.*}}lsr #12 +; CHECK-NEXT: mov{{.*}} + +define i16 @LSR12C(i16* %a){ +entry: + %data16 = getelementptr inbounds i16, i16* %a, i64 0 + %0 = load i16, i16* %data16, align 2 + %and = and i16 %0, 65280 + %1 = lshr i16 %0, 12 + %and3 = and i16 %1, 15 + %or = or i16 %and3, %and + ret i16 %or +} +; CHECK-LABEL: LSR12C +; CHECK: ldrh{{.*}} +; CHECK-NEXT: and{{.*}} +; CHECK-NEXT: orr{{.*}}lsr #12 +; CHECK-NEXT: mov{{.*}} + +define i32 @ASR(i16* %a){ +entry: + %data16 = getelementptr inbounds i16, i16* %a, i64 0 + %l = load i16, i16* %data16, align 2 + %0 = zext i16 %l to i32 + %and = and i32 %0, 64512 + %1 = ashr i32 %0, 8 + %and3 = and i32 %1, 65532 + %or = or i32 %and3, %and + ret i32 %or +} +; CHECK-LABEL: ASR +; CHECK: ldrh{{.*}} +; CHECK-NEXT: and{{.*}} #64512 +; CHECK-NEXT: orr{{.*}}lsr #8 +; CHECK-NEXT: mov pc, lr +; ModuleID = 'test2.cpp' + +define i32 @main(i32 %ar, i8** nocapture readonly %v){ +entry: + %0 = bitcast i8** %v to i16* + %idxprom = sext i32 %ar to i64 + %arrayidx = getelementptr inbounds i16, i16* %0, i64 %idxprom + %1 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %1 to i32 + %and = and i32 %conv, 65280 + %2 = lshr i32 %conv, 8 + %and4 = and i32 %2, 255 + %or = or i32 %and4, %and + ret i32 %or +} +; CHECK-LABEL: main +; CHECK: add r0, r1, r0, lsl #1 +; CHECK-NEXT: ldrb r0, [r0, #1] +; CHECK-NEXT: orr r0, r0, r0, lsl #8 +; CHECK-NEXT: mov pc, lr Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -43,18 +43,18 @@ ; } define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; CHECK-LABEL: TestUnionLD1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF -; CHECK-NEXT: andq %rdi, %rcx -; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 
0xFFFF000000000000 -; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: jmp foo # TAILCALL +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movaps %xmm0, -24(%rsp) +; CHECK-NEXT: movzwl -10(%rsp), %eax +; CHECK-NEXT: shlq $48, %rax +; CHECK-NEXT: movq -24(%rsp), %rcx +; CHECK-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF +; CHECK-NEXT: andq %rdi, %rdx +; CHECK-NEXT: orq %rax, %rdx +; CHECK-NEXT: movq %rcx, -40(%rsp) +; CHECK-NEXT: movq %rdx, -32(%rsp) +; CHECK-NEXT: movaps -40(%rsp), %xmm0 +; CHECK-NEXT: jmp foo # TAILCALL entry: %0 = bitcast fp128 %s to i128 %1 = zext i64 %n to i128 Index: test/CodeGen/X86/pr32329.ll =================================================================== --- test/CodeGen/X86/pr32329.ll +++ test/CodeGen/X86/pr32329.ll @@ -16,80 +16,80 @@ define void @foo() local_unnamed_addr { ; X86-LABEL: foo: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: pushl %ebx -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: pushl %edi -; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: pushl %esi -; X86-NEXT: .cfi_def_cfa_offset 20 -; X86-NEXT: .cfi_offset %esi, -20 -; X86-NEXT: .cfi_offset %edi, -16 -; X86-NEXT: .cfi_offset %ebx, -12 -; X86-NEXT: .cfi_offset %ebp, -8 -; X86-NEXT: movl obj, %edx -; X86-NEXT: movsbl var_27, %eax -; X86-NEXT: movzwl var_2, %esi -; X86-NEXT: movl var_310, %ecx -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: addl var_24, %ecx -; X86-NEXT: andl $4194303, %edx # imm = 0x3FFFFF -; X86-NEXT: leal (%edx,%edx), %ebx -; X86-NEXT: subl %eax, %ebx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: subl %esi, %edi -; X86-NEXT: imull %edi, %ecx -; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 -; X86-NEXT: movl $9, %esi -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: shldl %cl, %esi, %ebp -; X86-NEXT: shll %cl, %esi -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %esi, %ebp -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovnel %ecx, %esi -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %ebp, var_50+4 -; X86-NEXT: movl %esi, var_50 -; X86-NEXT: setge var_205 -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: movb %bl, var_218 -; X86-NEXT: popl %esi -; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: popl %edi -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: popl %ebx -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: popl %ebp -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: pushl %ebx +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: pushl %edi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: pushl %esi +; X86-NEXT: .cfi_def_cfa_offset 20 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movsbl var_27, %eax +; X86-NEXT: movzwl var_2, %esi +; X86-NEXT: movl var_310, %ecx +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: addl var_24, %ecx +; X86-NEXT: movl $4194303, %edi # imm = 0x3FFFFF +; X86-NEXT: andl obj, %edi +; X86-NEXT: leal (%edi,%edi), %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: subl %esi, %ebx +; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 +; X86-NEXT: movl $9, %esi +; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: shldl %cl, %esi, %ebp +; X86-NEXT: shll %cl, %esi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel 
%esi, %ebp +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovnel %ecx, %esi +; X86-NEXT: cmpl %edi, %ebx +; X86-NEXT: movl %ebp, var_50+4 +; X86-NEXT: movl %esi, var_50 +; X86-NEXT: setge var_205 +; X86-NEXT: imull %eax, %edx +; X86-NEXT: movb %dl, var_218 +; X86-NEXT: popl %esi +; X86-NEXT: .cfi_def_cfa_offset 16 +; X86-NEXT: popl %edi +; X86-NEXT: .cfi_def_cfa_offset 12 +; X86-NEXT: popl %ebx +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl ; ; X64-LABEL: foo: -; X64: # %bb.0: # %entry -; X64-NEXT: movl {{.*}}(%rip), %eax -; X64-NEXT: movsbl {{.*}}(%rip), %r9d -; X64-NEXT: movzwl {{.*}}(%rip), %r8d -; X64-NEXT: movl {{.*}}(%rip), %ecx -; X64-NEXT: imull %r9d, %ecx -; X64-NEXT: addl {{.*}}(%rip), %ecx -; X64-NEXT: andl $4194303, %eax # imm = 0x3FFFFF -; X64-NEXT: leal (%rax,%rax), %edi -; X64-NEXT: subl %r9d, %edi -; X64-NEXT: movl %edi, %esi -; X64-NEXT: subl %r8d, %esi -; X64-NEXT: imull %esi, %ecx -; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 -; X64-NEXT: movl $9, %edx -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlq %cl, %rdx -; X64-NEXT: movq %rdx, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, %esi -; X64-NEXT: setge {{.*}}(%rip) -; X64-NEXT: imull %r9d, %edi -; X64-NEXT: movb %dil, {{.*}}(%rip) -; X64-NEXT: retq +; X64: # %bb.0: # %entry +; X64-NEXT: movsbl var_27(%rip), %r9d +; X64-NEXT: movzwl var_2(%rip), %r8d +; X64-NEXT: movl var_310(%rip), %ecx +; X64-NEXT: imull %r9d, %ecx +; X64-NEXT: addl var_24(%rip), %ecx +; X64-NEXT: movl $4194303, %esi # imm = 0x3FFFFF +; X64-NEXT: andl obj(%rip), %esi +; X64-NEXT: leal (%rsi,%rsi), %edi +; X64-NEXT: subl %r9d, %edi +; X64-NEXT: movl %edi, %edx +; X64-NEXT: subl %r8d, %edx +; X64-NEXT: imull %edx, %ecx +; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 +; X64-NEXT: movl $9, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax +; X64-NEXT: movq %rax, var_50(%rip) +; X64-NEXT: cmpl %esi, %edx +; X64-NEXT: setge var_205(%rip) +; X64-NEXT: imull %r9d, %edi +; X64-NEXT: movb %dil, var_218(%rip) +; X64-NEXT: retq entry: %bf.load = load i32, i32* bitcast (%struct.AA* @obj to i32*), align 8 %bf.clear = shl i32 %bf.load, 1 Index: test/CodeGen/X86/pr32588.ll =================================================================== --- test/CodeGen/X86/pr32588.ll +++ test/CodeGen/X86/pr32588.ll @@ -4,11 +4,11 @@ @b = external local_unnamed_addr global i32, align 4 @d = external local_unnamed_addr global i32, align 4 -; CHECK: cmpl $1, c(%rip) -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, d(%rip) -; CHECK-NEXT: retq +; CHECK: xorl %eax, %eax +; CHECK-NEXT: cmpl $0, c(%rip) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movl %eax, d(%rip) +; CHECK-NEXT: retq define void @fn1() { entry: