Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -280,0 +281 @@
+    SDValue combine_AND_ShiftAND(SDNode *N, SDValue &N0, SDValue &N1);
@@ -455,0 +457,3 @@
+    SDNode *ShrinkLoadShiftOrStoreWithLoadNewStore(StoreSDNode *storeOp,
+                                                   const SDNode *orOp);
+
@@ -4007,0 +4012,79 @@
+// Fold expressions like x1 and x2:
+//   x1 = (and x, 0x00FF)
+//   x2 = (and (shl x, 8), 0xFF00)
+// into
+//   x2 = (shl x1, 8) ; reuse the computation of x1
+SDValue DAGCombiner::combine_AND_ShiftAND(SDNode *N, SDValue &N0, SDValue &N1) {
+  ConstantSDNode *mask = dyn_cast<ConstantSDNode>(N1);
+  if (!mask)
+    return SDValue();
+
+  if ((N0.getNumOperands() != 2) || (!N0.hasOneUse()))
+    return SDValue();
+
+  ConstantSDNode *shiftAmount = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!shiftAmount)
+    return SDValue();
+
+  const ISD::NodeType N0Opcode = (ISD::NodeType)N0.getOpcode();
+  if (((N0Opcode < ISD::SHL) || (N0Opcode > ISD::ROTR)) &&
+      ((N0Opcode < ISD::SHL_PARTS) || (N0Opcode > ISD::SRL_PARTS)))
+    return SDValue();
+
+  SDNode *maskedValue = N0.getOperand(0).getNode();
+  for (SDNode *otherUser : maskedValue->uses()) {
+    // Skip the shift node itself and any user that is not an AND.
+    if ((otherUser == N0.getNode()) || (otherUser->getOpcode() != ISD::AND))
+      continue;
+
+    ConstantSDNode *otherMask =
+        dyn_cast<ConstantSDNode>(otherUser->getOperand(1));
+    if (!otherMask)
+      continue;
+
+    bool canReduce = false;
+
+    const APInt &maskValue = mask->getAPIntValue();
+    const APInt &shiftValue = shiftAmount->getAPIntValue();
+    const APInt &otherMaskValue = otherMask->getAPIntValue();
+    switch (N0Opcode) {
+    case ISD::SHL:
+      canReduce = (maskValue.lshr(shiftValue) == otherMaskValue);
+      break;
+    case ISD::SRA:
+    case ISD::SRL:
+      canReduce = (maskValue.shl(shiftValue) == otherMaskValue);
+      break;
+    case ISD::ROTL:
+      canReduce = (maskValue.rotr(shiftValue) == otherMaskValue);
+      break;
+    case ISD::ROTR:
+      canReduce = (maskValue.rotl(shiftValue) == otherMaskValue);
+      break;
+    case ISD::SHL_PARTS:
+    case ISD::SRA_PARTS:
+    case ISD::SRL_PARTS:
+      DEBUG(dbgs() << "TODO: *_PARTS opcodes are not handled yet\n");
+      break;
+    default:
+      llvm_unreachable("This opcode is not accepted!");
+      break;
+    }
+    if (canReduce) {
+      DEBUG(dbgs() << " with: "; N0.getNode()->dump();
+            dbgs() << " and : "; otherUser->dump(););
+
+      SDValue shiftTheAND(otherUser, 0);
+      const SDLoc DL(N0);
+      EVT VT = N->getValueType(0);
+      SDValue newShift =
+          DAG.getNode(N0Opcode, DL, VT, shiftTheAND, N0.getOperand(1));
+      AddToWorklist(maskedValue);
+      AddToWorklist(otherUser);
+      return newShift;
+    }
+  }
+  return SDValue();
+}
+
@@ -4207,0 +4291,3 @@
+  if (Res.getOpcode() == ISD::SHL)
+    return Res;
+
@@ -4216,0 +4303,3 @@
+  if (SDValue r = combine_AND_ShiftAND(N, N0, N1))
+    return r;
+
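Illustrative only, not part of the patch: the SHL case of combine_AND_ShiftAND relies on the identity ((x << s) & m) == ((x & (m >> s)) << s), which holds for any mask m because the low s bits of (x << s) are zero regardless of m. A minimal standalone check:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t masks[] = {0x0000FF00u, 0x00FF0000u, 0xFFFF0000u,
                              0x00FFFF00u};
    for (uint32_t m : masks)
      for (unsigned s = 0; s < 32; ++s)
        for (uint32_t v = 0; v < 0x10000u; v += 0x37u) {
          uint32_t x = v * 0x01010101u; // spread the test bits around the word
          // The combine rewrites ((x << s) & m) as ((x & (m >> s)) << s),
          // reusing the already-computed (x & (m >> s)).
          assert(((x << s) & m) == ((x & (m >> s)) << s));
        }
    return 0;
  }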
@@ -6265,7 +6354,30 @@
-  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
-      isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
-    SDLoc DL(N);
-    SDValue Mask =
-        DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
-    AddToWorklist(Mask.getNode());
-    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+  if ((N0.getOpcode() == ISD::SHL) &&
+      (isConstantOrConstantVector(N1, /* NoOpaques */ true))) {
+    bool canFold = N0.getOperand(1) == N1;
+    if (!canFold) {
+      const ConstantSDNode *CN0N1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+      if (CN0N1 && N1C)
+        canFold = CN0N1->getZExtValue() == N1C->getZExtValue();
+    }
+
+    if (canFold) {
+      // fold (srl (shl x, c), c) -> x if the upper c bits of x are known
+      // to be 0
+      // TODO: Add more instructions that produce known upper bits zero masks,
+      // other than zext loads
+      if (N1C) {
+        if (LoadSDNode *x = dyn_cast<LoadSDNode>(N0.getOperand(0))) {
+          const unsigned xSize = x->getValueSizeInBits(0);
+          const unsigned xMemSize = x->getMemOperand()->getSize() * 8;
+          if ((xSize > xMemSize) &&
+              ((xSize - xMemSize) >= N1C->getZExtValue()) &&
+              (x->getExtensionType() == ISD::LoadExtType::ZEXTLOAD))
+            return N0.getOperand(0);
+        }
+      }
+      SDLoc DL(N);
+      SDValue Mask =
+          DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
+      AddToWorklist(Mask.getNode());
+      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
+    }
@@ -8506,0 +8619,3 @@
+  unsigned ShAmt = 0;
+  unsigned ShLeftAmt = 0;
+
@@ -8534 +8649,14 @@
-    if (!AndC || !AndC->getAPIntValue().isMask())
+    if (!AndC)
+      return SDValue();
+
+    const APInt &maskAPInt = AndC->getAPIntValue();
+    // TODO: Not only [shifted] masks should be accepted.
+    // (and ld.16 [M], 0x00AB) can be replaced by (and ld.8.zext16 [M], 0x00AB).
+    if (!(maskAPInt.isMask() || maskAPInt.isShiftedMask()))
+      return SDValue();
+
+    unsigned maxBit = maskAPInt.getBitWidth() - maskAPInt.countLeadingZeros();
+    const unsigned minBit = maskAPInt.countTrailingZeros();
+    // Only accept masks whose boundaries are multiples of 8 bits
+    if ((maxBit | minBit) % 8)
+      return SDValue();
+
+    // ... and whose active width is a power of 2
+    unsigned ActiveBits = maxBit - minBit;
+    if (ActiveBits & (ActiveBits - 1))
@@ -8537 +8665,5 @@
-    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+    DEBUG(dbgs() << "\n\tMask: "; AndC->dump();
+          dbgs() << "\n\t\tmaxActiveBit: " << maxBit - 1
+                 << "\n\t\tminActiveBit: " << minBit << '\n');
+
+    LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0);
@@ -8538,0 +8671,25 @@
+    if (minBit != 0) {
+      // How should this be handled if the operand is not a load?
+      if (LN0 == nullptr)
+        return SDValue();
+
+      const auto &mvt = LN0->getMemoryVT();
+      if (minBit >= mvt.getSizeInBits()) {
+        // The (and) is filtering what was extended, not the actual data
+        // value...
+        if (ISD::LoadExtType::ZEXTLOAD == LN0->getExtensionType()) {
+          // We only read the zero values
+          return DAG.getConstant(0, SDLoc(N), AndC->getValueType(0));
+        }
+        // We access the sign extension, which is not known here
+        return SDValue();
+      }
+      if (maxBit > mvt.getSizeInBits())
+        ExtType = LN0->getExtensionType();
+    }
+    // TODO: Accept SEXT if the architecture accepts doing a LD?SH (load + shl)
+    // An (and (ld.32bit.sext.from16 [M]), 0x00FFFF00) can be replaced by
+    // (and (shl (ld.32bit.sext.from8 [M+1]), 8), 0x00FFFF00)
+    if (ExtType != ISD::ZEXTLOAD)
+      return SDValue();
+
@@ -8539,0 +8697,2 @@
+    ShAmt = minBit;
+    ShLeftAmt = minBit;
@@ -8542 +8700,0 @@
-  unsigned ShAmt = 0;
@@ -8593 +8750,0 @@
-  unsigned ShLeftAmt = 0;
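Illustrative only, not part of the patch: the shifted-mask bookkeeping above can be checked in isolation. This standalone sketch mirrors the APInt arithmetic with GCC/Clang builtins, using the 0x00FFFF00 mask from the comment (bits [8, 24) set):

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t mask = 0x00FFFF00u;                // a shifted mask
    const unsigned maxBit = 32 - __builtin_clz(mask); // one past highest set bit
    const unsigned minBit = __builtin_ctz(mask);      // lowest set bit
    assert(maxBit == 24 && minBit == 8);
    assert((maxBit | minBit) % 8 == 0);               // byte-aligned boundaries
    const unsigned ActiveBits = maxBit - minBit;      // 16
    assert((ActiveBits & (ActiveBits - 1)) == 0);     // power-of-2 width
    return 0;
  }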
@@ -12833,0 +12991,79 @@
+/// Detects operations such as M[i] = M[i] | M[i] << K, or
+///                            M[i] = M[i] | M[i] >> K,
+/// where K is half the size of the store width. If we can prove that it only
+/// copies bytes from one position to another, it might be possible to reduce
+/// the width of the store and remove the 'or + shift' operations.
+SDNode *
+DAGCombiner::ShrinkLoadShiftOrStoreWithLoadNewStore(StoreSDNode *storeOp,
+                                                    const SDNode *orOp) {
+
+  SDValue loadSD = orOp->getOperand(0);
+  LoadSDNode *load = dyn_cast<LoadSDNode>(loadSD);
+  SDNode *other = orOp->getOperand(1).getNode();
+  if (!load) {
+    loadSD = orOp->getOperand(1);
+    load = dyn_cast<LoadSDNode>(loadSD);
+    other = orOp->getOperand(0).getNode();
+  }
+
+  if (!load)
+    return nullptr;
+
+  unsigned shiftedBytes = 0;
+  if ((other->getOpcode() == ISD::SHL) &&
+      (other->getOperand(0).getNode() == load) &&
+      isa<ConstantSDNode>(other->getOperand(1)))
+    shiftedBytes = cast<ConstantSDNode>(other->getOperand(1).getNode())
+                       ->getAPIntValue()
+                       .getSExtValue() /
+                   8;
+  // TODO: Accept other shift operations such as srl and sra. A negative
+  // value of shiftedBytes could encode those.
+
+  unsigned storeMemSz = storeOp->getMemoryVT().getStoreSize();
+  // For now we only accept chains that move half of the loaded value to the
+  // other half.
+  if (2 * shiftedBytes != storeMemSz)
+    return nullptr;
+
+  const SDValue loadPtr = load->getBasePtr();
+  SDValue Ptr = storeOp->getBasePtr();
+  // TODO: Detect when both the LOAD and the STORE memory addresses are ADD
+  // instructions but with a known difference
+  bool samePtr = loadPtr == Ptr;
+  int64_t loadByteOffset = 0;
+  if (!samePtr) {
+    if (loadPtr.getOpcode() != ISD::ADD)
+      return nullptr;
+
+    // Detect if we are moving M[A+k] to M[A]:
+    if (!((loadPtr.getOperand(0) == Ptr) || (loadPtr.getOperand(1) == Ptr)))
+      return nullptr;
+
+    ConstantSDNode *offset = dyn_cast<ConstantSDNode>(loadPtr.getOperand(1));
+    if (!offset)
+      offset = dyn_cast<ConstantSDNode>(loadPtr.getOperand(0));
+
+    if (!offset)
+      return nullptr;
+
+    loadByteOffset = offset->getAPIntValue().getSExtValue();
+    // TODO: Accept negative offsets. How often do they happen?
+    if (loadByteOffset < 0)
+      return nullptr;
+  }
+
+  unsigned loadMemSz = load->getMemoryVT().getStoreSize();
+  bool upperHalfLoad =
+      ((loadByteOffset == loadMemSz) && (2 * loadMemSz == storeMemSz));
+
+  if (!(upperHalfLoad || samePtr))
+    return nullptr;
+
+  if (samePtr) {
+    // TODO: Store the lower loaded value to the upper half
+    if ((loadMemSz == storeMemSz) && (load->use_size() == 2)) {
+      DEBUG(dbgs() << "Reduce load width\n");
+    }
+    DEBUG(dbgs() << "Move lower to upper half\n");
+  }
+  DEBUG(dbgs() << "Reduce store width to half width\n");
+  return ShrinkLoadReplaceStoreWithStore({loadMemSz, 0}, loadSD, storeOp,
+                                         this);
+}
+
@@ -12846 +13082 @@
-  SDValue Ptr = ST->getBasePtr();
+  SDValue Ptr = ST->getBasePtr();
@@ -12849 +13085 @@
-  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+  if (VT.isVector() || !Value.hasOneUse())
@@ -12853,0 +13090,9 @@
+  if (Opc == ISD::OR) {
+    if (SDNode *NewSt =
+            ShrinkLoadShiftOrStoreWithLoadNewStore(ST, Value.getNode()))
+      return SDValue(NewSt, 0);
+  }
+
+  if (ST->isTruncatingStore())
+    return SDValue();
+
Index: test/CodeGen/ARM/stld-width-reduction1.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/stld-width-reduction1.ll
@@ -0,0 +1,33 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t-arm-none-eabi"
+; RUN: llc -mtriple=arm %s -o - | FileCheck %s
+; CHECK-LABEL: test_1x2:
+; CHECK: %bb.0:
+; CHECK: ldrh
+; CHECK-NEXT: strb
+
+; Function Attrs: norecurse nounwind
+define dso_local void @test_1x2(i16* nocapture, i32) local_unnamed_addr #0 {
+  %3 = getelementptr inbounds i16, i16* %0, i32 %1
+  %4 = load i16, i16* %3, align 2, !tbaa !3
+  %5 = and i16 %4, 255
+  %6 = shl i16 %4, 8
+  %7 = or i16 %5, %6
+  store i16 %7, i16* %3, align 2, !tbaa !3
+  ret void
+}
+
+attributes #0 = { 
norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+armv4t,+strict-align,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 7.0.0 (trunk 331513)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"short", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} Index: test/CodeGen/ARM/stld-width-reduction2.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stld-width-reduction2.ll @@ -0,0 +1,34 @@ +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv4t-arm-none-eabi" + +; RUN: llc -mtriple=arm %s -o - | FileCheck %s +; CHECK-LABEL: test_1x2p1: +; CHECK: %bb.0: +; CHECK: ldrb +; CHECK-NEXT: strb + +; Function Attrs: norecurse nounwind +define dso_local void @test_1x2p1(i16* nocapture, i32) local_unnamed_addr #0 { + %3 = getelementptr inbounds i16, i16* %0, i32 %1 + %4 = load i16, i16* %3, align 2, !tbaa !3 + %5 = and i16 %4, -256 + %6 = lshr i16 %4, 8 + %7 = or i16 %5, %6 + store i16 %7, i16* %3, align 2, !tbaa !3 + ret void +} + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+armv4t,+strict-align,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 7.0.0 (trunk 331513)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"short", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} Index: test/CodeGen/ARM/stld-width-reduction3.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stld-width-reduction3.ll @@ -0,0 +1,35 @@ +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv4t-arm-none-eabi" + +; RUN: llc -mtriple=arm %s -o - | FileCheck %s +; CHECK-LABEL: test_1x4p1: +; CHECK: ldrb +; CHECK-NEXT: orr +; CHECK-NEXT: str + +; Function Attrs: norecurse nounwind +define dso_local void @test_1x4p1(i32* nocapture, i32) local_unnamed_addr #0 { + %3 = getelementptr inbounds i32, i32* %0, i32 %1 + %4 = load i32, i32* %3, align 4, !tbaa !7 + %5 = and i32 %4, 65280 + %6 = lshr i32 %4, 8 + %7 = and i32 %6, 255 + %8 = or i32 %7, %5 + store i32 %8, i32* %3, align 4, !tbaa !7 + ret void +} + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" 
"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+armv4t,+strict-align,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 7.0.0 (trunk 331513)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"short", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} Index: test/CodeGen/ARM/stld-width-reduction4.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/stld-width-reduction4.ll @@ -0,0 +1,36 @@ +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv4t-arm-none-eabi" + +; RUN: llc -mtriple=arm %s -o - | FileCheck %s +; CHECK-LABEL: test_1x4p1_shl: +; CHECK: ldrb +; CHECK-NEXT: lsl +; CHECK-NEXT: orr +; CHECK-NEXT: str + +; Function Attrs: norecurse nounwind +define dso_local void @test_1x4p1_shl(i32* nocapture, i32) local_unnamed_addr #0 { + %3 = getelementptr inbounds i32, i32* %0, i32 %1 + %4 = load i32, i32* %3, align 4, !tbaa !7 + %5 = and i32 %4, 65280 + %6 = shl i32 %4, 8 + %7 = and i32 %6, 16711680 + %8 = or i32 %7, %5 + store i32 %8, i32* %3, align 4, !tbaa !7 + ret void +} + +attributes #0 = { norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="arm7tdmi" "target-features"="+armv4t,+strict-align,-thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"min_enum_size", i32 4} +!2 = !{!"clang version 7.0.0 (trunk 331513)"} +!3 = !{!4, !4, i64 0} +!4 = !{!"short", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!8, !8, i64 0} +!8 = !{!"int", !5, i64 0} Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -48,7 +48,7 @@ -; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movabsq $281474976710655, %rcx # imm = 0xFFFFFFFFFFFF -; CHECK-NEXT: andq %rdi, %rcx -; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 -; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax +; CHECK-NEXT: shlq $48, %rax +; CHECK-NEXT: movq -24(%rsp), %rcx +; CHECK-NEXT: movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF +; CHECK-NEXT: andq %rdi, %rdx +; CHECK-NEXT: orq %rax, %rdx +; CHECK-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) Index: test/CodeGen/X86/pr32329.ll =================================================================== --- test/CodeGen/X86/pr32329.ll +++ test/CodeGen/X86/pr32329.ll @@ -32,28 +32,28 @@ -; X86-NEXT: movl obj, %edx -; X86-NEXT: movsbl var_27, %eax -; X86-NEXT: movzwl var_2, %esi -; X86-NEXT: movl var_310, %ecx -; X86-NEXT: imull %eax, %ecx -; X86-NEXT: addl var_24, %ecx -; X86-NEXT: andl $4194303, %edx # imm = 0x3FFFFF -; X86-NEXT: leal 
(%edx,%edx), %ebx -; X86-NEXT: subl %eax, %ebx -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: subl %esi, %edi -; X86-NEXT: imull %edi, %ecx -; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 -; X86-NEXT: movl $9, %esi -; X86-NEXT: xorl %ebp, %ebp -; X86-NEXT: shldl %cl, %esi, %ebp -; X86-NEXT: shll %cl, %esi -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %esi, %ebp -; X86-NEXT: movl $0, %ecx -; X86-NEXT: cmovnel %ecx, %esi -; X86-NEXT: cmpl %edx, %edi -; X86-NEXT: movl %ebp, var_50+4 -; X86-NEXT: movl %esi, var_50 -; X86-NEXT: setge var_205 -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: movb %bl, var_218 -; X86-NEXT: popl %esi +; X86-NEXT: movsbl var_27, %eax +; X86-NEXT: movzwl var_2, %esi +; X86-NEXT: movl var_310, %ecx +; X86-NEXT: imull %eax, %ecx +; X86-NEXT: addl var_24, %ecx +; X86-NEXT: movl $4194303, %edi # imm = 0x3FFFFF +; X86-NEXT: andl obj, %edi +; X86-NEXT: leal (%edi,%edi), %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: subl %esi, %ebx +; X86-NEXT: imull %ebx, %ecx +; X86-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 +; X86-NEXT: movl $9, %esi +; X86-NEXT: xorl %ebp, %ebp +; X86-NEXT: shldl %cl, %esi, %ebp +; X86-NEXT: shll %cl, %esi +; X86-NEXT: testb $32, %cl +; X86-NEXT: cmovnel %esi, %ebp +; X86-NEXT: movl $0, %ecx +; X86-NEXT: cmovnel %ecx, %esi +; X86-NEXT: cmpl %edi, %ebx +; X86-NEXT: movl %ebp, var_50+4 +; X86-NEXT: movl %esi, var_50 +; X86-NEXT: setge var_205 +; X86-NEXT: imull %eax, %edx +; X86-NEXT: movb %dl, var_218 +; X86-NEXT: popl %esi @@ -61 +61 @@ -; X86-NEXT: popl %edi +; X86-NEXT: popl %edi @@ -63 +63 @@ -; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebx @@ -65 +65 @@ -; X86-NEXT: popl %ebp +; X86-NEXT: popl %ebp @@ -71,22 +71,22 @@ -; X64-NEXT: movl {{.*}}(%rip), %eax -; X64-NEXT: movsbl {{.*}}(%rip), %r9d -; X64-NEXT: movzwl {{.*}}(%rip), %r8d -; X64-NEXT: movl {{.*}}(%rip), %ecx -; X64-NEXT: imull %r9d, %ecx -; X64-NEXT: addl {{.*}}(%rip), %ecx -; X64-NEXT: andl $4194303, %eax # imm = 0x3FFFFF -; X64-NEXT: leal (%rax,%rax), %edi -; X64-NEXT: subl %r9d, %edi -; X64-NEXT: movl %edi, %esi -; X64-NEXT: subl %r8d, %esi -; X64-NEXT: imull %esi, %ecx -; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 -; X64-NEXT: movl $9, %edx -; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlq %cl, %rdx -; X64-NEXT: movq %rdx, {{.*}}(%rip) -; X64-NEXT: cmpl %eax, %esi -; X64-NEXT: setge {{.*}}(%rip) -; X64-NEXT: imull %r9d, %edi -; X64-NEXT: movb %dil, {{.*}}(%rip) -; X64-NEXT: retq +; X64-NEXT: movsbl var_27(%rip), %r9d +; X64-NEXT: movzwl var_2(%rip), %r8d +; X64-NEXT: movl var_310(%rip), %ecx +; X64-NEXT: imull %r9d, %ecx +; X64-NEXT: addl var_24(%rip), %ecx +; X64-NEXT: movl $4194303, %esi # imm = 0x3FFFFF +; X64-NEXT: andl obj(%rip), %esi +; X64-NEXT: leal (%rsi,%rsi), %edi +; X64-NEXT: subl %r9d, %edi +; X64-NEXT: movl %edi, %edx +; X64-NEXT: subl %r8d, %edx +; X64-NEXT: imull %edx, %ecx +; X64-NEXT: addl $-1437483407, %ecx # imm = 0xAA51BE71 +; X64-NEXT: movl $9, %eax +; X64-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NEXT: shlq %cl, %rax +; X64-NEXT: movq %rax, var_50(%rip) +; X64-NEXT: cmpl %esi, %edx +; X64-NEXT: setge var_205(%rip) +; X64-NEXT: imull %r9d, %edi +; X64-NEXT: movb %dil, var_218(%rip) +; X64-NEXT: retq Index: test/CodeGen/X86/pr32588.ll =================================================================== --- test/CodeGen/X86/pr32588.ll +++ test/CodeGen/X86/pr32588.ll @@ -7,4 +7,4 @@ -; CHECK: cmpl $1, c(%rip) -; CHECK-NEXT: sbbl %eax, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, 
d(%rip) +; CHECK: xorl %eax, %eax +; CHECK-NEXT: cmpl $0, c(%rip) +; CHECK-NEXT: sete %al +; CHECK-NEXT: movl %eax, d(%rip)
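
Illustrative only, not part of the patch: the IR in stld-width-reduction1.ll corresponds roughly to the following C++ (the function name is made up):

  #include <cstdint>

  void test_1x2_c(uint16_t *p, int i) {
    uint16_t v = p[i];
    // The OR merges the unchanged low byte with a copy of it shifted into
    // the high byte, so only one byte of memory actually changes and the
    // i16 store can be narrowed to a byte store.
    p[i] = (uint16_t)((v & 0x00FFu) | (uint16_t)(v << 8));
  }

On little-endian ARM only the byte at the upper address changes, which is why the test expects an ldrh followed by a single strb.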