Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1932,42 +1932,6 @@
   return true;
 }
 
-/// Get the appropriate X86 opcode for an in-memory arithmetic operation that
-/// also sets flags.
-///
-/// FIXME: This is essentially re-implemneting a subset of the patterns for
-/// these instructions. Instead, we should compute this from the patterns
-/// somehow.
-///
-/// FIXME: Currently we only support integer operations.
-///
-/// If there is no X86 opcode, returns none.
-static Optional<unsigned> getFusedLdStWithFlagsOpcode(EVT LdVT, unsigned Opc) {
-  auto SelectSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
-                        unsigned Opc8) -> Optional<unsigned> {
-    switch (LdVT.getSimpleVT().SimpleTy) {
-    case MVT::i64:
-      return Opc64;
-    case MVT::i32:
-      return Opc32;
-    case MVT::i16:
-      return Opc16;
-    case MVT::i8:
-      return Opc8;
-    default:
-      return None;
-    }
-  };
-  switch (Opc) {
-  default:
-    return None;
-  case X86ISD::DEC:
-    return SelectSize(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
-  case X86ISD::INC:
-    return SelectSize(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m);
-  }
-}
-
 /// Check whether or not the chain ending in StoreNode is suitable for doing
 /// the {load; op; store} to modify transformation.
 static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
@@ -2047,15 +2011,16 @@
   return true;
 }
 
-// Change a chain of {load; incr or dec; store} of the same value into
-// a simple increment or decrement through memory of that value, if the
-// uses of the modified value and its address are suitable.
-// The DEC64m tablegen pattern is currently not able to match the case where
-// the EFLAGS on the original DEC are used. (This also applies to
-// {INC,DEC}X{64,32,16,8}.)
-// We'll need to improve tablegen to allow flags to be transferred from a
-// node in the pattern to the result node. probably with a new keyword
-// for example, we have this
+// Change a chain of {load; op; store} of the same value into a simple op
+// through memory of that value, if the uses of the modified value and its
+// address are suitable.
+//
+// The tablegen pattern for the memory operand is currently not able to match
+// the case where the EFLAGS on the original operation are used.
+//
+// To move this to tablegen, we'll need to improve tablegen to allow flags to
+// be transferred from a node in the pattern to the result node, probably with
+// a new keyword. For example, we have this
 // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
 //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
 //   (implicit EFLAGS)]>;
@@ -2064,19 +2029,29 @@
 //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
 //   (transferrable EFLAGS)]>;
 //
-// FIXME: This should handle a wide range of operations which support RMW
-// memory operands, not just inc and dec.
+// Until then, we manually fold these and instruction select the operation
+// here.
 bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
   StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
   SDValue StoredVal = StoreNode->getOperand(1);
   unsigned Opc = StoredVal->getOpcode();
 
+  // Before we try to select anything, make sure this is a memory operand size
+  // and opcode we can handle. Note that this must match the code below that
+  // actually lowers the opcodes.
   EVT MemVT = StoreNode->getMemoryVT();
-  if (!MemVT.isSimple())
+  if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 &&
+      MemVT != MVT::i8)
     return false;
-  Optional<unsigned> NewOpc = getFusedLdStWithFlagsOpcode(MemVT, Opc);
-  if (!NewOpc)
+  switch (Opc) {
+  default:
     return false;
+  case X86ISD::INC:
+  case X86ISD::DEC:
+  case X86ISD::ADD:
+  case X86ISD::SUB:
+    break;
+  }
 
   LoadSDNode *LoadNode = nullptr;
   SDValue InputChain;
@@ -2089,12 +2064,57 @@
                     Segment))
     return false;
 
+  auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16,
+                                 unsigned Opc8) {
+    switch (MemVT.getSimpleVT().SimpleTy) {
+    case MVT::i64:
+      return Opc64;
+    case MVT::i32:
+      return Opc32;
+    case MVT::i16:
+      return Opc16;
+    case MVT::i8:
+      return Opc8;
+    default:
+      llvm_unreachable("Invalid size!");
+    }
+  };
+
+  MachineSDNode *Result;
+  switch (Opc) {
+  case X86ISD::INC:
+  case X86ISD::DEC: {
+    unsigned NewOpc = Opc == X86ISD::INC
+                          ? SelectOpcodeForSize(X86::INC64m, X86::INC32m,
+                                                X86::INC16m, X86::INC8m)
+                          : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m,
+                                                X86::DEC16m, X86::DEC8m);
+    const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
+    Result =
+        CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+    break;
+  }
+  case X86ISD::ADD:
+  case X86ISD::SUB: {
+    unsigned NewOpc = Opc == X86ISD::ADD
+                          ? SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr,
+                                                X86::ADD16mr, X86::ADD8mr)
+                          : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr,
+                                                X86::SUB16mr, X86::SUB8mr);
+    const SDValue Ops[] = {Base, Scale, Index,
+                           Disp, Segment, StoredVal->getOperand(1),
+                           InputChain};
+    Result =
+        CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
+    break;
+  }
+  default:
+    llvm_unreachable("Invalid opcode!");
+  }
+
   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
   MemOp[0] = StoreNode->getMemOperand();
   MemOp[1] = LoadNode->getMemOperand();
-  const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
-  MachineSDNode *Result =
-      CurDAG->getMachineNode(*NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
   Result->setMemRefs(MemOp, MemOp + 2);
 
   ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
Index: llvm/trunk/test/CodeGen/X86/add.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/add.ll
+++ llvm/trunk/test/CodeGen/X86/add.ll
@@ -341,9 +341,8 @@
 ; X32-LABEL: test12:
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl $-2147483648, %ecx # imm = 0x80000000
-; X32-NEXT:    addl (%eax), %ecx
+; X32-NEXT:    addl %ecx, (%eax)
 ; X32-NEXT:    adcl $0, 4(%eax)
-; X32-NEXT:    movl %ecx, (%eax)
 ; X32-NEXT:    retl
 ;
 ; X64-LINUX-LABEL: test12:
@@ -366,9 +365,8 @@
 ; X32-LABEL: test13:
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movl $128, %ecx
-; X32-NEXT:    addl (%eax), %ecx
+; X32-NEXT:    addl %ecx, (%eax)
 ; X32-NEXT:    adcl $0, 4(%eax)
-; X32-NEXT:    movl %ecx, (%eax)
 ; X32-NEXT:    retl
 ;
 ; X64-LINUX-LABEL: test13:
Index: llvm/trunk/test/CodeGen/X86/addcarry.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/addcarry.ll
+++ llvm/trunk/test/CodeGen/X86/addcarry.ll
@@ -171,8 +171,7 @@
 ; CHECK:       # BB#0: # %entry
 ; CHECK-NEXT:    movq %rdx, %rax
 ; CHECK-NEXT:    mulq %rsi
-; CHECK-NEXT:    addq (%rdi), %rax
-; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    addq %rax, (%rdi)
 ; CHECK-NEXT:    adcq 8(%rdi), %rdx
 ; CHECK-NEXT:    movq %rdx, 8(%rdi)
 ; CHECK-NEXT:    adcl $0, 16(%rdi)
Index: llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
+++ llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll
@@ -0,0 +1,420 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+@g64 = external global i64, align 8
+@g32 = external global i32, align 4
+@g16 = external global i16, align 2
+@g8 = external global i8, align 1
+
+declare void @a()
+declare void @b()
+
+define void @add64_imm_br() nounwind {
+; CHECK-LABEL: add64_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    addq %rax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB0_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB0_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i64, i64* @g64
+  %add = add nsw i64 %load1, 42
+  store i64 %add, i64* @g64
+  %cond = icmp slt i64 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add32_imm_br() nounwind {
+; CHECK-LABEL: add32_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $42, %eax
+; CHECK-NEXT:    addl %eax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB1_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB1_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i32, i32* @g32
+  %add = add nsw i32 %load1, 42
+  store i32 %add, i32* @g32
+  %cond = icmp slt i32 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add16_imm_br() nounwind {
+; CHECK-LABEL: add16_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movw $42, %ax
+; CHECK-NEXT:    addw %ax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB2_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB2_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i16, i16* @g16
+  %add = add nsw i16 %load1, 42
+  store i16 %add, i16* @g16
+  %cond = icmp slt i16 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add8_imm_br() nounwind {
+; CHECK-LABEL: add8_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movb $42, %al
+; CHECK-NEXT:    addb %al, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB3_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB3_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i8, i8* @g8
+  %add = add nsw i8 %load1, 42
+  store i8 %add, i8* @g8
+  %cond = icmp slt i8 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: add64_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addq %rdi, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB4_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB4_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i64, i64* @g64
+  %add = add nsw i64 %load1, %arg
+  store i64 %add, i64* @g64
+  %cond = icmp slt i64 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: add32_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addl %edi, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB5_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB5_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i32, i32* @g32
+  %add = add nsw i32 %load1, %arg
+  store i32 %add, i32* @g32
+  %cond = icmp slt i32 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: add16_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addw %di, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB6_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB6_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i16, i16* @g16
+  %add = add nsw i16 %load1, %arg
+  store i16 %add, i16* @g16
+  %cond = icmp slt i16 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @add8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: add8_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    addb %dil, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB7_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB7_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i8, i8* @g8
+  %add = add nsw i8 %load1, %arg
+  store i8 %add, i8* @g8
+  %cond = icmp slt i8 %add, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub64_imm_br() nounwind {
+; CHECK-LABEL: sub64_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movq $-42, %rax
+; CHECK-NEXT:    addq %rax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB8_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB8_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i64, i64* @g64
+  %sub = sub nsw i64 %load1, 42
+  store i64 %sub, i64* @g64
+  %cond = icmp slt i64 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub32_imm_br() nounwind {
+; CHECK-LABEL: sub32_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movl $-42, %eax
+; CHECK-NEXT:    addl %eax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB9_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB9_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i32, i32* @g32
+  %sub = sub nsw i32 %load1, 42
+  store i32 %sub, i32* @g32
+  %cond = icmp slt i32 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub16_imm_br() nounwind {
+; CHECK-LABEL: sub16_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movw $-42, %ax
+; CHECK-NEXT:    addw %ax, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB10_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB10_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i16, i16* @g16
+  %sub = sub nsw i16 %load1, 42
+  store i16 %sub, i16* @g16
+  %cond = icmp slt i16 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub8_imm_br() nounwind {
+; CHECK-LABEL: sub8_imm_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    movb $-42, %al
+; CHECK-NEXT:    addb %al, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB11_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB11_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i8, i8* @g8
+  %sub = sub nsw i8 %load1, 42
+  store i8 %sub, i8* @g8
+  %cond = icmp slt i8 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub64_reg_br(i64 %arg) nounwind {
+; CHECK-LABEL: sub64_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subq %rdi, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB12_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB12_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i64, i64* @g64
+  %sub = sub nsw i64 %load1, %arg
+  store i64 %sub, i64* @g64
+  %cond = icmp slt i64 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub32_reg_br(i32 %arg) nounwind {
+; CHECK-LABEL: sub32_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subl %edi, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB13_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB13_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i32, i32* @g32
+  %sub = sub nsw i32 %load1, %arg
+  store i32 %sub, i32* @g32
+  %cond = icmp slt i32 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub16_reg_br(i16 %arg) nounwind {
+; CHECK-LABEL: sub16_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subw %di, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB14_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB14_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i16, i16* @g16
+  %sub = sub nsw i16 %load1, %arg
+  store i16 %sub, i16* @g16
+  %cond = icmp slt i16 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
+
+define void @sub8_reg_br(i8 %arg) nounwind {
+; CHECK-LABEL: sub8_reg_br:
+; CHECK:       # BB#0: # %entry
+; CHECK-NEXT:    subb %dil, {{.*}}(%rip)
+; CHECK-NEXT:    js .LBB15_1
+; CHECK-NEXT:  # BB#2: # %b
+; CHECK-NEXT:    jmp b # TAILCALL
+; CHECK-NEXT:  .LBB15_1: # %a
+; CHECK-NEXT:    jmp a # TAILCALL
+entry:
+  %load1 = load i8, i8* @g8
+  %sub = sub nsw i8 %load1, %arg
+  store i8 %sub, i8* @g8
+  %cond = icmp slt i8 %sub, 0
+  br i1 %cond, label %a, label %b
+
+a:
+  tail call void @a()
+  ret void
+
+b:
+  tail call void @b()
+  ret void
+}
Index: llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
+++ llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll
@@ -65,10 +65,9 @@
 define i1 @plus_forty_two() nounwind {
 ; CHECK32-LABEL: plus_forty_two:
 ; CHECK32:       # BB#0: # %entry
-; CHECK32-NEXT:    movl L, %ecx
 ; CHECK32-NEXT:    movb M, %al
-; CHECK32-NEXT:    addl $42, %ecx
-; CHECK32-NEXT:    movl %ecx, L
+; CHECK32-NEXT:    movl $42, %ecx
+; CHECK32-NEXT:    addl %ecx, L
 ; CHECK32-NEXT:    jne .LBB1_2
 ; CHECK32-NEXT:  # BB#1: # %entry
 ; CHECK32-NEXT:    andb $8, %al
@@ -82,10 +81,9 @@
 ;
 ; CHECK64-LABEL: plus_forty_two:
 ; CHECK64:       # BB#0: # %entry
-; CHECK64-NEXT:    movl {{.*}}(%rip), %ecx
 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
-; CHECK64-NEXT:    addl $42, %ecx
-; CHECK64-NEXT:    movl %ecx, {{.*}}(%rip)
+; CHECK64-NEXT:    movl $42, %ecx
+; CHECK64-NEXT:    addl %ecx, {{.*}}(%rip)
 ; CHECK64-NEXT:    jne .LBB1_2
 ; CHECK64-NEXT:  # BB#1: # %entry
 ; CHECK64-NEXT:    andb $8, %al
@@ -165,10 +163,9 @@
 define i1 @minus_forty_two() nounwind {
 ; CHECK32-LABEL: minus_forty_two:
 ; CHECK32:       # BB#0: # %entry
-; CHECK32-NEXT:    movl L, %ecx
 ; CHECK32-NEXT:    movb M, %al
-; CHECK32-NEXT:    addl $-42, %ecx
-; CHECK32-NEXT:    movl %ecx, L
+; CHECK32-NEXT:    movl $-42, %ecx
+; CHECK32-NEXT:    addl %ecx, L
 ; CHECK32-NEXT:    jne .LBB3_2
 ; CHECK32-NEXT:  # BB#1: # %entry
 ; CHECK32-NEXT:    andb $8, %al
@@ -182,10 +179,9 @@
 ;
 ; CHECK64-LABEL: minus_forty_two:
 ; CHECK64:       # BB#0: # %entry
-; CHECK64-NEXT:    movl {{.*}}(%rip), %ecx
 ; CHECK64-NEXT:    movb {{.*}}(%rip), %al
-; CHECK64-NEXT:    addl $-42, %ecx
-; CHECK64-NEXT:    movl %ecx, {{.*}}(%rip)
+; CHECK64-NEXT:    movl $-42, %ecx
+; CHECK64-NEXT:    addl %ecx, {{.*}}(%rip)
 ; CHECK64-NEXT:    jne .LBB3_2
 ; CHECK64-NEXT:  # BB#1: # %entry
 ; CHECK64-NEXT:    andb $8, %al
Index: llvm/trunk/test/CodeGen/X86/pr32659.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/pr32659.ll
+++ llvm/trunk/test/CodeGen/X86/pr32659.ll
@@ -50,10 +50,10 @@
 ; CHECK-NEXT:    sarl $31, %eax
 ; CHECK-NEXT:    andl %eax, e+4
 ; CHECK-NEXT:    decl g
-; CHECK-NEXT:    movl f, %eax
-; CHECK-NEXT:    addl $1, %eax
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    incl %eax
+; CHECK-NEXT:    addl %eax, f
 ; CHECK-NEXT:    adcl $0, f+4
-; CHECK-NEXT:    movl %eax, f
 ; CHECK-NEXT:    addl $8, %esp
 ; CHECK-NEXT:    popl %ebx
 ; CHECK-NEXT:    retl
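
Editorial note (illustrative sketch, not part of the patch): the new fold only applies when, per the comment above, "the uses of the modified value and its address are suitable" as checked by isFusableLoadOpStorePattern; in particular, if the arithmetic data result is needed in a register as well as in memory, the store cannot be rewritten into a memory-destination instruction. A minimal hypothetical IR example in the style of fold-rmw-ops.ll (the @g32x global and function name are made up for illustration) where the fold should be skipped and a separate load/add/store sequence kept:

; Illustrative only -- the add result feeds both the store and the return
; value, so it must live in a register and an RMW `addl %edi, g32x(%rip)`
; form would not preserve it.
@g32x = external global i32, align 4

define i32 @add32_result_also_used(i32 %arg) nounwind {
entry:
  %load1 = load i32, i32* @g32x
  %add = add i32 %load1, %arg
  store i32 %add, i32* @g32x
  ret i32 %add
}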