Index: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1932,6 +1932,73 @@ return true; } +/// Test whether the given node which sets flags has any uses which require the +/// CF flag to be accurate. +static bool hasNoCarryFlagUses(SDNode *N) { + // Examine each user of the node. + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; + ++UI) { + // Only check things that use the flags. + if (UI.getUse().getResNo() != 1) + continue; + // Only examine CopyToReg uses. + if (UI->getOpcode() != ISD::CopyToReg) + return false; + // Only examine CopyToReg uses that copy to EFLAGS. + if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) + return false; + // Examine each user of the CopyToReg use. + for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); + FlagUI != FlagUE; ++FlagUI) { + // Only examine the Flag result. + if (FlagUI.getUse().getResNo() != 1) + continue; + // Anything unusual: assume conservatively. + if (!FlagUI->isMachineOpcode()) + return false; + // Examine the opcode of the user. + switch (FlagUI->getMachineOpcode()) { + // Comparisons which don't examine the CF flag. 
+ case X86::SETOr: case X86::SETNOr: case X86::SETEr: case X86::SETNEr: + case X86::SETSr: case X86::SETNSr: case X86::SETPr: case X86::SETNPr: + case X86::SETLr: case X86::SETGEr: case X86::SETLEr: case X86::SETGr: + case X86::JO_1: case X86::JNO_1: case X86::JE_1: case X86::JNE_1: + case X86::JS_1: case X86::JNS_1: case X86::JP_1: case X86::JNP_1: + case X86::JL_1: case X86::JGE_1: case X86::JLE_1: case X86::JG_1: + case X86::CMOVO16rr: case X86::CMOVO32rr: case X86::CMOVO64rr: + case X86::CMOVO16rm: case X86::CMOVO32rm: case X86::CMOVO64rm: + case X86::CMOVNO16rr: case X86::CMOVNO32rr: case X86::CMOVNO64rr: + case X86::CMOVNO16rm: case X86::CMOVNO32rm: case X86::CMOVNO64rm: + case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr: + case X86::CMOVE16rm: case X86::CMOVE32rm: case X86::CMOVE64rm: + case X86::CMOVNE16rr: case X86::CMOVNE32rr: case X86::CMOVNE64rr: + case X86::CMOVNE16rm: case X86::CMOVNE32rm: case X86::CMOVNE64rm: + case X86::CMOVS16rr: case X86::CMOVS32rr: case X86::CMOVS64rr: + case X86::CMOVS16rm: case X86::CMOVS32rm: case X86::CMOVS64rm: + case X86::CMOVNS16rr: case X86::CMOVNS32rr: case X86::CMOVNS64rr: + case X86::CMOVNS16rm: case X86::CMOVNS32rm: case X86::CMOVNS64rm: + case X86::CMOVP16rr: case X86::CMOVP32rr: case X86::CMOVP64rr: + case X86::CMOVP16rm: case X86::CMOVP32rm: case X86::CMOVP64rm: + case X86::CMOVNP16rr: case X86::CMOVNP32rr: case X86::CMOVNP64rr: + case X86::CMOVNP16rm: case X86::CMOVNP32rm: case X86::CMOVNP64rm: + case X86::CMOVL16rr: case X86::CMOVL32rr: case X86::CMOVL64rr: + case X86::CMOVL16rm: case X86::CMOVL32rm: case X86::CMOVL64rm: + case X86::CMOVGE16rr: case X86::CMOVGE32rr: case X86::CMOVGE64rr: + case X86::CMOVGE16rm: case X86::CMOVGE32rm: case X86::CMOVGE64rm: + case X86::CMOVLE16rr: case X86::CMOVLE32rr: case X86::CMOVLE64rr: + case X86::CMOVLE16rm: case X86::CMOVLE32rm: case X86::CMOVLE64rm: + case X86::CMOVG16rr: case X86::CMOVG32rr: case X86::CMOVG64rr: + case X86::CMOVG16rm: case X86::CMOVG32rm: case 
X86::CMOVG64rm: + continue; + // Anything else: assume conservatively. + default: + return false; + } + } + } + return true; +} + /// Check whether or not the chain ending in StoreNode is suitable for doing /// the {load; op; store} to modify transformation. static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, @@ -2064,8 +2131,8 @@ Segment)) return false; - auto SelectOpcodeForSize = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, - unsigned Opc8) { + auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, + unsigned Opc8 = 0) { switch (MemVT.getSimpleVT().SimpleTy) { case MVT::i64: return Opc64; @@ -2084,11 +2151,10 @@ switch (Opc) { case X86ISD::INC: case X86ISD::DEC: { - unsigned NewOpc = Opc == X86ISD::INC - ? SelectOpcodeForSize(X86::INC64m, X86::INC32m, - X86::INC16m, X86::INC8m) - : SelectOpcodeForSize(X86::DEC64m, X86::DEC32m, - X86::DEC16m, X86::DEC8m); + unsigned NewOpc = + Opc == X86ISD::INC + ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) + : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops); @@ -2096,14 +2162,75 @@ } case X86ISD::ADD: case X86ISD::SUB: { - unsigned NewOpc = Opc == X86ISD::ADD - ? 
SelectOpcodeForSize(X86::ADD64mr, X86::ADD32mr, - X86::ADD16mr, X86::ADD8mr) - : SelectOpcodeForSize(X86::SUB64mr, X86::SUB32mr, - X86::SUB16mr, X86::SUB8mr); - const SDValue Ops[] = {Base, Scale, Index, - Disp, Segment, StoredVal->getOperand(1), - InputChain}; + auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { + switch (Opc) { + case X86ISD::ADD: + return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, + X86::ADD8mr); + case X86ISD::SUB: + return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, + X86::SUB8mr); + default: + llvm_unreachable("Invalid opcode!"); + } + }; + auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) { + switch (Opc) { + case X86ISD::ADD: + return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8); + case X86ISD::SUB: + return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8); + default: + llvm_unreachable("Invalid opcode!"); + } + }; + auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { + switch (Opc) { + case X86ISD::ADD: + return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, + X86::ADD8mi); + case X86ISD::SUB: + return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, + X86::SUB8mi); + default: + llvm_unreachable("Invalid opcode!"); + } + }; + + unsigned NewOpc = SelectRegOpcode(Opc); + SDValue Operand = StoredVal->getOperand(1); + + // See if the operand is a constant that we can fold into an immediate + // operand. + if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) { + auto OperandV = OperandC->getAPIntValue(); + + // Check if we can shrink the operand enough to fit in an immediate (or + // fit into a smaller immediate) by negating it and switching the + // operation. + if (((MemVT != MVT::i8 && OperandV.getMinSignedBits() > 8 && + (-OperandV).getMinSignedBits() <= 8) || + (MemVT == MVT::i64 && OperandV.getMinSignedBits() > 32 && + (-OperandV).getMinSignedBits() <= 32)) && + hasNoCarryFlagUses(StoredVal.getNode())) { + OperandV = -OperandV; + Opc = Opc == X86ISD::ADD ? 
X86ISD::SUB : X86ISD::ADD; + } + + // First try to fit this into an Imm8 operand. If it doesn't fit, then try + // the larger immediate operand. + if (MemVT != MVT::i8 && OperandV.getMinSignedBits() <= 8) { + Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); + NewOpc = SelectImm8Opcode(Opc); + } else if (OperandV.getActiveBits() <= MemVT.getSizeInBits() && + (MemVT != MVT::i64 || OperandV.getMinSignedBits() <= 32)) { + Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); + NewOpc = SelectImmOpcode(Opc); + } + } + + const SDValue Ops[] = {Base, Scale, Index, Disp, + Segment, Operand, InputChain}; Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops); break; Index: llvm/trunk/test/CodeGen/X86/add.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/add.ll +++ llvm/trunk/test/CodeGen/X86/add.ll @@ -340,8 +340,7 @@ define void @test12(i64* inreg %a) nounwind { ; X32-LABEL: test12: ; X32: # BB#0: # %entry -; X32-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X32-NEXT: addl %ecx, (%eax) +; X32-NEXT: addl $-2147483648, (%eax) # imm = 0x80000000 ; X32-NEXT: adcl $0, 4(%eax) ; X32-NEXT: retl ; @@ -364,8 +363,7 @@ define void @test13(i64* inreg %a) nounwind { ; X32-LABEL: test13: ; X32: # BB#0: # %entry -; X32-NEXT: movl $128, %ecx -; X32-NEXT: addl %ecx, (%eax) +; X32-NEXT: addl $128, (%eax) ; X32-NEXT: adcl $0, 4(%eax) ; X32-NEXT: retl ; Index: llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll +++ llvm/trunk/test/CodeGen/X86/fold-rmw-ops.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -show-mc-encoding | FileCheck %s target triple = "x86_64-unknown-unknown" @@ -11,19 +11,259 @@ declare void @a() declare void @b() 
-define void @add64_imm_br() nounwind { -; CHECK-LABEL: add64_imm_br: +define void @add64_imm32_br() nounwind { +; CHECK-LABEL: add64_imm32_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: addq %rax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB0_1 +; CHECK-NEXT: addq $16777214, {{.*}}(%rip) # encoding: [0x48,0x81,0x05,A,A,A,A,0xfe,0xff,0xff,0x00] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0xFFFFFE +; CHECK-NEXT: js .LBB0_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB0_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 ; CHECK-NEXT: .LBB0_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i64, i64* @g64 - %add = add nsw i64 %load1, 42 + ; Add 0x00FFFFFE, a positive immediate requiring 24-bits. 
+ %add = add i64 %load1, 16777214 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_sext_imm32_br() nounwind { +; CHECK-LABEL: add64_sext_imm32_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq $-2147483648, {{.*}}(%rip) # encoding: [0x48,0x81,0x05,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: js .LBB1_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB1_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB1_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Add -0x80000000, which requires sign-extended 32 bits. 
+ %add = add i64 %load1, -2147483648 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_imm32_via_sub_br() nounwind { +; CHECK-LABEL: add64_imm32_via_sub_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subq $-2147483648, {{.*}}(%rip) # encoding: [0x48,0x81,0x2d,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: js .LBB2_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB2_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB2_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Add 0x80000000, which cannot fit in a sign extended 32-bit immediate. This + ; gets folded because we can instead subtract -0x80000000. 
+ %add = add i64 %load1, 2147483648 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_no_imm32_via_sub_due_to_cf_br() nounwind { +; CHECK-LABEL: add64_no_imm32_via_sub_due_to_cf_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $2147483648, %eax # encoding: [0xb8,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: addq %rax, {{.*}}(%rip) # encoding: [0x48,0x01,0x05,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-4, kind: reloc_riprel_4byte +; CHECK-NEXT: jae .LBB3_2 # encoding: [0x73,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB3_2-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB3_2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Add 0x80000000, which cannot fit in a sign extended 32-bit immediate, but + ; could in theory be folded into an immediate operand of a sub. However, we + ; use the CF flag here and so shouldn't make that transformation. 
+ %add = add i64 %load1, 2147483648 + store i64 %add, i64* @g64 + %cond = icmp ult i64 %add, 2147483648 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_too_large_imm32_br() nounwind { +; CHECK-LABEL: add64_too_large_imm32_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movl $2147483649, %eax # encoding: [0xb8,0x01,0x00,0x00,0x80] +; CHECK-NEXT: # imm = 0x80000001 +; CHECK-NEXT: addq %rax, {{.*}}(%rip) # encoding: [0x48,0x01,0x05,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB4_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB4_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB4_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Add 0x80000001, which cannot fit in a sign extended 32-bit immediate. This + ; should not get folded into an immediate. 
+ %add = add i64 %load1, 2147483649 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_imm8_via_sub_br() nounwind { +; CHECK-LABEL: add64_imm8_via_sub_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subq $-128, {{.*}}(%rip) # encoding: [0x48,0x83,0x2d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB5_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB5_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB5_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Add 0x80 which can't quite fit into an imm8 because it would be sign + ; extended, but which can fit if we convert to a sub and negate the value. 
+ %add = add i64 %load1, 128 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_imm8_br() nounwind { +; CHECK-LABEL: add64_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq $42, {{.*}}(%rip) # encoding: [0x48,0x83,0x05,A,A,A,A,0x2a] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB6_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB6_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB6_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + %add = add i64 %load1, 42 + store i64 %add, i64* @g64 + %cond = icmp slt i64 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add64_imm8_neg_br() nounwind { +; CHECK-LABEL: add64_imm8_neg_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addq $-42, {{.*}}(%rip) # encoding: [0x48,0x83,0x05,A,A,A,A,0xd6] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB7_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB7_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB7_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + %add = add i64 %load1, -42 store i64 %add, i64* @g64 %cond = icmp slt i64 %add, 0 br i1 %cond, 
label %a, label %b @@ -40,16 +280,85 @@ define void @add32_imm_br() nounwind { ; CHECK-LABEL: add32_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $42, %eax -; CHECK-NEXT: addl %eax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB1_1 +; CHECK-NEXT: addl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x05,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: js .LBB8_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB8_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB1_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB8_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i32, i32* @g32 + ; Add 0x80000000, a positive number requiring 32 bits of immediate. 
+ %add = add i32 %load1, 2147483648 + store i32 %add, i32* @g32 + %cond = icmp slt i32 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add32_imm8_br() nounwind { +; CHECK-LABEL: add32_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addl $42, {{.*}}(%rip) # encoding: [0x83,0x05,A,A,A,A,0x2a] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB9_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB9_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB9_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i32, i32* @g32 - %add = add nsw i32 %load1, 42 + %add = add i32 %load1, 42 + store i32 %add, i32* @g32 + %cond = icmp slt i32 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add32_imm8_neg_br() nounwind { +; CHECK-LABEL: add32_imm8_neg_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addl $-42, {{.*}}(%rip) # encoding: [0x83,0x05,A,A,A,A,0xd6] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB10_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB10_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB10_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i32, i32* @g32 + %add = add i32 %load1, -42 store i32 %add, i32* @g32 %cond = icmp 
slt i32 %add, 0 br i1 %cond, label %a, label %b @@ -66,16 +375,85 @@ define void @add16_imm_br() nounwind { ; CHECK-LABEL: add16_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movw $42, %ax -; CHECK-NEXT: addw %ax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB2_1 +; CHECK-NEXT: addw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x05,A,A,A,A,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x8000 +; CHECK-NEXT: js .LBB11_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB11_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB2_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB11_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i16, i16* @g16 - %add = add nsw i16 %load1, 42 + ; Add 0x8000, a positive number requiring 16 bits of immediate. 
+ %add = add i16 %load1, 32768 + store i16 %add, i16* @g16 + %cond = icmp slt i16 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add16_imm8_br() nounwind { +; CHECK-LABEL: add16_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addw $42, {{.*}}(%rip) # encoding: [0x66,0x83,0x05,A,A,A,A,0x2a] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB12_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB12_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB12_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i16, i16* @g16 + %add = add i16 %load1, 42 + store i16 %add, i16* @g16 + %cond = icmp slt i16 %add, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @add16_imm8_neg_br() nounwind { +; CHECK-LABEL: add16_imm8_neg_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: addw $-42, {{.*}}(%rip) # encoding: [0x66,0x83,0x05,A,A,A,A,0xd6] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB13_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB13_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB13_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i16, i16* @g16 + %add = add i16 %load1, -42 store i16 %add, i16* @g16 %cond = icmp slt i16 %add, 0 br i1 
%cond, label %a, label %b @@ -92,16 +470,21 @@ define void @add8_imm_br() nounwind { ; CHECK-LABEL: add8_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movb $42, %al -; CHECK-NEXT: addb %al, {{.*}}(%rip) -; CHECK-NEXT: js .LBB3_1 +; CHECK-NEXT: addb $-2, {{.*}}(%rip) # encoding: [0x80,0x05,A,A,A,A,0xfe] +; CHECK-NEXT: # fixup A - offset: 2, value: g8-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB14_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB14_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB3_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB14_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i8, i8* @g8 - %add = add nsw i8 %load1, 42 + %add = add i8 %load1, -2 store i8 %add, i8* @g8 %cond = icmp slt i8 %add, 0 br i1 %cond, label %a, label %b @@ -118,15 +501,21 @@ define void @add64_reg_br(i64 %arg) nounwind { ; CHECK-LABEL: add64_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addq %rdi, {{.*}}(%rip) -; CHECK-NEXT: js .LBB4_1 +; CHECK-NEXT: addq %rdi, {{.*}}(%rip) # encoding: [0x48,0x01,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB15_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB15_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB4_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB15_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i64, i64* @g64 - %add = add nsw i64 %load1, %arg + %add = add i64 %load1, %arg store i64 %add, i64* @g64 %cond = icmp slt i64 
%add, 0 br i1 %cond, label %a, label %b @@ -143,15 +532,21 @@ define void @add32_reg_br(i32 %arg) nounwind { ; CHECK-LABEL: add32_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addl %edi, {{.*}}(%rip) -; CHECK-NEXT: js .LBB5_1 +; CHECK-NEXT: addl %edi, {{.*}}(%rip) # encoding: [0x01,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB16_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB16_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB5_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB16_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i32, i32* @g32 - %add = add nsw i32 %load1, %arg + %add = add i32 %load1, %arg store i32 %add, i32* @g32 %cond = icmp slt i32 %add, 0 br i1 %cond, label %a, label %b @@ -168,15 +563,21 @@ define void @add16_reg_br(i16 %arg) nounwind { ; CHECK-LABEL: add16_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addw %di, {{.*}}(%rip) -; CHECK-NEXT: js .LBB6_1 +; CHECK-NEXT: addw %di, {{.*}}(%rip) # encoding: [0x66,0x01,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB17_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB17_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB6_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB17_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i16, i16* @g16 - %add = add nsw i16 %load1, %arg + %add = add i16 %load1, %arg store i16 %add, i16* @g16 %cond = icmp slt 
i16 %add, 0 br i1 %cond, label %a, label %b @@ -193,15 +594,21 @@ define void @add8_reg_br(i8 %arg) nounwind { ; CHECK-LABEL: add8_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: addb %dil, {{.*}}(%rip) -; CHECK-NEXT: js .LBB7_1 +; CHECK-NEXT: addb %dil, {{.*}}(%rip) # encoding: [0x40,0x00,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g8-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB18_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB18_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB7_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB18_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i8, i8* @g8 - %add = add nsw i8 %load1, %arg + %add = add i8 %load1, %arg store i8 %add, i8* @g8 %cond = icmp slt i8 %add, 0 br i1 %cond, label %a, label %b @@ -215,19 +622,95 @@ ret void } -define void @sub64_imm_br() nounwind { -; CHECK-LABEL: sub64_imm_br: +define void @sub64_imm32_br() nounwind { +; CHECK-LABEL: sub64_imm32_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movq $-42, %rax -; CHECK-NEXT: addq %rax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB8_1 +; CHECK-NEXT: subq $-2147483648, {{.*}}(%rip) # encoding: [0x48,0x81,0x2d,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: js .LBB19_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB19_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB19_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 
+entry: + %load1 = load i64, i64* @g64 + ; Subtract -0x80000000, which can't be negated into a sign-extended 32-bit + ; immediate, so that we have to select sub here. + %sub = sub i64 %load1, -2147483648 + store i64 %sub, i64* @g64 + %cond = icmp slt i64 %sub, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @sub64_too_large_imm32_br() nounwind { +; CHECK-LABEL: sub64_too_large_imm32_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: movabsq $-4294967295, %rax # encoding: [0x48,0xb8,0x01,0x00,0x00,0x00,0xff,0xff,0xff,0xff] +; CHECK-NEXT: # imm = 0xFFFFFFFF00000001 +; CHECK-NEXT: addq %rax, {{.*}}(%rip) # encoding: [0x48,0x01,0x05,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB20_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB20_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB20_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i64, i64* @g64 + ; Subtract 0xFFFFFFFF, which cannot fit in a sign extended 32-bit immediate, + ; even if negated and sign extended as an add. 
+ %sub = sub i64 %load1, 4294967295 + store i64 %sub, i64* @g64 + %cond = icmp slt i64 %sub, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @sub64_imm8_br() nounwind { +; CHECK-LABEL: sub64_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subq $-128, {{.*}}(%rip) # encoding: [0x48,0x83,0x2d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB21_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB21_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB8_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB21_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i64, i64* @g64 - %sub = sub nsw i64 %load1, 42 + ; Subtract -0x80, which can be done with an 8-bit immediate but only as + ; a subtract where that immediate can be negative. 
+ %sub = sub i64 %load1, -128 store i64 %sub, i64* @g64 %cond = icmp slt i64 %sub, 0 br i1 %cond, label %a, label %b @@ -244,16 +727,57 @@ define void @sub32_imm_br() nounwind { ; CHECK-LABEL: sub32_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movl $-42, %eax -; CHECK-NEXT: addl %eax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB9_1 +; CHECK-NEXT: addl $-2147483648, {{.*}}(%rip) # encoding: [0x81,0x05,A,A,A,A,0x00,0x00,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-8, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x80000000 +; CHECK-NEXT: js .LBB22_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB22_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB9_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB22_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i32, i32* @g32 + ; Subtract -0x80000000, which requires 32 bits of immediate but still gets + ; lowered as an add. 
+ %sub = sub i32 %load1, -2147483648 + store i32 %sub, i32* @g32 + %cond = icmp slt i32 %sub, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @sub32_imm8_br() nounwind { +; CHECK-LABEL: sub32_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subl $-128, {{.*}}(%rip) # encoding: [0x83,0x2d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB23_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB23_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB23_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i32, i32* @g32 - %sub = sub nsw i32 %load1, 42 + ; Subtract -0x80, which can be done with an 8-bit immediate but only as + ; a subtract where that immediate can be negative. 
+ %sub = sub i32 %load1, -128 store i32 %sub, i32* @g32 %cond = icmp slt i32 %sub, 0 br i1 %cond, label %a, label %b @@ -270,16 +794,57 @@ define void @sub16_imm_br() nounwind { ; CHECK-LABEL: sub16_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movw $-42, %ax -; CHECK-NEXT: addw %ax, {{.*}}(%rip) -; CHECK-NEXT: js .LBB10_1 +; CHECK-NEXT: addw $-32768, {{.*}}(%rip) # encoding: [0x66,0x81,0x05,A,A,A,A,0x00,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-6, kind: reloc_riprel_4byte +; CHECK-NEXT: # imm = 0x8000 +; CHECK-NEXT: js .LBB24_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB24_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB10_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB24_1: # %a +; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 +entry: + %load1 = load i16, i16* @g16 + ; Subtract -0x8000, which requires 16 bits of immediate but still gets + ; lowered as an add. 
+ %sub = sub i16 %load1, -32768 + store i16 %sub, i16* @g16 + %cond = icmp slt i16 %sub, 0 + br i1 %cond, label %a, label %b + +a: + tail call void @a() + ret void + +b: + tail call void @b() + ret void +} + +define void @sub16_imm8_br() nounwind { +; CHECK-LABEL: sub16_imm8_br: +; CHECK: # BB#0: # %entry +; CHECK-NEXT: subw $-128, {{.*}}(%rip) # encoding: [0x66,0x83,0x2d,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB25_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB25_1-1, kind: FK_PCRel_1 +; CHECK-NEXT: # BB#2: # %b +; CHECK-NEXT: jmp b # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB25_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i16, i16* @g16 - %sub = sub nsw i16 %load1, 42 + ; Subtract -0x80, which can be done with an 8-bit immediate but only as + ; a subtract where that immediate can be negative. 
+ %sub = sub i16 %load1, -128 store i16 %sub, i16* @g16 %cond = icmp slt i16 %sub, 0 br i1 %cond, label %a, label %b @@ -296,16 +861,23 @@ define void @sub8_imm_br() nounwind { ; CHECK-LABEL: sub8_imm_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: movb $-42, %al -; CHECK-NEXT: addb %al, {{.*}}(%rip) -; CHECK-NEXT: js .LBB11_1 +; CHECK-NEXT: addb $-128, {{.*}}(%rip) # encoding: [0x80,0x05,A,A,A,A,0x80] +; CHECK-NEXT: # fixup A - offset: 2, value: g8-5, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB26_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB26_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB11_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB26_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i8, i8* @g8 - %sub = sub nsw i8 %load1, 42 + ; Subtract -0x80, which requires an 8-bit immediate but still gets lowered as + ; an add. 
+ %sub = sub i8 %load1, -128 store i8 %sub, i8* @g8 %cond = icmp slt i8 %sub, 0 br i1 %cond, label %a, label %b @@ -322,15 +894,21 @@ define void @sub64_reg_br(i64 %arg) nounwind { ; CHECK-LABEL: sub64_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: subq %rdi, {{.*}}(%rip) -; CHECK-NEXT: js .LBB12_1 +; CHECK-NEXT: subq %rdi, {{.*}}(%rip) # encoding: [0x48,0x29,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g64-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB27_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB27_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB12_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB27_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i64, i64* @g64 - %sub = sub nsw i64 %load1, %arg + %sub = sub i64 %load1, %arg store i64 %sub, i64* @g64 %cond = icmp slt i64 %sub, 0 br i1 %cond, label %a, label %b @@ -347,15 +925,21 @@ define void @sub32_reg_br(i32 %arg) nounwind { ; CHECK-LABEL: sub32_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: subl %edi, {{.*}}(%rip) -; CHECK-NEXT: js .LBB13_1 +; CHECK-NEXT: subl %edi, {{.*}}(%rip) # encoding: [0x29,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 2, value: g32-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB28_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB28_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB13_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB28_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i32, i32* @g32 - %sub = sub nsw i32 %load1, 
%arg + %sub = sub i32 %load1, %arg store i32 %sub, i32* @g32 %cond = icmp slt i32 %sub, 0 br i1 %cond, label %a, label %b @@ -372,15 +956,21 @@ define void @sub16_reg_br(i16 %arg) nounwind { ; CHECK-LABEL: sub16_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: subw %di, {{.*}}(%rip) -; CHECK-NEXT: js .LBB14_1 +; CHECK-NEXT: subw %di, {{.*}}(%rip) # encoding: [0x66,0x29,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g16-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB29_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB29_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB14_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB29_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i16, i16* @g16 - %sub = sub nsw i16 %load1, %arg + %sub = sub i16 %load1, %arg store i16 %sub, i16* @g16 %cond = icmp slt i16 %sub, 0 br i1 %cond, label %a, label %b @@ -397,15 +987,21 @@ define void @sub8_reg_br(i8 %arg) nounwind { ; CHECK-LABEL: sub8_reg_br: ; CHECK: # BB#0: # %entry -; CHECK-NEXT: subb %dil, {{.*}}(%rip) -; CHECK-NEXT: js .LBB15_1 +; CHECK-NEXT: subb %dil, {{.*}}(%rip) # encoding: [0x40,0x28,0x3d,A,A,A,A] +; CHECK-NEXT: # fixup A - offset: 3, value: g8-4, kind: reloc_riprel_4byte +; CHECK-NEXT: js .LBB30_1 # encoding: [0x78,A] +; CHECK-NEXT: # fixup A - offset: 1, value: .LBB30_1-1, kind: FK_PCRel_1 ; CHECK-NEXT: # BB#2: # %b ; CHECK-NEXT: jmp b # TAILCALL -; CHECK-NEXT: .LBB15_1: # %a +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: b-1, kind: FK_PCRel_1 +; CHECK-NEXT: .LBB30_1: # %a ; CHECK-NEXT: jmp a # TAILCALL +; CHECK-NEXT: # encoding: [0xeb,A] +; CHECK-NEXT: # fixup A - offset: 1, value: a-1, kind: FK_PCRel_1 entry: %load1 = load i8, i8* @g8 - %sub = sub nsw i8 
%load1, %arg + %sub = sub i8 %load1, %arg store i8 %sub, i8* @g8 %cond = icmp slt i8 %sub, 0 br i1 %cond, label %a, label %b Index: llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll +++ llvm/trunk/test/CodeGen/X86/peephole-na-phys-copy-folding.ll @@ -66,8 +66,7 @@ ; CHECK32-LABEL: plus_forty_two: ; CHECK32: # BB#0: # %entry ; CHECK32-NEXT: movb M, %al -; CHECK32-NEXT: movl $42, %ecx -; CHECK32-NEXT: addl %ecx, L +; CHECK32-NEXT: addl $42, L ; CHECK32-NEXT: jne .LBB1_2 ; CHECK32-NEXT: # BB#1: # %entry ; CHECK32-NEXT: andb $8, %al @@ -82,8 +81,7 @@ ; CHECK64-LABEL: plus_forty_two: ; CHECK64: # BB#0: # %entry ; CHECK64-NEXT: movb {{.*}}(%rip), %al -; CHECK64-NEXT: movl $42, %ecx -; CHECK64-NEXT: addl %ecx, {{.*}}(%rip) +; CHECK64-NEXT: addl $42, {{.*}}(%rip) ; CHECK64-NEXT: jne .LBB1_2 ; CHECK64-NEXT: # BB#1: # %entry ; CHECK64-NEXT: andb $8, %al @@ -164,8 +162,7 @@ ; CHECK32-LABEL: minus_forty_two: ; CHECK32: # BB#0: # %entry ; CHECK32-NEXT: movb M, %al -; CHECK32-NEXT: movl $-42, %ecx -; CHECK32-NEXT: addl %ecx, L +; CHECK32-NEXT: addl $-42, L ; CHECK32-NEXT: jne .LBB3_2 ; CHECK32-NEXT: # BB#1: # %entry ; CHECK32-NEXT: andb $8, %al @@ -180,8 +177,7 @@ ; CHECK64-LABEL: minus_forty_two: ; CHECK64: # BB#0: # %entry ; CHECK64-NEXT: movb {{.*}}(%rip), %al -; CHECK64-NEXT: movl $-42, %ecx -; CHECK64-NEXT: addl %ecx, {{.*}}(%rip) +; CHECK64-NEXT: addl $-42, {{.*}}(%rip) ; CHECK64-NEXT: jne .LBB3_2 ; CHECK64-NEXT: # BB#1: # %entry ; CHECK64-NEXT: andb $8, %al Index: llvm/trunk/test/CodeGen/X86/pr32659.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/pr32659.ll +++ llvm/trunk/test/CodeGen/X86/pr32659.ll @@ -50,9 +50,7 @@ ; CHECK-NEXT: sarl $31, %eax ; CHECK-NEXT: andl %eax, e+4 ; CHECK-NEXT: decl g -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: incl %eax -; CHECK-NEXT: addl %eax, f 
+; CHECK-NEXT: addl $1, f ; CHECK-NEXT: adcl $0, f+4 ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebx