Index: include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- include/llvm/CodeGen/ISDOpcodes.h +++ include/llvm/CodeGen/ISDOpcodes.h @@ -372,6 +372,11 @@ /// then the result type must also be a vector type. SETCC, + /// Like SetCC, but with LHS and RHS split into a high and low part. Ops #0 + /// and #1 are the low and high bits of LHS, respectively. Ops #2 and #3 + /// are the low and high bits of RHS. Op #4 is the condition code. + SETCC_PARTS, + /// SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded /// integer shift operations, just like ADD/SUB_PARTS. The operation /// ordering is: Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1232,9 +1232,11 @@ } case ISD::SELECT_CC: case ISD::SETCC: + case ISD::SETCC_PARTS: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCC_PARTS ? 4 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = Index: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2738,6 +2738,23 @@ return; } + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. + NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + if (TLI.getOperationAction(ISD::SETCC_PARTS, LHSLo.getValueType()) == + TargetLowering::Custom) { + // If the target can lower SETCC_PARTS, that's more efficient. 
+ NewLHS = DAG.getNode(ISD::SETCC_PARTS, dl, + getSetCCResultType(LHSLo.getValueType()), LHSLo, LHSHi, + RHSLo, RHSHi, DAG.getCondCode(CCCode)); + NewRHS = SDValue(); + return; + } + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); Index: lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -209,6 +209,7 @@ case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; + case ISD::SETCC_PARTS: return "setcc_parts"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; case ISD::SELECT_CC: return "select_cc"; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -1021,6 +1021,7 @@ SDValue LowerToBT(SDValue And, ISD::CondCode CC, SDLoc dl, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -455,6 +455,11 @@ setOperationAction(ISD::SRL_PARTS , MVT::i64 , Custom); } + setOperationAction(ISD::SETCC_PARTS , MVT::i32 , Custom); + if (Subtarget->is64Bit()) { + setOperationAction(ISD::SETCC_PARTS , MVT::i64 , Custom); + } + if (Subtarget->hasSSE1()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); @@ -14570,6 +14575,52 @@ return SetCC; } +SDValue X86TargetLowering::LowerSETCC_PARTS(SDValue Op, + SelectionDAG &DAG) const { + 
// Perform the comparison with a wide subtraction, using SUB and SBB. The + // result of SBB is the high word of the difference between LHS and RHS. That + // word is negative iff LHS < RHS, and zero or greater iff LHS >= RHS. For the + // other relations, we need to flip the operands. + SDValue LHSLo = Op.getOperand(0); + SDValue LHSHi = Op.getOperand(1); + SDValue RHSLo = Op.getOperand(2); + SDValue RHSHi = Op.getOperand(3); + SDValue Cond = Op.getOperand(4); + SDLoc DL(Op); + + X86::CondCode CC; + bool FlipOperands = false; + switch (cast<CondCodeSDNode>(Cond)->get()) { + // Detect < and >= directly: + case ISD::SETLT: CC = X86::COND_L; break; + case ISD::SETULT: CC = X86::COND_B; break; + case ISD::SETGE: CC = X86::COND_GE; break; + case ISD::SETUGE: CC = X86::COND_AE; break; + + // For > and <=, flip operands and condition code: + case ISD::SETGT: CC = X86::COND_L; FlipOperands = true; break; + case ISD::SETUGT: CC = X86::COND_B; FlipOperands = true; break; + case ISD::SETLE: CC = X86::COND_GE; FlipOperands = true; break; + case ISD::SETULE: CC = X86::COND_AE; FlipOperands = true; break; + default: + llvm_unreachable("Unexpected condition code!"); + } + + if (FlipOperands) { + std::swap(LHSHi, RHSHi); + std::swap(LHSLo, RHSLo); + } + + SDVTList VTs = DAG.getVTList(LHSHi.getValueType(), MVT::i32); + SDValue LowSub = DAG.getNode(X86ISD::SUB, DL, VTs, LHSLo, RHSLo); + SDValue LowFlags = LowSub.getValue(1); + SDValue HighSbb = DAG.getNode(X86ISD::SBB, DL, VTs, LHSHi, RHSHi, LowFlags); + SDValue HighFlags = HighSbb.getValue(1); + + return DAG.getNode(X86ISD::SETCC, DL, Op.getValueType(), + DAG.getConstant(CC, DL, MVT::i8), HighFlags); +} + // isX86LogicalCmp - Return true if opcode is a X86 logical comparison. 
static bool isX86LogicalCmp(SDValue Op) { unsigned Opc = Op.getNode()->getOpcode(); @@ -19655,6 +19706,7 @@ case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); case ISD::SETCC: return LowerSETCC(Op, DAG); + case ISD::SETCC_PARTS: return LowerSETCC_PARTS(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); Index: test/CodeGen/X86/2012-08-17-legalizer-crash.ll =================================================================== --- test/CodeGen/X86/2012-08-17-legalizer-crash.ll +++ test/CodeGen/X86/2012-08-17-legalizer-crash.ll @@ -26,6 +26,5 @@ ret void ; CHECK-LABEL: fn1: -; CHECK: shrq $32, [[REG:%.*]] ; CHECK: sete } Index: test/CodeGen/X86/atomic-minmax-i6432.ll =================================================================== --- test/CodeGen/X86/atomic-minmax-i6432.ll +++ test/CodeGen/X86/atomic-minmax-i6432.ll @@ -8,7 +8,7 @@ %1 = atomicrmw max i64* @sc64, i64 5 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: seta +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b @@ -16,7 +16,7 @@ %2 = atomicrmw min i64* @sc64, i64 6 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setb +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b @@ -24,7 +24,7 @@ %3 = atomicrmw umax i64* @sc64, i64 7 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: seta +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b @@ -32,7 +32,7 @@ %4 = atomicrmw umin i64* @sc64, i64 8 acquire ; LINUX: [[LABEL:.LBB[0-9]+_[0-9]+]] ; LINUX: cmpl -; LINUX: setb +; LINUX: sbbl ; LINUX: cmovne ; LINUX: cmovne ; LINUX: lock cmpxchg8b Index: test/CodeGen/X86/atomic128.ll =================================================================== --- test/CodeGen/X86/atomic128.ll +++ test/CodeGen/X86/atomic128.ll @@ -119,16 +119,9 @@ ; CHECK-DAG: movq 
8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setbe [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setle [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setg ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx @@ -151,16 +144,9 @@ ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setae [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setge [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setge ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx @@ -183,16 +169,9 @@ ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rsi, %rax -; CHECK: setbe [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: setbe [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: seta ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, %rcx @@ -215,16 +194,9 @@ ; CHECK-DAG: movq 8(%rdi), %rdx ; CHECK: [[LOOP:.?LBB[0-9]+_[0-9]+]]: -; CHECK: cmpq %rax, %rsi -; CHECK: setb [[CMP:%[a-z0-9]+]] -; CHECK: cmpq [[INCHI]], %rdx -; CHECK: seta [[HICMP:%[a-z0-9]+]] -; CHECK: je [[USE_LO:.?LBB[0-9]+_[0-9]+]] - -; CHECK: movb [[HICMP]], [[CMP]] -; CHECK: [[USE_LO]]: -; CHECK: testb [[CMP]], [[CMP]] -; CHECK: movq %rsi, %rbx +; CHECK: cmpq +; CHECK: sbbq +; CHECK: setb ; CHECK: cmovneq %rax, %rbx ; CHECK: movq [[INCHI]], %rcx ; CHECK: cmovneq %rdx, 
%rcx Index: test/CodeGen/X86/avx512-cmp.ll =================================================================== --- test/CodeGen/X86/avx512-cmp.ll +++ test/CodeGen/X86/avx512-cmp.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix AVX512-32 ; CHECK-LABEL: test1 ; CHECK: vucomisd {{.*}}encoding: [0x62 @@ -100,27 +99,3 @@ B: ret i32 7 } - -; AVX512-32-LABEL: test10 -; AVX512-32: movl 4(%esp), %ecx -; AVX512-32: cmpl $9, (%ecx) -; AVX512-32: seta %al -; AVX512-32: cmpl $0, 4(%ecx) -; AVX512-32: setg %cl -; AVX512-32: je -; AVX512-32: movb %cl, %al -; AVX512-32: testb $1, %al - -define void @test10(i64* %i.addr) { - - %x = load i64, i64* %i.addr, align 8 - %cmp = icmp slt i64 %x, 10 - br i1 %cmp, label %true, label %false - -true: - ret void - -false: - ret void -} - Index: test/CodeGen/X86/wide-integer-cmp.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/wide-integer-cmp.ll @@ -0,0 +1,84 @@ +; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s + + +define i32 @branch_eq(i64 %a, i64 %b) { +entry: + %cmp = icmp eq i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_eq: +; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] +; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] +; CHECK: xorl 16(%esp), [[LHSHi]] +; CHECK: xorl 12(%esp), [[LHSLo]] +; CHECK: orl [[LHSHi]], [[LHSLo]] +; CHECK: jne [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @branch_slt(i64 %a, i64 %b) { +entry: + %cmp = icmp slt i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_slt: +; CHECK: movl 4(%esp), [[LHSLo:%[a-z]+]] +; CHECK: movl 8(%esp), [[LHSHi:%[a-z]+]] +; CHECK: cmpl 12(%esp), [[LHSLo]] +; CHECK: sbbl 16(%esp), 
[[LHSHi]] +; CHECK: jge [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @branch_ule(i64 %a, i64 %b) { +entry: + %cmp = icmp ule i64 %a, %b + br i1 %cmp, label %bb1, label %bb2 +bb1: + ret i32 1 +bb2: + ret i32 2 + +; CHECK-LABEL: branch_ule: +; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]] +; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] +; CHECK: cmpl 4(%esp), [[RHSLo]] +; CHECK: sbbl 8(%esp), [[RHSHi]] +; CHECK: jb [[FALSE:.LBB[0-9_]+]] +; CHECK: movl $1, %eax +; CHECK: retl +; CHECK: [[FALSE]]: +; CHECK: movl $2, %eax +; CHECK: retl +} + +define i32 @set_gt(i64 %a, i64 %b) { +entry: + %cmp = icmp sgt i64 %a, %b + %res = select i1 %cmp, i32 1, i32 0 + ret i32 %res + +; CHECK-LABEL: set_gt: +; CHECK: movl 12(%esp), [[RHSLo:%[a-z]+]] +; CHECK: movl 16(%esp), [[RHSHi:%[a-z]+]] +; CHECK: cmpl 4(%esp), [[RHSLo]] +; CHECK: sbbl 8(%esp), [[RHSHi]] +; CHECK: setl %al +; CHECK: retl +} Index: test/CodeGen/X86/win32-pic-jumptable.ll =================================================================== --- test/CodeGen/X86/win32-pic-jumptable.ll +++ test/CodeGen/X86/win32-pic-jumptable.ll @@ -7,10 +7,10 @@ ; CHECK-NEXT: jmpl *%eax ; CHECK: LJTI0_0: +; CHECK-NEXT: .long LBB0_2-L0$pb +; CHECK-NEXT: .long LBB0_3-L0$pb ; CHECK-NEXT: .long LBB0_4-L0$pb ; CHECK-NEXT: .long LBB0_5-L0$pb -; CHECK-NEXT: .long LBB0_6-L0$pb -; CHECK-NEXT: .long LBB0_7-L0$pb target triple = "i686--windows-itanium"