Index: include/llvm/CodeGen/MachineBasicBlock.h
===================================================================
--- include/llvm/CodeGen/MachineBasicBlock.h
+++ include/llvm/CodeGen/MachineBasicBlock.h
@@ -253,6 +253,12 @@
   const_reverse_instr_iterator instr_rbegin() const { return Insts.rbegin(); }
   reverse_instr_iterator       instr_rend  ()       { return Insts.rend();   }
   const_reverse_instr_iterator instr_rend  () const { return Insts.rend();   }
+  iterator_range<instr_iterator> instrs() {
+    return iterator_range<instr_iterator>(Insts.begin(), Insts.end());
+  }
+  iterator_range<const_instr_iterator> instrs() const {
+    return iterator_range<const_instr_iterator>(Insts.begin(), Insts.end());
+  }
 
   iterator                begin()       { return instr_begin();  }
   const_iterator          begin() const { return instr_begin();  }
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -122,6 +122,9 @@
       /// or TEST instruction.
       BRCOND,
 
+      /// Wide comparison and branch. E.g. "branch if hi1:lo1 < hi2:lo2".
+      WIDECMPBR,
+
       /// Return with a flag operand. Operand 0 is the chain operand, operand
       /// 1 is the number of bytes of stack to pop.
       RET_FLAG,
@@ -1082,6 +1085,9 @@
     MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                          MachineBasicBlock *BB) const;
 
+    MachineBasicBlock *EmitWideCmpBr(MachineInstr *I,
+                                     MachineBasicBlock *BB) const;
+
     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr *I,
                                            MachineBasicBlock *BB) const;
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -15228,6 +15228,103 @@
   return false;
 }
 
+// isWideCmp - Return true if Node is doing a "wide" comparison of Hi1:Lo1 and
+// Hi2:Lo2. On a match, Hi1/Hi2/Lo1/Lo2 and the X86 condition code CC are set.
+static bool isWideCmp(SDValue Node, SDValue &Hi1, SDValue &Hi2, SDValue &Lo1,
+                      SDValue &Lo2, unsigned &CC) {
+  // This function is pattern matching for the output of
+  // DAGTypeLegalizer::IntegerExpandSetCCOperands.
+
+  // Check for: (setcc (or (xor hi1 hi2) (xor lo1 lo2)) 0 {eq,ne}).
+  if (Node.getOpcode() == ISD::SETCC && Node.hasOneUse() &&
+      Node.getOperand(0)->getOpcode() == ISD::OR) {
+    SDValue Or = Node.getOperand(0);
+    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Node.getOperand(1));
+    if (!Or.hasOneUse() || !Const || !Const->isNullValue())
+      return false;
+    if (Or.getOperand(0).getOpcode() != ISD::XOR ||
+        !Or.getOperand(0).hasOneUse() ||
+        Or.getOperand(1).getOpcode() != ISD::XOR ||
+        !Or.getOperand(1).hasOneUse())
+      return false;
+    Hi1 = Or.getOperand(0).getOperand(0);
+    Hi2 = Or.getOperand(0).getOperand(1);
+    Lo1 = Or.getOperand(1).getOperand(0);
+    Lo2 = Or.getOperand(1).getOperand(1);
+
+    switch (cast<CondCodeSDNode>(Node.getOperand(2))->get()) {
+    case ISD::SETEQ: CC = X86::COND_E; break;
+    case ISD::SETNE: CC = X86::COND_NE; break;
+    default: return false;
+    }
+    // Wide compares use 32-bit integers only.
+    if (Hi1.getValueType() != MVT::i32 || Lo1.getValueType() != MVT::i32)
+      return false;
+
+    return true;
+  }
+
+  // Check for: (select (setcc hi1 hi2 eq) (setcc lo1 lo2 <) (setcc hi1 hi2 <)).
+  if (Node.getOpcode() != ISD::SELECT || !Node.hasOneUse() ||
+      Node.getOperand(0)->getOpcode() != ISD::SETCC ||
+      Node.getOperand(1)->getOpcode() != ISD::SETCC ||
+      Node.getOperand(2)->getOpcode() != ISD::SETCC)
+    return false;
+
+  // The select condition is checking equality of the high bits.
+  SDValue SelectCond = Node.getOperand(0);
+  if (cast<CondCodeSDNode>(SelectCond->getOperand(2))->get() != ISD::SETEQ)
+    return false;
+  if (!SelectCond.hasOneUse())
+    return false;
+  Hi1 = SelectCond->getOperand(0);
+  Hi2 = SelectCond->getOperand(1);
+
+  // The select false node is comparing the high bits.
+  SDValue SelectFalse = Node.getOperand(2);
+  if (!SelectFalse.hasOneUse())
+    return false;
+  if (SelectFalse->getOperand(0) != Hi1 || SelectFalse->getOperand(1) != Hi2)
+    return false;
+  unsigned HighCC = cast<CondCodeSDNode>(SelectFalse->getOperand(2))->get();
+
+  // The select true node is comparing the low bits.
+  SDValue SelectTrue = Node.getOperand(1);
+  if (!SelectTrue.hasOneUse())
+    return false;
+  Lo1 = SelectTrue->getOperand(0);
+  Lo2 = SelectTrue->getOperand(1);
+  unsigned LowCC = cast<CondCodeSDNode>(SelectTrue->getOperand(2))->get();
+
+  // Wide compares only work with 32-bit integers.
+  if (Hi1.getValueType() != MVT::i32 || Lo1.getValueType() != MVT::i32)
+    return false;
+
+  // Determine the condition code. The low bits use unsigned cmp.
+  switch (HighCC) {
+  case ISD::SETLT:
+    if (LowCC != ISD::SETULT) { return false; } CC = X86::COND_L; break;
+  case ISD::SETULT:
+    if (LowCC != ISD::SETULT) { return false; } CC = X86::COND_B; break;
+  case ISD::SETGT:
+    if (LowCC != ISD::SETUGT) { return false; } CC = X86::COND_G; break;
+  case ISD::SETUGT:
+    if (LowCC != ISD::SETUGT) { return false; } CC = X86::COND_A; break;
+  case ISD::SETLE:
+    if (LowCC != ISD::SETULE) { return false; } CC = X86::COND_LE; break;
+  case ISD::SETULE:
+    if (LowCC != ISD::SETULE) { return false; } CC = X86::COND_BE; break;
+  case ISD::SETGE:
+    if (LowCC != ISD::SETUGE) { return false; } CC = X86::COND_GE; break;
+  case ISD::SETUGE:
+    if (LowCC != ISD::SETUGE) { return false; } CC = X86::COND_AE; break;
+  default:
+    return false;
+  }
+
+  return true;
+}
+
 SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
   bool addTest = true;
   SDValue Chain = Op.getOperand(0);
@@ -15237,6 +15334,17 @@
   SDValue CC;
   bool Inverted = false;
 
+  {
+    // Check for wide integer compare.
+    SDValue Hi1, Hi2, Lo1, Lo2;
+    unsigned CC;
+    if (isWideCmp(Cond, Hi1, Hi2, Lo1, Lo2, CC)) {
+      SDValue Ops[] = {
+          Chain, Dest, Hi1, Hi2, Lo1, Lo2, DAG.getConstant(CC, dl, MVT::i8)};
+      return DAG.getNode(X86ISD::WIDECMPBR, dl, Op.getValueType(), makeArrayRef(Ops));
+    }
+  }
+
   if (Cond.getOpcode() == ISD::SETCC) {
     // Check for setcc([su]{add,sub,mul}o == 0).
     if (cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
@@ -19915,6 +20023,7 @@
   case X86ISD::FGETSIGNx86:        return "X86ISD::FGETSIGNx86";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
+  case X86ISD::WIDECMPBR:          return "X86ISD::WIDECMPBR";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
   case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
@@ -21154,6 +21263,110 @@
 }
 
 MachineBasicBlock *
+X86TargetLowering::EmitWideCmpBr(MachineInstr *MI,
+                                 MachineBasicBlock *BB) const {
+  DebugLoc DL = MI->getDebugLoc();
+  const X86InstrInfo *TII = Subtarget->getInstrInfo();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+
+  // The next instruction should be a jump to the "false" destination.
+  MachineInstr *FalseJmp = ++MachineBasicBlock::iterator(MI);
+  assert(FalseJmp->getOpcode() == X86::JMP_1);
+
+  MachineBasicBlock *FalseMBB = FalseJmp->getOperand(0).getMBB();
+  MachineBasicBlock *TrueMBB = MI->getOperand(0).getMBB();
+
+  // JCC is a jump matching the overall relation we're checking (e.g. JL),
+  // DisproveJCC is a comparison on the high bits disproving the relation (e.g.
+  // JG), and UnsignedJCC is the unsigned variant of JCC (e.g. JB). Except for
+  // ==, normalize the condition so it may be proved by comparing the high bits.
+  // E.g. for >= we check < instead and reverse the destinations.
+  unsigned JCC, DisproveJCC, UnsignedJCC;
+  switch (MI->getOperand(5).getImm()) {
+  case X86::COND_NE:
+    std::swap(FalseMBB, TrueMBB); // Intentional fall through.
+  case X86::COND_E:
+    JCC = X86::JE_1; DisproveJCC = X86::JNE_1; UnsignedJCC = X86::JE_1; break;
+
+  case X86::COND_GE:
+    std::swap(FalseMBB, TrueMBB); // Intentional fall through.
+  case X86::COND_L:
+    JCC = X86::JL_1; DisproveJCC = X86::JG_1; UnsignedJCC = X86::JB_1; break;
+
+  case X86::COND_AE:
+    std::swap(FalseMBB, TrueMBB); // Intentional fall through.
+  case X86::COND_B:
+    JCC = X86::JB_1; DisproveJCC = X86::JA_1; UnsignedJCC = X86::JB_1; break;
+
+  case X86::COND_LE:
+    std::swap(FalseMBB, TrueMBB); // Intentional fall through.
+  case X86::COND_G:
+    JCC = X86::JG_1; DisproveJCC = X86::JL_1; UnsignedJCC = X86::JA_1; break;
+
+  case X86::COND_BE:
+    std::swap(FalseMBB, TrueMBB); // Intentional fall through.
+  case X86::COND_A:
+    JCC = X86::JA_1; DisproveJCC = X86::JB_1; UnsignedJCC = X86::JA_1; break;
+
+  default: llvm_unreachable("Unexpected condition code!");
+  }
+
+
+  // Compare the high bits.
+  BuildMI(*BB, MI, DL, TII->get(X86::CMP32rr))
+      .addOperand(/*Hi1=*/MI->getOperand(1))
+      .addOperand(/*Hi2=*/MI->getOperand(2));
+  if (JCC != X86::JE_1) // High bits not enough to prove equality.
+    BuildMI(*BB, MI, DL, TII->get(JCC)).addMBB(TrueMBB);
+  BuildMI(*BB, MI, DL, TII->get(DisproveJCC)).addMBB(FalseMBB);
+
+  // Fall through to a new block.
+  MachineBasicBlock *CmpLowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(++BB->getIterator(), CmpLowMBB);
+  FalseJmp->getOperand(0).setMBB(CmpLowMBB);
+  BB->addSuccessor(CmpLowMBB);
+
+  // The new block does an unsigned comparison of the low bits.
+  BuildMI(CmpLowMBB, DL, TII->get(X86::CMP32rr))
+      .addOperand(/*Lo1=*/MI->getOperand(3))
+      .addOperand(/*Lo2=*/MI->getOperand(4));
+  BuildMI(CmpLowMBB, DL, TII->get(UnsignedJCC)).addMBB(TrueMBB);
+  BuildMI(CmpLowMBB, DL, TII->get(X86::JMP_1)).addMBB(FalseMBB);
+
+  // Add NewPred as a predecessor to Succ, updating PHIs to get the same value
+  // as from OldPred. If RemoveOld is true, OldPred is no longer a predecessor.
+  auto updateSuccessor = [MF](MachineBasicBlock *Succ,
+                              MachineBasicBlock *OldPred,
+                              MachineBasicBlock *NewPred, bool RemoveOld) {
+    NewPred->addSuccessor(Succ);
+    if (RemoveOld)
+      OldPred->removeSuccessor(Succ);
+    for (auto &I : Succ->instrs()) {
+      if (!I.isPHI()) break; // PHI operands: def, then (reg, MBB) pairs.
+      for (unsigned i = 1, e = I.getNumOperands(); i != e; i += 2) {
+        unsigned Value = I.getOperand(i).getReg();
+        MachineBasicBlock *MBB = I.getOperand(i + 1).getMBB();
+        if (MBB == OldPred) {
+          if (RemoveOld)
+            I.getOperand(i + 1).setMBB(NewPred);
+          else
+            MachineInstrBuilder(*MF, &I).addReg(Value).addMBB(NewPred);
+          break;
+        }
+      }
+    }
+  };
+
+  // If comparing for equality, BB no longer branches directly to TrueMBB.
+  updateSuccessor(TrueMBB, BB, CmpLowMBB, JCC == X86::JE_1);
+  updateSuccessor(FalseMBB, BB, CmpLowMBB, false);
+
+  MI->eraseFromParent();
+  return BB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitLoweredAtomicFP(MachineInstr *MI,
                                        MachineBasicBlock *BB) const {
   // Combine the following atomic floating-point modification pattern:
@@ -21751,6 +21964,9 @@
   case X86::CMOV_V64I1:
     return EmitLoweredSelect(MI, BB);
 
+  case X86::WIDECMPBR:
+    return EmitWideCmpBr(MI, BB);
+
   case X86::RELEASE_FADD32mr:
   case X86::RELEASE_FADD64mr:
     return EmitLoweredAtomicFP(MI, BB);
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -132,6 +132,13 @@
                     Requires<[In64BitMode]>;
 }
 
+let usesCustomInserter = 1, Defs = [EFLAGS], isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+def WIDECMPBR : I<0, Pseudo, (outs), (ins brtarget32:$dst, GR32:$hi1, GR32:$hi2, GR32:$lo1, GR32:$lo2, i8imm:$cc),
+                  " #WIDECMPBR PSEUDO! $dst, $hi1, $hi2, $lo1, $lo2 $cc",
+                  [(X86widecmpbr bb:$dst, GR32:$hi1, GR32:$hi2, GR32:$lo1, GR32:$lo2, imm:$cc),
+                   (implicit EFLAGS)]>;
+}
+
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
 //
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -57,6 +57,14 @@
                                   [SDTCisVT<0, OtherVT>,
                                    SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
 
+def SDTX86WideCmpBr : SDTypeProfile<0, 6,
+                                    [SDTCisVT<0, OtherVT>,
+                                     SDTCisVT<1, i32>,
+                                     SDTCisVT<2, i32>,
+                                     SDTCisVT<3, i32>,
+                                     SDTCisVT<4, i32>,
+                                     SDTCisVT<5, i8>]>;
+
 def SDTX86SetCC : SDTypeProfile<1, 2,
                                 [SDTCisVT<0, i8>,
                                  SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@@ -133,6 +141,7 @@
 def X86cmov    : SDNode<"X86ISD::CMOV",     SDTX86Cmov>;
 def X86brcond  : SDNode<"X86ISD::BRCOND",   SDTX86BrCond,
                         [SDNPHasChain]>;
+def X86widecmpbr : SDNode<"X86ISD::WIDECMPBR", SDTX86WideCmpBr, [SDNPHasChain]>;
 def X86setcc   : SDNode<"X86ISD::SETCC",    SDTX86SetCC>;
 def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC_C>;
 
Index: test/CodeGen/X86/avx512-cmp.ll
===================================================================
--- test/CodeGen/X86/avx512-cmp.ll
+++ test/CodeGen/X86/avx512-cmp.ll
@@ -103,24 +103,16 @@
 
 ; AVX512-32-LABEL: test10
 ; AVX512-32: movl 4(%esp), %ecx
-; AVX512-32: cmpl $9, (%ecx)
-; AVX512-32: seta %al
+; AVX512-32: cmpl $10, (%ecx)
+; AVX512-32: setb %al
 ; AVX512-32: cmpl $0, 4(%ecx)
-; AVX512-32: setg %cl
+; AVX512-32: sets %cl
 ; AVX512-32: je
 ; AVX512-32: movb %cl, %al
-; AVX512-32: testb $1, %al
-
-define void @test10(i64* %i.addr) {
+; AVX512-32: retl
+define i1 @test10(i64* %i.addr) {
   %x = load i64, i64* %i.addr, align 8
-  %cmp = icmp slt i64 %x, 10
-  br i1 %cmp, label %true, label %false
-
-true:
-  ret void
-
-false:
-  ret void
+  %res = icmp slt i64 %x, 10
+  ret i1 %res
 }
-
 
Index: test/CodeGen/X86/wide-integer-cmp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/wide-integer-cmp.ll
@@ -0,0 +1,149 @@
+; RUN: llc -mtriple=i686-linux-gnu %s -o - | FileCheck %s
+
+
+define i32 @test_eq(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp eq i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: test_eq:
+; CHECK: cmp
+; CHECK: jne [[FALSE:.LBB[0-9_]+]]
+; CHECK: cmp
+; CHECK: jne [[FALSE]]
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @test_ne(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp ne i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: test_ne:
+; CHECK: cmp
+; CHECK: jne [[TRUE:.LBB[0-9_]+]]
+; CHECK: cmp
+; CHECK: jne [[TRUE]]
+; CHECK: movl $2, %eax
+; CHECK: retl
+; CHECK: [[TRUE]]:
+; CHECK: movl $1, %eax
+; CHECK: retl
+}
+
+define i32 @test_slt(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp slt i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: test_slt:
+; CHECK: cmp
+; CHECK: jl [[TRUE:.LBB[0-9_]+]]
+; CHECK: jg [[FALSE:.LBB[0-9_]+]]
+; CHECK: cmp
+; CHECK: jae [[FALSE]]
+; CHECK: [[TRUE]]:
+; CHECK: movl $1, %eax
+; CHECK: retl
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+}
+
+define i32 @test_uge(i64 %a, i64 %b) {
+entry:
+  %cmp = icmp uge i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  ret i32 1
+bb2:
+  ret i32 2
+
+; CHECK-LABEL: test_uge:
+; CHECK: cmp
+; CHECK: jb [[FALSE:.LBB[0-9_]+]]
+; CHECK: ja [[TRUE:.LBB[0-9_]+]]
+; CHECK: cmp
+; CHECK: jae [[TRUE]]
+; CHECK: [[FALSE]]:
+; CHECK: movl $2, %eax
+; CHECK: retl
+; CHECK: [[TRUE]]:
+; CHECK: movl $1, %eax
+; CHECK: retl
+}
+
+define i32 @test_lt_phi_updates(i64 %a, i64 %b, i1 %foo) {
+entry:
+  br i1 %foo, label %bb0, label %bb1
+bb0:
+  %cmp = icmp slt i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  %phi1 = phi i32 [ 42, %entry ], [ 1, %bb0 ]
+  br label %bb2
+bb2:
+  %phi2 = phi i32 [ 2, %bb0 ], [ %phi1, %bb1 ]
+  ret i32 %phi2
+
+; CHECK-LABEL: test_lt_phi_updates:
+; CHECK: movl $42, [[PHI1:%[a-z]+]]
+; CHECK: test
+; CHECK: je [[BB1:.LBB[0-9_]+]]
+; CHECK: movl $2, [[PHI2:%[a-z]+]]
+; CHECK: movl $1, [[PHI1]]
+; CHECK: cmp
+; CHECK: jl [[BB1]]
+; CHECK: jg [[BB2:.LBB[0-9_]+]]
+; CHECK: cmp
+; CHECK: jae [[BB2]]
+; CHECK: [[BB1]]
+; CHECK: movl [[PHI1]], [[PHI2]]
+; CHECK: [[BB2]]
+; CHECK: retl
+}
+
+define i32 @test_eq_phi_updates(i64 %a, i64 %b, i1 %foo) {
+entry:
+  br i1 %foo, label %bb0, label %bb1
+bb0:
+  %cmp = icmp eq i64 %a, %b
+  br i1 %cmp, label %bb1, label %bb2
+bb1:
+  %phi1 = phi i32 [ 42, %entry ], [ 1, %bb0 ]
+  br label %bb2
+bb2:
+  %phi2 = phi i32 [ 2, %bb0 ], [ %phi1, %bb1 ]
+  ret i32 %phi2
+
+; CHECK-LABEL: test_eq_phi_updates:
+; CHECK: movl $42, [[PHI1:%[a-z]+]]
+; CHECK: test
+; CHECK: je [[BB1:.LBB[0-9_]+]]
+; CHECK: movl $2, [[PHI2:%[a-z]+]]
+; CHECK: cmp
+; CHECK: jne [[BB2:.LBB[0-9_]+]]
+; CHECK: movl $1, [[PHI1]]
+; CHECK: cmp
+; CHECK: jne [[BB2]]
+; CHECK: [[BB1]]
+; CHECK: movl [[PHI1]], [[PHI2]]
+; CHECK: [[BB2]]
+; CHECK: retl
+}