Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -470,6 +470,8 @@ MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, const SDLoc &dl, MVT VT, SDNode *Node, SDValue &InFlag); + + bool tryOptimizeRem8Extend(SDNode *N); }; } @@ -841,22 +843,63 @@ } } +// Look for a redundant movzx/movsx that can occur after an 8-bit divrem. +bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { + unsigned Opc = N->getMachineOpcode(); + if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && + Opc != X86::MOVSX64rr8) + return false; + + SDValue N0 = N->getOperand(0); + + // We need to be extracting the lower 8 bits (sub_8bit) of an extend. + if (!N0.isMachineOpcode() || + N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || + !N0.hasOneUse() || N0.getConstantOperandVal(1) != X86::sub_8bit) + return false; + + // We're looking for either a movsx or movzx to match the original opcode. + unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX + : X86::MOVSX32rr8_NOREX; + SDValue N00 = N0.getOperand(0); + if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) + return false; + + if (Opc == X86::MOVSX64rr8) { + // If we had a sign extend from 8 to 64 bits, we still need to go from 32 + // to 64. + MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N), + MVT::i64, N00); + ReplaceUses(N, Extend); + } else { + // Ok, we can drop this extend and just use the original extend. + ReplaceUses(N, N00.getNode()); + } + + return true; +} void X86DAGToDAGISel::PostprocessISelDAG() { // Skip peepholes at -O0. if (TM.getOptLevel() == CodeGenOpt::None) return; - // Attempt to remove vectors moves that were inserted to zero upper bits. 
- SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); + bool MadeChange = false; while (Position != CurDAG->allnodes_begin()) { SDNode *N = &*--Position; // Skip dead nodes and any non-machine opcodes. if (N->use_empty() || !N->isMachineOpcode()) continue; + if (tryOptimizeRem8Extend(N)) { + MadeChange = true; + continue; + } + + // Attempt to remove vector moves that were inserted to zero upper bits. + if (N->getMachineOpcode() != TargetOpcode::SUBREG_TO_REG) continue; @@ -905,11 +948,11 @@ // Producing instruction is another vector instruction. We can drop the // move. CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2)); - - // If the move is now dead, delete it. - if (Move.getNode()->use_empty()) - CurDAG->RemoveDeadNode(Move.getNode()); + MadeChange = true; } + + if (MadeChange) + CurDAG->RemoveDeadNodes(); } @@ -3370,15 +3413,12 @@ } case ISD::SDIVREM: - case ISD::UDIVREM: - case X86ISD::SDIVREM8_SEXT_HREG: - case X86ISD::UDIVREM8_ZEXT_HREG: { + case ISD::UDIVREM: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); unsigned Opc, MOpc; - bool isSigned = (Opcode == ISD::SDIVREM || - Opcode == X86ISD::SDIVREM8_SEXT_HREG); + bool isSigned = Opcode == ISD::SDIVREM; if (!isSigned) { switch (NVT.SimpleTy) { default: llvm_unreachable("Unsupported VT!"); @@ -3517,13 +3557,9 @@ SDValue Result(RNode, 0); InFlag = SDValue(RNode, 1); - if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG || - Opcode == X86ISD::SDIVREM8_SEXT_HREG) { - assert(Node->getValueType(1) == MVT::i32 && "Unexpected result type!"); - } else { - Result = - CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); - } + Result = + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); + ReplaceUses(SDValue(Node, 1), Result); LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- 
lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -361,10 +361,6 @@ // 8-bit SMUL/UMUL - AX, FLAGS = smul8/umul8 AL, RHS. SMUL8, UMUL8, - // 8-bit divrem that zero-extend the high result (AH). - UDIVREM8_ZEXT_HREG, - SDIVREM8_SEXT_HREG, - // X86-specific multiply by immediate. MUL_IMM, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -26628,8 +26628,6 @@ case X86ISD::UMUL: return "X86ISD::UMUL"; case X86ISD::SMUL8: return "X86ISD::SMUL8"; case X86ISD::UMUL8: return "X86ISD::UMUL8"; - case X86ISD::SDIVREM8_SEXT_HREG: return "X86ISD::SDIVREM8_SEXT_HREG"; - case X86ISD::UDIVREM8_ZEXT_HREG: return "X86ISD::UDIVREM8_ZEXT_HREG"; case X86ISD::INC: return "X86ISD::INC"; case X86ISD::DEC: return "X86ISD::DEC"; case X86ISD::OR: return "X86ISD::OR"; @@ -29573,13 +29571,6 @@ Known.Zero &= Known2.Zero; break; } - case X86ISD::UDIVREM8_ZEXT_HREG: - // TODO: Support more than just the zero extended bits? - if (Op.getResNo() != 1) - break; - // The remainder is zero extended. - Known.Zero.setBitsFrom(8); - break; } // Handle target shuffles. @@ -29710,12 +29701,6 @@ unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); return std::min(Tmp0, Tmp1); } - case X86ISD::SDIVREM8_SEXT_HREG: - // TODO: Support more than just the sign extended bits? - if (Op.getResNo() != 1) - break; - // The remainder is sign extended. - return VTBits - 7; } // Fallback case. @@ -38232,36 +38217,6 @@ return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); } -/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) -> -/// (i8,i32 ({s/u}divrem_sext_hreg (i8 x, i8 y) -/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly -/// extends from AH (which we otherwise need to do contortions to access). 
-static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) { - SDValue N0 = N->getOperand(0); - auto OpcodeN = N->getOpcode(); - auto OpcodeN0 = N0.getOpcode(); - if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) || - (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM))) - return SDValue(); - - EVT VT = N->getValueType(0); - EVT InVT = N0.getValueType(); - if (N0.getResNo() != 1 || InVT != MVT::i8 || - !(VT == MVT::i32 || VT == MVT::i64)) - return SDValue(); - - SDVTList NodeTys = DAG.getVTList(MVT::i8, MVT::i32); - auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG - : X86ISD::UDIVREM8_ZEXT_HREG; - SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0), - N0.getOperand(1)); - DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0)); - // If this was a 64-bit extend, complete it. - if (VT == MVT::i64) - return DAG.getNode(OpcodeN, SDLoc(N), VT, R.getValue(1)); - return R.getValue(1); -} - // If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant // operands and the result of CMOV is not used anywhere else - promote CMOV // itself instead of promoting its result. This could be beneficial, because: @@ -38562,9 +38517,6 @@ EVT InVT = N0.getValueType(); SDLoc DL(N); - if (SDValue DivRem8 = getDivRem8(N, DAG)) - return DivRem8; - if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) return NewCMov; @@ -38765,9 +38717,6 @@ if (SDValue R = WidenMaskArithmetic(N, DAG, Subtarget)) return R; - if (SDValue DivRem8 = getDivRem8(N, DAG)) - return DivRem8; - if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) return NewAdd;