Index: llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -483,6 +483,7 @@
   bool shrinkAndImmediate(SDNode *N);
   bool isMaskZeroExtended(SDNode *N) const;
   bool tryShiftAmountMod(SDNode *N);
+  bool tryShrinkShlLogicImm(SDNode *N);

   MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                               const SDLoc &dl, MVT VT, SDNode *Node);
@@ -3325,6 +3326,69 @@
   return true;
 }

+bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
+  MVT NVT = N->getSimpleValueType(0);
+  unsigned Opcode = N->getOpcode();
+  SDLoc dl(N);
+
+  // For operations of the form (x << C1) op C2, check if we can use a smaller
+  // encoding for C2 by transforming it into (x op (C2 >> C1)) << C1.
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+    return false;
+
+  // i8 is unshrinkable, i16 should be promoted to i32.
+  if (NVT != MVT::i32 && NVT != MVT::i64)
+    return false;
+
+  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  if (!Cst || !ShlCst)
+    return false;
+
+  int64_t Val = Cst->getSExtValue();
+  uint64_t ShAmt = ShlCst->getZExtValue();
+
+  // Make sure that we don't change the operation by removing bits.
+  // This only matters for OR and XOR; AND is unaffected.
+  uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
+  if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+    return false;
+
+  // Check the minimum bitwidth for the new constant.
+  // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+  auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
+    ShiftedVal = Val >> ShAmt;
+    if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
+        (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
+      return true;
+    // For 64-bit we can also try unsigned 32 bit immediates.
+    // AND32ri is the same as AND64ri32 with a zero-extended immediate.
+    // MOV32ri+OR64rr is cheaper than MOV64ri+OR64rr.
+    ShiftedVal = (uint64_t)Val >> ShAmt;
+    if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
+      return true;
+    return false;
+  };
+
+  int64_t ShiftedVal;
+  if (!CanShrinkImmediate(ShiftedVal))
+    return false;
+
+  SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
+  insertDAGNode(*CurDAG, SDValue(N, 0), NewCst);
+  SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
+                                     NewCst);
+  insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp);
+  SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
+                                   N0->getOperand(1));
+  ReplaceNode(N, NewSHL.getNode());
+  SelectCode(NewSHL.getNode());
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -3481,66 +3545,112 @@
     LLVM_FALLTHROUGH;
   case ISD::OR:
-  case ISD::XOR: {
-
-    // For operations of the form (x << C1) op C2, check if we can use a smaller
-    // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
-    SDValue N0 = Node->getOperand(0);
-    SDValue N1 = Node->getOperand(1);
+  case ISD::XOR:
+    if (tryShrinkShlLogicImm(Node))
+      return;

-    if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+    LLVM_FALLTHROUGH;
+  case ISD::ADD:
+  case ISD::SUB: {
+    // Try to avoid folding immediates with multiple uses for optsize.
+    // This code tries to select the register form directly to avoid going
+    // through the isel table, which might fold the immediate. We can't change
+    // the add/sub/and/or/xor-with-immediate patterns in the tablegen files to
+    // check the immediate use count without making the patterns unavailable
+    // to the fast-isel table.
+    if (!OptForSize)
       break;

-    // i8 is unshrinkable, i16 should be promoted to i32.
-    if (NVT != MVT::i32 && NVT != MVT::i64)
+    // Only handle i8/i16/i32/i64.
+    if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64)
       break;

+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+
     ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
-    ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
-    if (!Cst || !ShlCst)
+    if (!Cst)
       break;

     int64_t Val = Cst->getSExtValue();
-    uint64_t ShAmt = ShlCst->getZExtValue();

-    // Make sure that we don't change the operation by removing bits.
-    // This only matters for OR and XOR, AND is unaffected.
-    uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1;
-    if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
+    // Make sure it's an immediate that is considered foldable.
+    // FIXME: Handle unsigned 32 bit immediates for 64-bit AND.
+    if (!isInt<8>(Val) && !isInt<32>(Val))
       break;

-    // Check the minimum bitwidth for the new constant.
-    // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
-    auto CanShrinkImmediate = [&](int64_t &ShiftedVal) {
-      ShiftedVal = Val >> ShAmt;
-      if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) ||
-          (!isInt<32>(Val) && isInt<32>(ShiftedVal)))
-        return true;
-      // For 64-bit we can also try unsigned 32 bit immediates.
-      // AND32ri is the same as AND64ri32 with zext imm.
-      // MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
-      ShiftedVal = (uint64_t)Val >> ShAmt;
-      if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal))
-        return true;
-      return false;
-    };
+    // Check if we should avoid folding this immediate.
+    if (!shouldAvoidImmediateInstFormsForSize(N1.getNode()))
       break;

-    int64_t ShiftedVal;
-    if (CanShrinkImmediate(ShiftedVal)) {
-      SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT);
-      insertDAGNode(*CurDAG, SDValue(Node, 0), NewCst);
-      SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, N0->getOperand(0),
-                                         NewCst);
-      insertDAGNode(*CurDAG, SDValue(Node, 0), NewBinOp);
-      SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp,
-                                       N0->getOperand(1));
-      ReplaceNode(Node, NewSHL.getNode());
-      SelectCode(NewSHL.getNode());
-      return;
+    // We should not fold the immediate, so we need the register form instead.
+    unsigned ROpc, MOpc;
+    switch (NVT.SimpleTy) {
+    default: llvm_unreachable("Unexpected VT!");
+    case MVT::i8:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break;
+      case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break;
+      case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break;
+      case ISD::OR:  ROpc = X86::OR8rr;  MOpc = X86::OR8rm;  break;
+      case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break;
+      }
+      break;
+    case MVT::i16:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break;
+      case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break;
+      case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break;
+      case ISD::OR:  ROpc = X86::OR16rr;  MOpc = X86::OR16rm;  break;
+      case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break;
+      }
+      break;
+    case MVT::i32:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break;
+      case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break;
+      case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break;
+      case ISD::OR:  ROpc = X86::OR32rr;  MOpc = X86::OR32rm;  break;
+      case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break;
+      }
+      break;
+    case MVT::i64:
+      switch (Opcode) {
+      default: llvm_unreachable("Unexpected opcode!");
+      case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break;
+      case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break;
+      case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break;
+      case ISD::OR:  ROpc = X86::OR64rr;  MOpc = X86::OR64rm;  break;
+      case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break;
+      }
+      break;
+    }
-    }
-    break;
-  }
+
+    // OK, this is an AND/OR/XOR/ADD/SUB with a constant.
+
+    // If this is not a subtract, we can still try to fold a load.
+    if (Opcode != ISD::SUB) {
+      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
+      if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
+        SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
+        SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other);
+        MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
+        // Update the chain.
+        ReplaceUses(N0.getValue(1), SDValue(CNode, 2));
+        // Record the mem-refs.
+        CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()});
+        ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
+        CurDAG->RemoveDeadNode(Node);
+        return;
+      }
+    }
+
+    CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1);
+    return;
+  }

   case X86ISD::SMUL:
     // i16/i32/i64 are handled with isel patterns.
     if (NVT != MVT::i8)
Index: llvm/test/CodeGen/X86/pr27202.ll
===================================================================
--- llvm/test/CodeGen/X86/pr27202.ll
+++ llvm/test/CodeGen/X86/pr27202.ll
@@ -4,8 +4,8 @@
 define i1 @foo(i32 %i) optsize {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andl $305419896, %edi # imm = 0x12345678
 ; CHECK-NEXT:    movl $305419896, %eax # imm = 0x12345678
+; CHECK-NEXT:    andl %eax, %edi
 ; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
@@ -17,8 +17,8 @@
 define zeroext i1 @g(i32 %x) optsize {
 ; CHECK-LABEL: g:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    orl $1, %edi
 ; CHECK-NEXT:    movl $1, %eax
+; CHECK-NEXT:    orl %eax, %edi
 ; CHECK-NEXT:    cmpl %eax, %edi
 ; CHECK-NEXT:    sete %al
 ; CHECK-NEXT:    retq
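
Reviewer note: the width check inside tryShrinkShlLogicImm is easier to follow outside the DAG plumbing, so here is a minimal standalone sketch of the same rule (plain C++, not LLVM code; the isInt*/isUInt* helpers mirror what I understand the llvm/Support/MathExtras.h templates to do, and the constants in main() are my own examples, not taken from the patch):

  // Sketch of the immediate-shrinking rule behind tryShrinkShlLogicImm.
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  static bool isInt8(int64_t V)    { return V >= INT8_MIN && V <= INT8_MAX; }
  static bool isInt32(int64_t V)   { return V >= INT32_MIN && V <= INT32_MAX; }
  static bool isUInt32(uint64_t V) { return V <= UINT32_MAX; }

  // Can (x << ShAmt) op Val be rewritten as (x op (Val >> ShAmt)) << ShAmt
  // with a strictly narrower immediate encoding?
  static bool canShrinkImmediate(int64_t Val, unsigned ShAmt, bool Is64Bit,
                                 int64_t &ShiftedVal) {
    ShiftedVal = Val >> ShAmt; // arithmetic shift, matching getSExtValue()
    if ((!isInt8(Val) && isInt8(ShiftedVal)) ||
        (!isInt32(Val) && isInt32(ShiftedVal)))
      return true;
    // For 64-bit ops a zero-extended 32-bit immediate also helps, since
    // AND32ri behaves like AND64ri32 with a zero-extended immediate.
    ShiftedVal = (int64_t)((uint64_t)Val >> ShAmt);
    return Is64Bit && !isUInt32((uint64_t)Val) && isUInt32((uint64_t)ShiftedVal);
  }

  int main() {
    int64_t S;
    // (x << 8) | 0x1200 -> (x | 0x12) << 8: the imm32 shrinks to an imm8.
    // Legal for OR only because the low 8 bits of 0x1200 are zero, so the
    // RemovedBitsMask test in the patch passes.
    assert(canShrinkImmediate(0x1200, 8, /*Is64Bit=*/false, S) && S == 0x12);
    // (x << 8) & 0x8000000000 on i64: the shifted constant fits in uimm32.
    assert(canShrinkImmediate(0x8000000000, 8, /*Is64Bit=*/true, S));
    printf("both immediates shrink (second: %#llx)\n", (unsigned long long)S);
  }

The first assert is the intended win: an imm32 AND/OR/XOR becomes an imm8 one behind the shift; the pre-check that the shifted-out bits of the constant are zero is what keeps OR and XOR sound.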
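On the new optsize path itself: the register forms are selected by hand rather than through the isel tables precisely because, per the comment, the tablegen patterns cannot check the immediate's use count. If I read the Ops order correctly, SUB is excluded from the load-folding branch because the folded node computes N1 op mem(N0) (N1 comes first in the Ops array), which equals N0 op N1 only for commutative operations; sub therefore always takes the plain register form here.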
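The updated pr27202.ll checks show the size win this buys, and the arithmetic is easy to verify by hand: in @foo, `andl $305419896, %edi` encodes in 6 bytes (81 /4 id) while the new `andl %eax, %edi` is 2 bytes, so alongside the unavoidable 5-byte `movl $imm32, %eax` the sequence drops from 11 to 7 bytes. In @g the saving is smaller but still real: `orl $1, %edi` is 3 bytes (83 /1 ib) versus 2 bytes for `orl %eax, %edi`.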