Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -2049,6 +2049,15 @@
                             APInt &KnownZero, APInt &KnownOne,
                             TargetLoweringOpt &TLO, unsigned Depth = 0) const;
 
+  /// Perform a target-specific optimization of Op's constant immediate
+  /// operand using the demanded bits information. For example, a target can
+  /// sign-extend the immediate so that it can be folded into the immediate
+  /// field of an instruction. Return true if the immediate node needs no
+  /// further optimization, and set the New and Old fields of TLO if a new
+  /// immediate node was created.
+  virtual bool OptimizeConstant(SDValue Op, const APInt &Demanded,
+                                TargetLoweringOpt &TLO) const;
+
   /// Determine which of the bits specified in Mask are known to be either zero
   /// or one and return them in the KnownZero/KnownOne bitsets.
   virtual void computeKnownBitsForTargetNode(const SDValue Op,
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -284,6 +284,21 @@
                                           const APInt &Demanded) {
   SDLoc dl(Op);
 
+  if (Op.getOpcode() == ISD::XOR) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+
+    // If an XOR constant already has all the demanded bits set, there is
+    // nothing to change, but don't shrink the constant either.
+    if (!C || (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
+      return false;
+  }
+
+  assert(!Old.getNode() && !New.getNode());
+
+  // Return early if the target has already optimized the constant operand.
+  if (DAG.getTargetLoweringInfo().OptimizeConstant(Op, Demanded, *this))
+    return New.getNode();
+
   // FIXME: ISD::SELECT, ISD::SELECT_CC
   switch (Op.getOpcode()) {
   default: break;
@@ -293,10 +308,6 @@
     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
     if (!C) return false;
 
-    if (Op.getOpcode() == ISD::XOR &&
-        (C->getAPIntValue() | (~Demanded)).isAllOnesValue())
-      return false;
-
     // if we can expand it to have all bits set, do it
     if (C->getAPIntValue().intersects(~Demanded)) {
       EVT VT = Op.getValueType();
@@ -1082,6 +1093,11 @@
   return false;
 }
 
+bool TargetLowering::OptimizeConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const {
+  return false;
+}
+
 /// computeKnownBitsForTargetNode - Determine which of the bits specified
 /// in Mask are known to be either zero or one and return them in the
 /// KnownZero/KnownOne bitsets.
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -209,6 +209,9 @@
   /// Selects the correct CCAssignFn for a given CallingConvention value.
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
 
+  bool OptimizeConstant(SDValue Op, const APInt &Demanded,
+                        TargetLoweringOpt &TLO) const override;
+
   /// computeKnownBitsForTargetNode - Determine which of the bits specified in
   /// Mask are known to be either zero or one and return them in the
   /// KnownZero/KnownOne bitsets.
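As a cross-check of the new XOR early-out in ShrinkDemandedConstant, the
following standalone sketch (plain C++ with uint32_t standing in for APInt;
an illustration only, not part of the patch) reproduces the predicate with
the values that arise in the xor1 test case at the end of this patch, where
DAGCombine has rewritten (xor (and x, 56), 56) into (and (xor x, -1), 56)
and only bits 3-5 are still demanded at the XOR:

// Standalone illustration of the XOR early-out; uint32_t replaces APInt.
#include <cassert>
#include <cstdint>

// True when the constant already covers every demanded bit. The XOR then
// acts as a bitwise NOT on the demanded bits and must be left alone so
// instruction selection can still fold it (into BIC on AArch64).
static bool xorHasAllDemandedBitsSet(uint32_t C, uint32_t Demanded) {
  return (C | ~Demanded) == UINT32_MAX; // analogue of isAllOnesValue()
}

int main() {
  // (and (xor x, -1), 0x38): shrinking -1 to 0x38 would destroy the NOT
  // pattern, so the early-out keeps the constant intact.
  assert(xorHasAllDemandedBitsSet(0xFFFFFFFFu, 0x38u));

  // A constant that leaves a demanded bit clear can still be shrunk.
  assert(!xorHasAllDemandedBitsSet(0x28u, 0x38u));
  return 0;
}

With the constant left intact, the NOT survives to instruction selection and
matches BIC, which is exactly what the xor1 CHECK lines verify.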
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -659,6 +659,57 @@
   return VT.changeVectorElementTypeToInteger();
 }
 
+bool AArch64TargetLowering::OptimizeConstant(SDValue Op, const APInt &Demanded,
+                                             TargetLoweringOpt &TLO) const {
+  // Delay this optimization to as late as possible.
+  if (!TLO.LegalOps)
+    return false;
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+
+    if (!C)
+      break;
+
+    uint64_t Enc;
+    EVT VT = Op.getValueType();
+    unsigned Size = std::max(VT.getSizeInBits(), 32u);
+    assert(Size > 0 && Size <= 64);
+    uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size));
+    int64_t Imm = C->getSExtValue();
+
+    // Return if the immediate is already a bimm32 or bimm64.
+    if (AArch64_AM::processLogicalImmediate(Imm & Mask, Size, Enc))
+      return true;
+
+    // Try sign-extending the immediate and see if we can turn it into a
+    // bimm32 or bimm64.
+    unsigned LZ = Demanded.countLeadingZeros() + (64 - Demanded.getBitWidth());
+
+    if (LZ == 0 || LZ == 64)
+      break;
+
+    int64_t NewImm = (Imm << LZ) >> LZ;
+
+    if (NewImm == Imm ||
+        !AArch64_AM::processLogicalImmediate(NewImm & Mask, Size, Enc))
+      break;
+
+    // Create the new constant immediate node.
+    SDValue New = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT,
+                                  Op.getOperand(0),
+                                  TLO.DAG.getConstant(NewImm, VT));
+    return TLO.CombineTo(Op, New);
+  }
+  }
+
+  return false;
+}
+
 /// computeKnownBitsForTargetNode - Determine which of the bits specified in
 /// Mask are known to be either zero or one and return them in the
 /// KnownZero/KnownOne bitsets.
Index: test/CodeGen/AArch64/optimize-imm.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/optimize-imm.ll
@@ -0,0 +1,28 @@
+; RUN: llc -o - %s -mtriple=arm64-apple-ios | FileCheck %s
+
+; CHECK-LABEL: _and1:
+; CHECK: and {{w[0-9]+}}, w0, #0xfffffffd
+
+define void @and1(i32 %a, i8* nocapture %p) {
+entry:
+  %and = and i32 %a, 253
+  %conv = trunc i32 %and to i8
+  store i8 %conv, i8* %p, align 1
+  ret void
+}
+
+; Make sure we don't shrink or optimize an XOR's immediate operand if the
+; immediate is -1. Instruction selection turns (and (xor $mask, -1), $v0) into
+; a BIC.
+
+; CHECK-LABEL: _xor1:
+; CHECK: orr [[R0:w[0-9]+]], wzr, #0x38
+; CHECK: bic {{w[0-9]+}}, [[R0]], w0, lsl #3
+
+define i32 @xor1(i32 %a) {
+entry:
+  %shl = shl i32 %a, 3
+  %xor = and i32 %shl, 56
+  %and = xor i32 %xor, 56
+  ret i32 %and
+}
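The and1 test exercises the sign-extension path added above. This standalone
program (plain C++; __builtin_clzll is a GCC/Clang builtin used here as a
stand-in for APInt::countLeadingZeros; an illustration only, not part of the
patch) reproduces the arithmetic: 0xfd is not a valid logical immediate, but
with only the low byte demanded, sign-extending from the top demanded bit
yields 0xfffffffd, a rotated run of ones that encodes as a bimm32:

// Worked example of the (Imm << LZ) >> LZ sign-extension from
// AArch64TargetLowering::OptimizeConstant, for the and1 test case.
#include <cstdint>
#include <cstdio>

int main() {
  int64_t Imm = 253;        // and1's immediate; 0xfd is not a bimm32.
  uint64_t Demanded = 0xFF; // the trunc-to-i8 store demands only the low byte.

  // Matches Demanded.countLeadingZeros() + (64 - Demanded.getBitWidth())
  // once the demanded mask is viewed as a 64-bit value.
  unsigned LZ = __builtin_clzll(Demanded); // 56

  // Shift left through uint64_t to avoid signed-overflow UB, then rely on
  // an arithmetic right shift to replicate the top demanded bit upward.
  int64_t NewImm = (int64_t)((uint64_t)Imm << LZ) >> LZ;

  // Prints 0xfffffffd, the immediate the and1 CHECK line expects.
  printf("NewImm (32-bit view) = %#x\n", (unsigned)(NewImm & 0xFFFFFFFF));
  return 0;
}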