Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -829,6 +829,13 @@ return VT.changeVectorElementTypeToInteger(); } +static void customLowerDbgMsg(raw_ostream &OS, StringRef Str, SDValue &V) { + DEBUG( + OS << "Creating " << Str << ": "; + V.dump(); + ); +} + static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, @@ -1542,6 +1549,8 @@ if (LHS.getValueType() == MVT::f16 && !FullFP16) { LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); + customLowerDbgMsg(dbgs(), "lhs promotion", LHS); + customLowerDbgMsg(dbgs(), "rhs promotion", RHS); } Opcode = AArch64ISD::FCCMP; } else if (RHS.getOpcode() == ISD::SUB) { @@ -1556,9 +1565,13 @@ Opcode = AArch64ISD::CCMP; SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); + customLowerDbgMsg(dbgs(), "predicate const", Condition); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + customLowerDbgMsg(dbgs(), "NZCV const", NZCVOp); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); } @@ -1763,6 +1776,7 @@ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETULT: @@ -1773,6 +1787,7 @@ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETLE: @@ -1784,6 +1799,7 @@ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETULE: @@ -1795,6 +1811,7 @@ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; } @@ -1830,12 +1847,15 @@ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); + customLowerDbgMsg(dbgs(), "sign extension", SExt); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl, DAG, FullFP16); customLowerDbgMsg(dbgs(), "comparison", Cmp); AArch64CC = changeIntCCToAArch64CC(CC); + // Note: AArch64cc (the SDValue out-param) is not initialized at this point; it is dumped below once materialized. } } @@ -1853,6 +1873,7 @@ AArch64CC = changeIntCCToAArch64CC(CC); } AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); + customLowerDbgMsg(dbgs(), "cond code constant", AArch64cc); return Cmp; } @@ -1905,6 +1926,11 @@ // widening multiply that wrote all 64 bits. In the end this should be a // noop. Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); + + customLowerDbgMsg(dbgs(), "mul", Mul); + customLowerDbgMsg(dbgs(), "add", Add); + customLowerDbgMsg(dbgs(), "truncate", Value); + if (IsSigned) { // The signed overflow check requires more than just a simple check for // any bit set in the upper 32 bits of the result. These bits could be @@ -1921,6 +1947,8 @@ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); + + customLowerDbgMsg(dbgs(), "signed mult overflow check", Overflow); } else { // The overflow check for unsigned multiply is easy. We only need to // check if any of the upper 32 bits are set. 
This can be done with a @@ -1934,6 +1962,7 @@ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); + customLowerDbgMsg(dbgs(), "unsigned mult overflow check", Overflow); } break; } @@ -1949,6 +1978,7 @@ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); + customLowerDbgMsg(dbgs(), "signed 64-bit mult overflow check", Overflow); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); @@ -1956,6 +1986,7 @@ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); + customLowerDbgMsg(dbgs(), "unsigned 64-bit mult overflow check", Overflow); } break; } @@ -1966,6 +1997,7 @@ // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); + customLowerDbgMsg(dbgs(), "overflow check", Value); Overflow = Value.getValue(1); } return std::make_pair(Value, Overflow); @@ -2086,13 +2118,18 @@ // We use 0 and 1 as false and true values. SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "true value", TVal); + customLowerDbgMsg(dbgs(), "false value", FVal); // We use an inverted condition, because the conditional select is inverted // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). 
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code value", CCVal); + Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); + customLowerDbgMsg(dbgs(), "cond select", Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -2326,7 +2363,10 @@ SDLoc DL(Op); Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); + customLowerDbgMsg(dbgs(), "conversion", Op); Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); + customLowerDbgMsg(dbgs(), "bitcast", Op); + return SDValue( DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), @@ -2548,6 +2588,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "Custom lowering: "); + DEBUG(Op.dump()); + switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); @@ -3955,6 +3998,7 @@ if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "zero constant", RHS); } } @@ -3974,10 +4018,13 @@ AArch64CC::CondCode OFCC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); + customLowerDbgMsg(dbgs(), "opcode", Value); + customLowerDbgMsg(dbgs(), "overflow", Overflow); if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant", CCVal); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Overflow); @@ -4061,10 +4108,16 @@ AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant1", CC1Val); + SDValue BR1 = DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + 
customLowerDbgMsg(dbgs(), "cond branch", BR1); + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant2", CC2Val); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } @@ -4081,10 +4134,14 @@ SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); - if (SrcVT.bitsLT(VT)) + if (SrcVT.bitsLT(VT) && + !(In2.getValueType() == MVT::f16 && Subtarget->hasFullFP16())) { In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); - else if (SrcVT.bitsGT(VT)) + customLowerDbgMsg(dbgs(), "fp conversion", In2); } else if (SrcVT.bitsGT(VT)) { In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); + customLowerDbgMsg(dbgs(), "fp rounding", In2); + } EVT VecVT; EVT EltVT; @@ -4100,9 +4157,13 @@ DAG.getUNDEF(VecVT), In1); VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In2); + customLowerDbgMsg(dbgs(), "insert sub reg", VecVal1); + customLowerDbgMsg(dbgs(), "insert sub reg", VecVal2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); + customLowerDbgMsg(dbgs(), "bitcast", VecVal1); + customLowerDbgMsg(dbgs(), "bitcast", VecVal2); } } else if (VT == MVT::f64 || VT == MVT::v2f64) { EltVT = MVT::i64; @@ -4127,17 +4188,22 @@ } SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT); + customLowerDbgMsg(dbgs(), "constant vec", BuildVec); // If we couldn't materialize the mask above, then the mask vector will be // the zero vector, and we need to negate it here. 
if (VT == MVT::f64 || VT == MVT::v2f64) { BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); + customLowerDbgMsg(dbgs(), "bitcast", BuildVec); BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); + customLowerDbgMsg(dbgs(), "fneg", BuildVec); BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); + customLowerDbgMsg(dbgs(), "bitcast", BuildVec); } SDValue Sel = DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); + customLowerDbgMsg(dbgs(), "Bitwise Insert if True", Sel); if (VT == MVT::f32) return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); @@ -4196,6 +4262,9 @@ SDValue TVal = DAG.getConstant(1, dl, VT); SDValue FVal = DAG.getConstant(0, dl, VT); + customLowerDbgMsg(dbgs(), "true constant", TVal); + customLowerDbgMsg(dbgs(), "false constant", FVal); + // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { @@ -4236,6 +4305,7 @@ if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "condition code constant", CC1Val); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. 
This will allow the setcc to be @@ -4253,6 +4323,11 @@ DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); + + customLowerDbgMsg(dbgs(), "condition code constant1", CC1Val); + customLowerDbgMsg(dbgs(), "condition code constant2", CC2Val); + customLowerDbgMsg(dbgs(), "conditional select", CS1); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } @@ -4271,6 +4346,7 @@ if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "constant", RHS); } } @@ -4278,6 +4354,8 @@ if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); + customLowerDbgMsg(dbgs(), "f16->f32 conversion", LHS); + customLowerDbgMsg(dbgs(), "f16->f32 conversion", RHS); } // Next, handle integers. @@ -4480,6 +4558,10 @@ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32); + customLowerDbgMsg(dbgs(), "opcode", Value); + customLowerDbgMsg(dbgs(), "overflow", Overflow); + customLowerDbgMsg(dbgs(), "constant", CCVal); + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, Overflow); } @@ -4495,6 +4577,7 @@ LHS = CCVal; RHS = DAG.getConstant(0, DL, CCVal.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "constant", RHS); } return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); } @@ -4898,13 +4981,32 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. 
- if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) + DEBUG(dbgs() << "Is legal fp immediate: "); + if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) { + DEBUG(dbgs() << "yes, 0 imm can be materialized using the zero register\n"); return true; + } + + StringRef Type; + bool Res = false; + + if (VT == MVT::f64) { + Type = "f64"; + Res = AArch64_AM::getFP64Imm(Imm) != -1; + } else if (VT == MVT::f32) { + Type = "f32"; + Res = AArch64_AM::getFP32Imm(Imm) != -1; + } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) { + Type = "f16"; + Res = AArch64_AM::getFP16Imm(Imm) != -1; + } - if (VT == MVT::f64) - return AArch64_AM::getFP64Imm(Imm) != -1; - else if (VT == MVT::f32) - return AArch64_AM::getFP32Imm(Imm) != -1; + if (!Type.empty()) { + DEBUG(dbgs() << (Res ? "yes" : "no") << ", " << Type << " imm\n"); + return Res; + } + + DEBUG(dbgs() << "no, unsupported fp type\n"); return false; } @@ -7844,12 +7946,21 @@ // 12-bit optionally shifted immediates are legal for adds. bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { + DEBUG(dbgs() << "Is legal 12-bit add immediate: "); // Avoid UB for INT64_MIN. - if (Immed == std::numeric_limits<int64_t>::min()) + if (Immed == std::numeric_limits<int64_t>::min()) { + DEBUG(dbgs() << "no, avoid UB for INT64_MIN\n"); return false; + } // Same encoding for add/sub, just flip the sign. Immed = std::abs(Immed); - return ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); + if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) { + DEBUG(dbgs() << "yes\n"); + return true; + } + + DEBUG(dbgs() << "no\n"); + return false; } // Integer comparisons are implemented with ADDS/SUBS, so the range of valid