Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -829,6 +829,13 @@ return VT.changeVectorElementTypeToInteger(); } +static void customLowerDbgMsg(raw_ostream &OS, StringRef Str, SDValue &V) { + DEBUG( + OS << "Creating " << Str << ": "; + V.dump(); + ); +} + static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, const APInt &Demanded, TargetLowering::TargetLoweringOpt &TLO, @@ -1542,6 +1549,8 @@ if (LHS.getValueType() == MVT::f16 && !FullFP16) { LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS); + customLowerDbgMsg(dbgs(), "lhs promotion", LHS); + customLowerDbgMsg(dbgs(), "rhs promotion", RHS); } Opcode = AArch64ISD::FCCMP; } else if (RHS.getOpcode() == ISD::SUB) { @@ -1556,9 +1565,13 @@ Opcode = AArch64ISD::CCMP; SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC); + customLowerDbgMsg(dbgs(), "predicate const", Condition); + AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC); unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC); SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32); + customLowerDbgMsg(dbgs(), "NZCV const", NZCVOp); + return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp); } @@ -1763,6 +1776,7 @@ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETULT: @@ -1773,6 +1787,7 @@ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETLE: @@ -1784,6 +1799,7 @@ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; C = (VT == MVT::i32) ? 
(uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; case ISD::SETULE: @@ -1795,6 +1811,7 @@ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1; RHS = DAG.getConstant(C, dl, VT); + customLowerDbgMsg(dbgs(), "constant", RHS); } break; } @@ -1830,12 +1847,15 @@ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS, DAG.getValueType(MVT::i16)); + customLowerDbgMsg(dbgs(), "sign extension", SExt); + Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl, RHS.getValueType()), CC, dl, DAG, FullFP16); customLowerDbgMsg(dbgs(), "comparison", Cmp); AArch64CC = changeIntCCToAArch64CC(CC); + // Note: AArch64cc (the SDValue out-param) is not initialized at this point; it is dumped below once materialized. } } @@ -1853,6 +1873,7 @@ AArch64CC = changeIntCCToAArch64CC(CC); } AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC); + customLowerDbgMsg(dbgs(), "cond code constant", AArch64cc); return Cmp; } @@ -1905,6 +1926,11 @@ // widening multiply that wrote all 64 bits. In the end this should be a // noop. Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add); + + customLowerDbgMsg(dbgs(), "mul", Mul); + customLowerDbgMsg(dbgs(), "add", Add); + customLowerDbgMsg(dbgs(), "truncate", Value); + if (IsSigned) { // The signed overflow check requires more than just a simple check for // any bit set in the upper 32 bits of the result. These bits could be @@ -1921,6 +1947,8 @@ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); + + customLowerDbgMsg(dbgs(), "signed mult overflow check", Overflow); } else { // The overflow check for unsigned multiply is easy. We only need to // check if any of the upper 32 bits are set. 
This can be done with a @@ -1934,6 +1962,7 @@ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); + customLowerDbgMsg(dbgs(), "unsigned mult overflow check", Overflow); } break; } @@ -1949,6 +1978,7 @@ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits) .getValue(1); + customLowerDbgMsg(dbgs(), "signed 64-bit mult overflow check", Overflow); } else { SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS); SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32); @@ -1956,6 +1986,7 @@ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, DL, MVT::i64), UpperBits).getValue(1); + customLowerDbgMsg(dbgs(), "unsigned 64-bit mult overflow check", Overflow); } break; } @@ -1966,6 +1997,7 @@ // Emit the AArch64 operation with overflow check. Value = DAG.getNode(Opc, DL, VTs, LHS, RHS); + customLowerDbgMsg(dbgs(), "overflow check", Value); Overflow = Value.getValue(1); } return std::make_pair(Value, Overflow); @@ -2086,13 +2118,18 @@ // We use 0 and 1 as false and true values. SDValue TVal = DAG.getConstant(1, dl, MVT::i32); SDValue FVal = DAG.getConstant(0, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "true value", TVal); + customLowerDbgMsg(dbgs(), "false value", FVal); // We use an inverted condition, because the conditional select is inverted // too. This will allow it to be selected to a single instruction: // CSINC Wd, WZR, WZR, invert(cond). 
SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code value", CCVal); + Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow); + customLowerDbgMsg(dbgs(), "cond select", Overflow); SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow); @@ -2326,7 +2363,10 @@ SDLoc DL(Op); Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0)); + customLowerDbgMsg(dbgs(), "conversion", Op); Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op); + customLowerDbgMsg(dbgs(), "bitcast", Op); + return SDValue( DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op, DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)), @@ -2548,6 +2588,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + DEBUG(dbgs() << "Custom lowering: "); + DEBUG(Op.dump()); + switch (Op.getOpcode()) { default: llvm_unreachable("unimplemented operand"); @@ -3955,6 +3998,7 @@ if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "zero constant", RHS); } } @@ -3974,10 +4018,13 @@ AArch64CC::CondCode OFCC; SDValue Value, Overflow; std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG); + customLowerDbgMsg(dbgs(), "opcode", Value); + customLowerDbgMsg(dbgs(), "overflow", Overflow); if (CC == ISD::SETNE) OFCC = getInvertedCondCode(OFCC); SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant", CCVal); return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Overflow); @@ -4061,10 +4108,16 @@ AArch64CC::CondCode CC1, CC2; changeFPCCToAArch64CC(CC, CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant1", CC1Val); + SDValue BR1 = DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp); + 
customLowerDbgMsg(dbgs(), "cond branch", BR1); + if (CC2 != AArch64CC::AL) { SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "cond code constant2", CC2Val); + return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val, Cmp); } @@ -4081,10 +4134,14 @@ SDValue In2 = Op.getOperand(1); EVT SrcVT = In2.getValueType(); - if (SrcVT.bitsLT(VT)) + if (SrcVT.bitsLT(VT) && + !(In2.getValueType() == MVT::f16 && Subtarget->hasFullFP16())) { In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2); - else if (SrcVT.bitsGT(VT)) + customLowerDbgMsg(dbgs(), "fp conversion", In2); } else if (SrcVT.bitsGT(VT)) { In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL)); + customLowerDbgMsg(dbgs(), "fp rounding", In2); + } EVT VecVT; EVT EltVT; @@ -4100,9 +4157,13 @@ DAG.getUNDEF(VecVT), In1); VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT, DAG.getUNDEF(VecVT), In2); + customLowerDbgMsg(dbgs(), "insert sub reg", VecVal1); + customLowerDbgMsg(dbgs(), "insert sub reg", VecVal2); } else { VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1); VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2); + customLowerDbgMsg(dbgs(), "bitcast", VecVal1); + customLowerDbgMsg(dbgs(), "bitcast", VecVal2); } } else if (VT == MVT::f64 || VT == MVT::v2f64) { EltVT = MVT::i64; @@ -4127,17 +4188,22 @@ } SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT); + customLowerDbgMsg(dbgs(), "constant vec", BuildVec); // If we couldn't materialize the mask above, then the mask vector will be // the zero vector, and we need to negate it here. 
if (VT == MVT::f64 || VT == MVT::v2f64) { BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec); + customLowerDbgMsg(dbgs(), "bitcast", BuildVec); BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec); + customLowerDbgMsg(dbgs(), "fneg", BuildVec); BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec); + customLowerDbgMsg(dbgs(), "bitcast", BuildVec); } SDValue Sel = DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec); + customLowerDbgMsg(dbgs(), "Bitwise Insert if True", Sel); if (VT == MVT::f32) return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel); @@ -4196,6 +4262,9 @@ SDValue TVal = DAG.getConstant(1, dl, VT); SDValue FVal = DAG.getConstant(0, dl, VT); + customLowerDbgMsg(dbgs(), "true constant", TVal); + customLowerDbgMsg(dbgs(), "false constant", FVal); + // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { @@ -4236,6 +4305,7 @@ if (CC2 == AArch64CC::AL) { changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2); SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32); + customLowerDbgMsg(dbgs(), "condition code constant", CC1Val); // Note that we inverted the condition above, so we reverse the order of // the true and false operands here. 
This will allow the setcc to be @@ -4253,6 +4323,11 @@ DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp); SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32); + + customLowerDbgMsg(dbgs(), "condition code constant1", CC1Val); + customLowerDbgMsg(dbgs(), "condition code constant2", CC2Val); + customLowerDbgMsg(dbgs(), "conditional select", CS1); + return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp); } } @@ -4271,6 +4346,7 @@ if (!RHS.getNode()) { RHS = DAG.getConstant(0, dl, LHS.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "constant", RHS); } } @@ -4278,6 +4354,8 @@ if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) { LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS); RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS); + customLowerDbgMsg(dbgs(), "f16->f32 conversion", LHS); + customLowerDbgMsg(dbgs(), "f16->f32 conversion", RHS); } // Next, handle integers. @@ -4480,6 +4558,10 @@ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG); SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32); + customLowerDbgMsg(dbgs(), "opcode", Value); + customLowerDbgMsg(dbgs(), "overflow", Overflow); + customLowerDbgMsg(dbgs(), "constant", CCVal); + return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal, Overflow); } @@ -4495,6 +4577,7 @@ LHS = CCVal; RHS = DAG.getConstant(0, DL, CCVal.getValueType()); CC = ISD::SETNE; + customLowerDbgMsg(dbgs(), "constant", RHS); } return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG); } @@ -4898,13 +4981,32 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. // FIXME: We should be able to handle f128 as well with a clever lowering. 
- if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) + DEBUG(dbgs() << "Is legal fp immediate: "); + if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32)) { + DEBUG(dbgs() << "yes, 0 imm can be materialized using the zero register\n"); return true; + } + + StringRef Type; + bool Res = false; + + if (VT == MVT::f64) { + Type = "f64"; + Res = AArch64_AM::getFP64Imm(Imm) != -1; + } else if (VT == MVT::f32) { + Type = "f32"; + Res = AArch64_AM::getFP32Imm(Imm) != -1; + } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) { + Type = "f16"; + Res = AArch64_AM::getFP16Imm(Imm) != -1; + } - if (VT == MVT::f64) - return AArch64_AM::getFP64Imm(Imm) != -1; - else if (VT == MVT::f32) - return AArch64_AM::getFP32Imm(Imm) != -1; + if (!Type.empty()) { + DEBUG(dbgs() << (Res ? "yes" : "no") << ", " << Type << " imm\n"); + return Res; + } + + DEBUG(dbgs() << "no, unsupported fp type\n"); return false; } @@ -7844,12 +7946,21 @@ // 12-bit optionally shifted immediates are legal for adds. bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const { + DEBUG(dbgs() << "Is legal 12-bit add immediate: "); // Avoid UB for INT64_MIN. - if (Immed == std::numeric_limits<int64_t>::min()) + if (Immed == std::numeric_limits<int64_t>::min()) { + DEBUG(dbgs() << "no, avoid UB for INT64_MIN\n"); return false; + } // Same encoding for add/sub, just flip the sign. Immed = std::abs(Immed); - return ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)); + if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0)) { + DEBUG(dbgs() << "yes\n"); + return true; + } + + DEBUG(dbgs() << "no\n"); + return false; } // Integer comparisons are implemented with ADDS/SUBS, so the range of valid