Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -278,7 +278,7 @@
   }
 
   MVT ElemTy = VT.getVectorElementType();
-  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+  if (ElemTy != MVT::f64)
     setOperationAction(ISD::SETCC, VT, Custom);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -742,8 +742,6 @@
     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
-    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
-    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
     // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
     // a destination type that is wider than the source, and nor does
     // it have a FP_TO_[SU]INT instruction with a narrower destination than
@@ -5242,10 +5240,27 @@
   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   SDLoc dl(Op);
 
+  if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
+      (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
+    // Special-case integer 64-bit equality comparisons. They aren't legal,
+    // but they can be lowered with a few vector instructions.
+    unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
+    EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
+    SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
+    SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
+    SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
+                              DAG.getCondCode(ISD::SETEQ));
+    SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
+    SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
+    Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
+    if (SetCCOpcode == ISD::SETNE)
+      Merged = DAG.getNOT(dl, Merged, CmpVT);
+    Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
+    return Merged;
+  }
+
   if (CmpVT.getVectorElementType() == MVT::i64)
-    // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
-    // but it's possible that our operands are 64-bit but our result is 32-bit.
-    // Bail in this case.
+    // 64-bit comparisons are not legal in general.
     return SDValue();
 
   if (Op1.getValueType().isFloatingPoint()) {
Index: llvm/trunk/test/CodeGen/ARM/vicmp-64.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/vicmp-64.ll
+++ llvm/trunk/test/CodeGen/ARM/vicmp-64.ll
@@ -0,0 +1,52 @@
+; RUN: llc -mtriple=arm -mattr=+neon %s -o - | FileCheck %s
+
+; Check codegen for 64-bit icmp operations, which don't directly map to any
+; instruction.
+
+define <2 x i64> @vne(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vne:
+;CHECK: vceq.i32
+;CHECK-NEXT: vrev64.32
+;CHECK-NEXT: vand
+;CHECK-NEXT: vmvn
+;CHECK-NEXT: vmov
+;CHECK-NEXT: vmov
+;CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp ne <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+define <2 x i64> @veq(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: veq:
+;CHECK: vceq.i32
+;CHECK-NEXT: vrev64.32
+;CHECK-NEXT: vand
+;CHECK-NEXT: vmov
+;CHECK-NEXT: vmov
+;CHECK-NEXT: mov pc, lr
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp eq <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
+
+; FIXME: We currently generate terrible code for this.
+; (ATop < BTop) | ((ATop == BTop) & (ABottom < BBottom))
+; would come out to roughly 6 instructions, but we currently
+; scalarize it.
+define <2 x i64> @vult(<2 x i64>* %A, <2 x i64>* %B) nounwind {
+;CHECK-LABEL: vult:
+;CHECK: subs
+;CHECK: sbcs
+;CHECK: subs
+;CHECK: sbcs
+  %tmp1 = load <2 x i64>, <2 x i64>* %A
+  %tmp2 = load <2 x i64>, <2 x i64>* %B
+  %tmp3 = icmp ult <2 x i64> %tmp1, %tmp2
+  %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
+  ret <2 x i64> %tmp4
+}
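
Note (editorial, not part of the patch): the SETEQ lowering above works because a
64-bit lane is equal iff both of its 32-bit halves are equal. vceq.i32 produces an
all-ones/all-zeros mask per 32-bit half, vrev64.32 swaps the two halves within each
64-bit lane, and vand combines the mask with its swapped copy, so a lane ends up
all-ones exactly when both halves matched. The standalone C++ sketch below models
that sequence on a single 64-bit lane; the helper name lane_eq_mask and the test
values are hypothetical, for illustration only.

#include <cassert>
#include <cstdint>

// Scalar model of the v2i64 SETEQ lowering, one 64-bit lane at a time:
//   vceq.i32  -> all-ones/all-zeros mask per 32-bit half
//   vrev64.32 -> swap the two halves within the 64-bit lane
//   vand      -> lane is all-ones iff both halves compared equal
static uint64_t lane_eq_mask(uint64_t a, uint64_t b) {
  uint32_t lo = (uint32_t)a == (uint32_t)b ? ~0u : 0u;                  // low halves
  uint32_t hi = (uint32_t)(a >> 32) == (uint32_t)(b >> 32) ? ~0u : 0u;  // high halves
  uint64_t cmp = ((uint64_t)hi << 32) | lo;  // vceq.i32 result
  uint64_t rev = (cmp << 32) | (cmp >> 32);  // vrev64.32 result
  return cmp & rev;                          // vand result
}

int main() {
  assert(lane_eq_mask(0x0123456789abcdefULL, 0x0123456789abcdefULL) == ~0ULL);
  assert(lane_eq_mask(0x0123456789abcdefULL, 0x0123456700000000ULL) == 0);  // low halves differ
  assert(lane_eq_mask(0x0000000089abcdefULL, 0x0123456789abcdefULL) == 0);  // high halves differ
  return 0;
}

For SETNE the patch simply inverts this mask (DAG.getNOT, matching the vmvn checked
in @vne above) before sign-extending or truncating to the result type.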