Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -351,6 +351,9 @@
   virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                                   unsigned Intrinsic) const override;
 
+  virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>& Results,
+                                  SelectionDAG &DAG) const override;
+
 protected:
   std::pair<const TargetRegisterClass*, uint8_t>
   findRepresentativeClass(MVT VT) const;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -536,12 +536,140 @@
     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
+
+    if (!Subtarget->isLittle())
+      setOperationAction(ISD::BITCAST, MVT::i128, Custom);
   }
 
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::VSELECT);
 }
 
+/// Recursively split the integer Op in half, halving NumElements at each
+/// level, and append each final piece (bitcast to EltVT) to Ops.
+static void IntegerToVector(SDValue Op, SelectionDAG &DAG, unsigned NumElements,
+                            SmallVectorImpl<SDValue> &Ops,
+                            EVT EltVT) {
+  assert(Op.getValueType().isInteger());
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc dl(Op);
+  SDValue Lo, Hi;
+
+  if (NumElements > 1) {
+    NumElements >>= 1;
+    EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
+                                   Op.getValueType().getSizeInBits()/2);
+    // Hi is the truncation of Op and Lo is Op shifted right by half its
+    // width; the Hi piece is emitted first.
+    Hi = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op);
+    Lo = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
+                     DAG.getConstant(HalfVT.getSizeInBits(), TLI.getPointerTy()));
+    Lo = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Lo);
+
+    IntegerToVector(Hi, DAG, NumElements, Ops, EltVT);
+    IntegerToVector(Lo, DAG, NumElements, Ops, EltVT);
+  } else {
+    Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op));
+  }
+}
+
+/// Custom expansion of a BITCAST whose source or result type is i128.
+/// Returns an empty SDValue when no expansion applies.
+static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  SDLoc dl(N);
+  assert((InVT == MVT::i128 || OutVT == MVT::i128) &&
+         "ExpandBITCAST called for non-i128 type");
+  if (TLI.getTypeAction(*DAG.getContext(), InVT) == TargetLowering::TypeLegal ||
+      TLI.getTypeAction(*DAG.getContext(), InVT) == TargetLowering::TypePromoteInteger) {
+    if (InVT.isVector() && OutVT.isInteger()) {
+      // Handle cases like i128 = BITCAST v4i32
+      unsigned NumElems = 2;
+      EVT ElemVT = NOutVT;
+      EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+
+      // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>.
+      while (!TLI.isTypeLegal(NVT)) {
+        unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2;
+        // If the element size is smaller than byte, bail.
+        if (NewSizeInBits < 8)
+          break;
+        NumElems *= 2;
+        ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits);
+        NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems);
+      }
+      if (TLI.isTypeLegal(NVT)) {
+        SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+
+        SmallVector<SDValue, 8> Vals;
+        for (unsigned i = 0; i < NumElems; ++i)
+          Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+                                     ElemVT, CastInOp,
+                                     DAG.getConstant(i, TLI.getVectorIdxTy())));
+
+        // Build Lo, Hi pair by pairing extracted elements if needed.
+        unsigned Slot = 0;
+        for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) {
+          // Each iteration will BUILD_PAIR two nodes and append the result until
+          // there are only two nodes left, i.e. Lo and Hi.
+          SDValue LHS = Vals[Slot];
+          SDValue RHS = Vals[Slot + 1];
+
+          if (TLI.isBigEndian())
+            std::swap(LHS, RHS);
+
+          Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
+                                     EVT::getIntegerVT(
+                                       *DAG.getContext(),
+                                       LHS.getValueType().getSizeInBits() << 1),
+                                     LHS, RHS));
+        }
+        SDValue Lo = Vals[Slot++];
+        SDValue Hi = Vals[Slot];
+        return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i128, Lo, Hi);
+      }
+    }
+  }
+  if (InVT.isInteger() && OutVT.isVector()) {
+    unsigned NumElts = 2;
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+                               TLI.getTypeToTransformTo(*DAG.getContext(), InVT),
+                               NumElts);
+    if (!TLI.isTypeLegal(NVT)) {
+      // If we can't find a legal type by splitting the integer in half,
+      // then we can use the node's value type.
+      NumElts = OutVT.getVectorNumElements();
+      NVT = OutVT;
+    }
+    SmallVector<SDValue, 8> Ops;
+    IntegerToVector(InOp, DAG, NumElts, Ops, NVT.getVectorElementType());
+    SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts);
+    return DAG.getNode(ISD::BITCAST, dl, OutVT, Vec);
+  }
+  return SDValue();
+}
+
+/// Push the custom expansion of N onto Results if one was produced; only
+/// BITCAST is handled, any other opcode is unreachable.
+void
+AArch64TargetLowering::ReplaceNodeResults(SDNode *N,
+                                          SmallVectorImpl<SDValue>& Results,
+                                          SelectionDAG &DAG) const {
+  SDValue Res;
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Don't know how to custom expand this!");
+  case ISD::BITCAST:
+    Res = ExpandBITCAST(N, DAG);
+    break;
+  }
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
 EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   // It's reasonably important that this value matches the "natural" legal
   // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
@@ -3320,6 +3448,7 @@
     return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+  case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
   }
 
   return SDValue();
Index: test/CodeGen/AArch64/bitcast-i128.ll
===================================================================
--- test/CodeGen/AArch64/bitcast-i128.ll
+++ test/CodeGen/AArch64/bitcast-i128.ll
@@ -0,0 +1,30 @@
+; RUN: llc -verify-machineinstrs < %s -march aarch64 -mattr neon | FileCheck %s --check-prefix=CHECK-LE
+; RUN: llc -verify-machineinstrs < %s -march aarch64_be -mattr neon | FileCheck %s --check-prefix=CHECK-BE
+
+define void @callee_i128( i128 %var, <4 x i32 >* %storeaddr ) {
+; CHECK-LE-LABEL: callee_i128:
+; CHECK-LE: ins v1.d[0], x0
+; CHECK-LE: ins v1.d[1], x1
+; CHECK-BE-LABEL: callee_i128:
+; CHECK-BE: ins v1.d[0], x1
+; CHECK-BE: ins v1.d[1], x0
+  %v0 = bitcast i128 %var to <4 x i32>
+  %v1 = add <4 x i32> %v0, < i32 0, i32 1, i32 2, i32 3 >
+  store <4 x i32> %v1, <4 x i32>* %storeaddr
+  ret void
+}
+
+define void @caller_i128( <4 x i32>* %loadaddr, <4 x i32>* %storeaddr ) {
+; CHECK-LE-LABEL: caller_i128:
+; CHECK-LE: umov x0, v0.d[0]
+; CHECK-LE: umov x1, v0.d[1]
+; CHECK-BE-LABEL: caller_i128:
+; CHECK-BE: umov x0, v0.d[1]
+; CHECK-BE: umov x1, v0.d[0]
+  %v0 = load <4 x i32>* %loadaddr
+  %v1 = add <4 x i32> %v0, < i32 0, i32 1, i32 2, i32 3 >
+  %v2 = bitcast <4 x i32> %v1 to i128
+  call void (i128, <4 x i32>*)* @callee_i128( i128 %v2, <4 x i32>* %storeaddr )
+  ret void
+}
+