Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -1465,7 +1465,13 @@ /// like i140, which are first promoted then expanded, it is the number of /// registers needed to hold all the bits of the original type. For an i140 /// on a 32 bit machine this means 5 registers. - unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { + /// + /// RC is typically not needed, but it may be passed as a way to override + /// the default settings, for instance with i128 inline assembly operands + /// on SystemZ. + virtual unsigned + getNumRegisters(LLVMContext &Context, EVT VT, + const TargetRegisterClass *RC = nullptr) const { if (VT.isSimple()) { assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -942,6 +942,7 @@ SelectionDAG &DAG, std::vector &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetRegisterClass *RC = nullptr; unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) @@ -953,7 +954,7 @@ // Don't do this for tied operands that can use the regclass information // from the def. const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + RC = MRI.getRegClass(Regs.front()); Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); } @@ -979,8 +980,13 @@ } for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + // Pass RC to getNumRegisters() since inline assembly operands may not + // follow the ordinary pattern of splitting. MVT RegisterVT = RegVTs[Value]; + if (RC == nullptr) + RC = TLI.getRegClassFor(RegisterVT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value], + RC); for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; @@ -8177,7 +8183,7 @@ // remember that AX is actually i16 to get the right extension. const MVT RegVT = *TRI.legalclasstypes_begin(*RC); - if (OpInfo.ConstraintVT != MVT::Other) { + if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) { // If this is an FP operand in an integer register (or visa versa), or more // generally if the operand value disagrees with the register class we plan // to stick it in, fix the operand type. @@ -8224,7 +8230,7 @@ // Initialize NumRegs. unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) - NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RC); // If this is a constraint for a specific physical register, like {r17}, // assign it now. Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -422,6 +422,19 @@ return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } + const TargetRegisterClass * + getRegClassFor(MVT VT, bool isDivergent = false) const override { + if (VT == MVT::Untyped) // Needed for inline asm phys regs. + return &SystemZ::GR128BitRegClass; + return TargetLowering::getRegClassFor(VT); + } + unsigned + getNumRegisters(LLVMContext &Context, EVT VT, + const TargetRegisterClass *RC = nullptr) const override { + if (VT == MVT::i128 && RC == &SystemZ::GR128BitRegClass) + return 1; + return TargetLowering::getNumRegisters(Context, VT); + } bool isCheapToSpeculateCtlz() const override { return true; } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override; @@ -517,6 +530,15 @@ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; bool allowTruncateForTailCall(Type *, Type *) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, + SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, + Optional CC) const override; + SDValue + joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, + Optional CC) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1368,6 +1368,55 @@ } } +static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(0, DL)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(1, DL)); + SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, + MVT::Untyped, Hi, Lo); + return SDValue(Pair, 0); +} + +static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, + DL, MVT::i64, In); + SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, + DL, MVT::i64, In); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); +} + +bool SystemZTargetLowering::splitValueIntoRegisterParts( + SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, Optional CC) const { + EVT ValueVT = Val.getValueType(); + assert((ValueVT != MVT::i128 || + ((NumParts == 1 && PartVT == MVT::Untyped) || + (NumParts == 2 && PartVT == MVT::i64))) && + "Unknown handling of i128 value."); + if (ValueVT == MVT::i128 && NumParts == 1) { + // Inline assembly operand. + Parts[0] = lowerI128ToGR128(DAG, Val); + return true; + } + return false; +} + +SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( + SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, Optional CC) const { + assert((ValueVT != MVT::i128 || + ((NumParts == 1 && PartVT == MVT::Untyped) || + (NumParts == 2 && PartVT == MVT::i64))) && + "Unknown handling of i128 value."); + if (ValueVT == MVT::i128 && NumParts == 1) + // Inline assembly operand. + return lowerGR128ToI128(DAG, Parts[0]); + return SDValue(); +} + SDValue SystemZTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, @@ -5489,27 +5538,6 @@ // Lower operations with invalid operand or result types (currently used // only for 128-bit integer types). - -static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { - SDLoc DL(In); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, - DAG.getIntPtrConstant(0, DL)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, - DAG.getIntPtrConstant(1, DL)); - SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, - MVT::Untyped, Hi, Lo); - return SDValue(Pair, 0); -} - -static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { - SDLoc DL(In); - SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, - DL, MVT::i64, In); - SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, - DL, MVT::i64, In); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); -} - void SystemZTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl &Results, Index: llvm/test/CodeGen/SystemZ/inline-asm-i128.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/inline-asm-i128.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=s390x-linux-gnu -no-integrated-as < %s | FileCheck %s +; +; Test i128 (tied) operands. + +define i32 @clcl(i8* %p1, i32 signext %l1, i8* %p2, i32 signext %l2, i8 zeroext %pad) { +; CHECK-LABEL: clcl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r0, %r5 +; CHECK-NEXT: # kill: def $r4d killed $r4d def $r4q +; CHECK-NEXT: lgr %r1, %r3 +; CHECK-NEXT: # kill: def $r2d killed $r2d def $r2q +; CHECK-NEXT: sllg %r5, %r6, 24 +; CHECK-NEXT: rosbg %r5, %r0, 40, 63, 0 +; CHECK-NEXT: risbg %r3, %r1, 40, 191, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: clcl %r2, %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ogr %r3, %r5 +; CHECK-NEXT: risbg %r0, %r3, 40, 191, 0 +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK-NEXT: br %r14 +entry: + %0 = ptrtoint i8* %p1 to i64 + %1 = ptrtoint i8* %p2 to i64 + %and5 = and i32 %l2, 16777215 + %2 = zext i32 %and5 to i64 + %conv7 = zext i8 %pad to i64 + %shl = shl nuw nsw i64 %conv7, 24 + %or = or i64 %shl, %2 + %u1.sroa.0.0.insert.ext = zext i64 %0 to i128 + %u1.sroa.0.0.insert.shift = shl nuw i128 %u1.sroa.0.0.insert.ext, 64 + %3 = and i32 %l1, 16777215 + %u1.sroa.0.0.insert.mask = zext i32 %3 to i128 + %u1.sroa.0.0.insert.insert = or i128 %u1.sroa.0.0.insert.shift, %u1.sroa.0.0.insert.mask + %u2.sroa.5.0.insert.ext = zext i64 %or to i128 + %u2.sroa.0.0.insert.ext = zext i64 %1 to i128 + %u2.sroa.0.0.insert.shift = shl nuw i128 %u2.sroa.0.0.insert.ext, 64 + %u2.sroa.0.0.insert.insert = or i128 %u2.sroa.0.0.insert.shift, %u2.sroa.5.0.insert.ext + %4 = tail call { i128, i128 } asm "clcl $0, $1", "=r,=r,0,1"(i128 %u1.sroa.0.0.insert.insert, i128 %u2.sroa.0.0.insert.insert) + %asmresult = extractvalue { i128, i128 } %4, 0 + %asmresult11 = extractvalue { i128, i128 } %4, 1 + %5 = or i128 %asmresult, %asmresult11 + %6 = and i128 %5, 16777215 + %7 = icmp eq i128 %6, 0 + %land.ext = zext i1 %7 to i32 + ret i32 %land.ext +} + +; Test a tied phys-reg. +define void @fun(i128* %Src, i128* %Dst) { +; CHECK-LABEL: fun: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lg %r5, 8(%r2) +; CHECK-NEXT: lg %r4, 0(%r2) +; CHECK-NEXT: #APP +; CHECK-NEXT: BLA %r4, %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: stg %r5, 8(%r3) +; CHECK-NEXT: stg %r4, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %L = load i128, i128* %Src + %IAsm = call i128 asm "BLA $0, $1", "={r4},0"(i128 %L) + store volatile i128 %IAsm, i128* %Dst + ret void +}