diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1472,7 +1472,12 @@ /// like i140, which are first promoted then expanded, it is the number of /// registers needed to hold all the bits of the original type. For an i140 /// on a 32 bit machine this means 5 registers. - unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { + /// + /// RegisterVT may be passed as a way to override the default settings, for + /// instance with i128 inline assembly operands on SystemZ. + virtual unsigned + getNumRegisters(LLVMContext &Context, EVT VT, + Optional<MVT> RegisterVT = None) const { if (VT.isSimple()) { assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -166,9 +166,8 @@ assert(TRI->isTypeLegalForClass(*UseRC, VT) && "Incompatible phys register def and uses!"); DstRC = UseRC; - } else { - DstRC = TLI->getRegClassFor(VT, Node->isDivergent()); - } + } else + DstRC = SrcRC; // If all uses are reading from the src physical register and copying the // register is either impossible or very expensive, then don't create a copy. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -988,8 +988,9 @@ } for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value], + RegisterVT); for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; @@ -8241,7 +8242,7 @@ // remember that AX is actually i16 to get the right extension. const MVT RegVT = *TRI.legalclasstypes_begin(*RC); - if (OpInfo.ConstraintVT != MVT::Other) { + if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) { // If this is an FP operand in an integer register (or visa versa), or more // generally if the operand value disagrees with the register class we plan // to stick it in, fix the operand type. @@ -8288,7 +8289,7 @@ // Initialize NumRegs. unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) - NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT, RegVT); // If this is a constraint for a specific physical register, like {r17}, // assign it now. 
@@ -8621,21 +8622,18 @@ return; } - MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); SmallVector<unsigned, 4> Regs; - - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) { - unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); - MachineRegisterInfo &RegInfo = - DAG.getMachineFunction().getRegInfo(); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(RegInfo.createVirtualRegister(RC)); - } else { - emitInlineAsmError(Call, - "inline asm error: This value type register " - "class is not natively supported!"); - return; - } + MachineFunction &MF = DAG.getMachineFunction(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); + Register TiedReg = R->getReg(); + MVT RegVT = R->getSimpleValueType(0); + const TargetRegisterClass *RC = TiedReg.isVirtual() ? + MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg); + unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(MRI.createVirtualRegister(RC)); RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h @@ -422,6 +422,15 @@ return TypeWidenVector; return TargetLoweringBase::getPreferredVectorAction(VT); } + unsigned + getNumRegisters(LLVMContext &Context, EVT VT, + Optional<MVT> RegisterVT) const override { + // i128 inline assembly operand. 
+ if (VT == MVT::i128 && + RegisterVT.hasValue() && RegisterVT.getValue() == MVT::Untyped) + return 1; + return TargetLowering::getNumRegisters(Context, VT); + } bool isCheapToSpeculateCtlz() const override { return true; } bool preferZeroCompareBranch() const override { return true; } EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, @@ -518,6 +527,15 @@ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; bool allowTruncateForTailCall(Type *, Type *) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, + SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, + Optional<CallingConv::ID> CC) const override; + SDValue + joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, + Optional<CallingConv::ID> CC) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp --- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1368,6 +1368,55 @@ } } +static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(0, DL)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, + DAG.getIntPtrConstant(1, DL)); + SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, + MVT::Untyped, Hi, Lo); + return SDValue(Pair, 0); +} + +static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { + SDLoc DL(In); + SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, + DL, MVT::i64, In); + SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, + DL, MVT::i64, In); + return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); +} + +bool 
SystemZTargetLowering::splitValueIntoRegisterParts( + SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const { + EVT ValueVT = Val.getValueType(); + assert((ValueVT != MVT::i128 || + ((NumParts == 1 && PartVT == MVT::Untyped) || + (NumParts == 2 && PartVT == MVT::i64))) && + "Unknown handling of i128 value."); + if (ValueVT == MVT::i128 && NumParts == 1) { + // Inline assembly operand. + Parts[0] = lowerI128ToGR128(DAG, Val); + return true; + } + return false; +} + +SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( + SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const { + assert((ValueVT != MVT::i128 || + ((NumParts == 1 && PartVT == MVT::Untyped) || + (NumParts == 2 && PartVT == MVT::i64))) && + "Unknown handling of i128 value."); + if (ValueVT == MVT::i128 && NumParts == 1) + // Inline assembly operand. + return lowerGR128ToI128(DAG, Parts[0]); + return SDValue(); +} + SDValue SystemZTargetLowering::LowerFormalArguments( SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, @@ -5489,27 +5538,6 @@ // Lower operations with invalid operand or result types (currently used // only for 128-bit integer types). 
- -static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { - SDLoc DL(In); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, - DAG.getIntPtrConstant(0, DL)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In, - DAG.getIntPtrConstant(1, DL)); - SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, - MVT::Untyped, Hi, Lo); - return SDValue(Pair, 0); -} - -static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { - SDLoc DL(In); - SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, - DL, MVT::i64, In); - SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, - DL, MVT::i64, In); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); -} - void SystemZTargetLowering::LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results, diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-i128.ll b/llvm/test/CodeGen/SystemZ/inline-asm-i128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-i128.ll @@ -0,0 +1,120 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=s390x-linux-gnu -no-integrated-as < %s | FileCheck %s +; +; Test i128 (tied) operands. 
+ +define i32 @fun0(i8* %p1, i32 signext %l1, i8* %p2, i32 signext %l2, i8 zeroext %pad) { +; CHECK-LABEL: fun0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lgr %r0, %r5 +; CHECK-NEXT: # kill: def $r4d killed $r4d def $r4q +; CHECK-NEXT: lgr %r1, %r3 +; CHECK-NEXT: # kill: def $r2d killed $r2d def $r2q +; CHECK-NEXT: sllg %r5, %r6, 24 +; CHECK-NEXT: rosbg %r5, %r0, 40, 63, 0 +; CHECK-NEXT: risbg %r3, %r1, 40, 191, 0 +; CHECK-NEXT: #APP +; CHECK-NEXT: clcl %r2, %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ogr %r3, %r5 +; CHECK-NEXT: risbg %r0, %r3, 40, 191, 0 +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: afi %r2, -268435456 +; CHECK-NEXT: srl %r2, 31 +; CHECK-NEXT: br %r14 +entry: + %0 = ptrtoint i8* %p1 to i64 + %1 = ptrtoint i8* %p2 to i64 + %and5 = and i32 %l2, 16777215 + %2 = zext i32 %and5 to i64 + %conv7 = zext i8 %pad to i64 + %shl = shl nuw nsw i64 %conv7, 24 + %or = or i64 %shl, %2 + %u1.sroa.0.0.insert.ext = zext i64 %0 to i128 + %u1.sroa.0.0.insert.shift = shl nuw i128 %u1.sroa.0.0.insert.ext, 64 + %3 = and i32 %l1, 16777215 + %u1.sroa.0.0.insert.mask = zext i32 %3 to i128 + %u1.sroa.0.0.insert.insert = or i128 %u1.sroa.0.0.insert.shift, %u1.sroa.0.0.insert.mask + %u2.sroa.5.0.insert.ext = zext i64 %or to i128 + %u2.sroa.0.0.insert.ext = zext i64 %1 to i128 + %u2.sroa.0.0.insert.shift = shl nuw i128 %u2.sroa.0.0.insert.ext, 64 + %u2.sroa.0.0.insert.insert = or i128 %u2.sroa.0.0.insert.shift, %u2.sroa.5.0.insert.ext + %4 = tail call { i128, i128 } asm "clcl $0, $1", "=r,=r,0,1"(i128 %u1.sroa.0.0.insert.insert, i128 %u2.sroa.0.0.insert.insert) + %asmresult = extractvalue { i128, i128 } %4, 0 + %asmresult11 = extractvalue { i128, i128 } %4, 1 + %5 = or i128 %asmresult, %asmresult11 + %6 = and i128 %5, 16777215 + %7 = icmp eq i128 %6, 0 + %land.ext = zext i1 %7 to i32 + ret i32 %land.ext +} + +; Test a phys-reg def. 
+define void @fun1(i128* %Src, i128* %Dst) { +; CHECK-LABEL: fun1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: BLA %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: stg %r5, 8(%r3) +; CHECK-NEXT: stg %r4, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %IAsm = call i128 asm "BLA $0", "={r4}"() + store volatile i128 %IAsm, i128* %Dst + ret void +} + +; Test a phys-reg use. +define void @fun2(i128* %Src, i128* %Dst) { +; CHECK-LABEL: fun2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lg %r5, 8(%r2) +; CHECK-NEXT: lg %r4, 0(%r2) +; CHECK-NEXT: #APP +; CHECK-NEXT: BLA %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: br %r14 +entry: + %L = load i128, i128* %Src + call void asm "BLA $0", "{r4}"(i128 %L) + ret void +} + +; Test phys-reg use and phys-reg def. +define void @fun3(i128* %Src, i128* %Dst) { +; CHECK-LABEL: fun3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lg %r1, 8(%r2) +; CHECK-NEXT: lg %r0, 0(%r2) +; CHECK-NEXT: #APP +; CHECK-NEXT: BLA %r4, %r0 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: stg %r5, 8(%r3) +; CHECK-NEXT: stg %r4, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %L = load i128, i128* %Src + %IAsm = call i128 asm "BLA $0, $1", "={r4},{r0}"(i128 %L) + store volatile i128 %IAsm, i128* %Dst + ret void +} + +; Test a tied phys-reg. +define void @fun4(i128* %Src, i128* %Dst) { +; CHECK-LABEL: fun4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lg %r5, 8(%r2) +; CHECK-NEXT: lg %r4, 0(%r2) +; CHECK-NEXT: #APP +; CHECK-NEXT: BLA %r4, %r4 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: stg %r5, 8(%r3) +; CHECK-NEXT: stg %r4, 0(%r3) +; CHECK-NEXT: br %r14 +entry: + %L = load i128, i128* %Src + %IAsm = call i128 asm "BLA $0, $1", "={r4},0"(i128 %L) + store volatile i128 %IAsm, i128* %Dst + ret void +}