Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1484,6 +1484,14 @@
     llvm_unreachable("Unsupported extended type!");
   }
 
+  /// InlineAsm operands may not follow the general treatment during
+  /// instruction selection: on SystemZ i128 is split in two parts generally,
+  /// but in inline assembly it is just one operand.
+  virtual unsigned getNumRegistersForInlineAsmOp(LLVMContext &Context,
+                                                 EVT VT) const {
+    return getNumRegisters(Context, VT);
+  }
+
   /// Certain combinations of ABIs, Targets and features require that types
   /// are legal for some operations and not for other operations.
   /// For MIPS all vector types must be passed through the integer register set.
@@ -3661,7 +3669,8 @@
   virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
                                            SDValue Val, SDValue *Parts,
                                            unsigned NumParts, MVT PartVT,
-                                           Optional<CallingConv::ID> CC) const {
+                                           Optional<CallingConv::ID> CC,
+                                           const Value *V = nullptr) const {
     return false;
   }
 
@@ -3670,7 +3679,8 @@
   joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                              const SDValue *Parts, unsigned NumParts,
                              MVT PartVT, EVT ValueVT,
-                             Optional<CallingConv::ID> CC) const {
+                             Optional<CallingConv::ID> CC,
+                             const Value *V = nullptr) const {
     return SDValue();
   }
 
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -169,7 +169,7 @@
   // Let the target assemble the parts if it wants to
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts,
-                                                   PartVT, ValueVT, CC))
+                                                   PartVT, ValueVT, CC, V))
     return Val;
 
   if (ValueVT.isVector())
@@ -478,7 +478,7 @@
   // Let the target split the parts if it wants to
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT,
-                                      CallConv))
+                                      CallConv, V))
     return;
 
   EVT ValueVT = Val.getValueType();
@@ -979,7 +979,8 @@
   }
 
   for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
-    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+    unsigned NumRegs =
+        TLI.getNumRegistersForInlineAsmOp(*DAG.getContext(), ValueVTs[Value]);
     MVT RegisterVT = RegVTs[Value];
     for (unsigned i = 0; i != NumRegs; ++i) {
       assert(Reg < Regs.size() && "Mismatch in # registers expected");
@@ -8178,7 +8179,7 @@
   // remember that AX is actually i16 to get the right extension.
   const MVT RegVT = *TRI.legalclasstypes_begin(*RC);
 
-  if (OpInfo.ConstraintVT != MVT::Other) {
+  if (OpInfo.ConstraintVT != MVT::Other && RegVT != MVT::Untyped) {
     // If this is an FP operand in an integer register (or visa versa), or more
     // generally if the operand value disagrees with the register class we plan
     // to stick it in, fix the operand type.
@@ -8225,7 +8226,7 @@
     // Initialize NumRegs.
     unsigned NumRegs = 1;
     if (OpInfo.ConstraintVT != MVT::Other)
-      NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+      NumRegs = TLI.getNumRegistersForInlineAsmOp(Context, OpInfo.ConstraintVT);
 
     // If this is a constraint for a specific physical register, like {r17},
     // assign it now.
@@ -8551,7 +8552,15 @@
       MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
       SmallVector<unsigned, 4> Regs;
 
-      if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) {
+      const TargetRegisterClass *RC = nullptr;
+      if (RegVT == MVT::Untyped) {
+        const MachineFunction &MF = DAG.getMachineFunction();
+        const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+        RC = TLI.getRegForInlineAsmConstraint(&TRI, RefOpInfo.ConstraintCode,
+                                              OpInfo.ConstraintVT).second;
+      } else
+        RC = TLI.getRegClassFor(RegVT);
+      if (RC) {
         unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag);
         MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -846,13 +846,15 @@
     bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
                                      SDValue Val, SDValue *Parts,
                                      unsigned NumParts, MVT PartVT,
-                                     Optional<CallingConv::ID> CC) const override;
+                                     Optional<CallingConv::ID> CC,
+                                     const Value *V = nullptr) const override;
 
     SDValue
     joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT,
-                               Optional<CallingConv::ID> CC) const override;
+                               Optional<CallingConv::ID> CC,
+                               const Value *V = nullptr) const override;
 
     SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                  bool isVarArg,
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4215,7 +4215,8 @@
 
 bool ARMTargetLowering::splitValueIntoRegisterParts(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
-    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
   bool IsABIRegCopy = CC.hasValue();
   EVT ValueVT = Val.getValueType();
   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
@@ -4233,7 +4234,8 @@
 
 SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
-    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
   bool IsABIRegCopy = CC.hasValue();
   if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
       PartVT == MVT::f32) {
Index: llvm/lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -449,13 +449,15 @@
   bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
                                    SDValue Val, SDValue *Parts,
                                    unsigned NumParts, MVT PartVT,
-                                   Optional<CallingConv::ID> CC) const override;
+                                   Optional<CallingConv::ID> CC,
+                                   const Value *V = nullptr) const override;
 
   SDValue
   joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                              const SDValue *Parts, unsigned NumParts,
                              MVT PartVT, EVT ValueVT,
-                             Optional<CallingConv::ID> CC) const override;
+                             Optional<CallingConv::ID> CC,
+                             const Value *V = nullptr) const override;
 
   static RISCVVLMUL getLMUL(MVT VT);
   static unsigned getRegClassIDForLMUL(RISCVVLMUL LMul);
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7755,7 +7755,8 @@
 
 bool RISCVTargetLowering::splitValueIntoRegisterParts(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
-    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
   bool IsABIRegCopy = CC.hasValue();
   EVT ValueVT = Val.getValueType();
   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
@@ -7797,7 +7798,8 @@
 
 SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
-    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
   bool IsABIRegCopy = CC.hasValue();
   if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
     SDValue Val = Parts[0];
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -422,6 +422,12 @@
       return TypeWidenVector;
     return TargetLoweringBase::getPreferredVectorAction(VT);
   }
+  unsigned getNumRegistersForInlineAsmOp(LLVMContext &Context,
+                                         EVT VT) const override {
+    if (VT == MVT::i128)
+      return 1;
+    return getNumRegisters(Context, VT);
+  }
   bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
@@ -517,6 +523,17 @@
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
   bool allowTruncateForTailCall(Type *, Type *) const override;
   bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
+  bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
+                                   SDValue Val, SDValue *Parts,
+                                   unsigned NumParts, MVT PartVT,
+                                   Optional<CallingConv::ID> CC,
+                                   const Value *V = nullptr) const override;
+  SDValue
+  joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
+                             const SDValue *Parts, unsigned NumParts,
+                             MVT PartVT, EVT ValueVT,
+                             Optional<CallingConv::ID> CC,
+                             const Value *V = nullptr) const override;
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1365,6 +1365,56 @@
   }
 }
 
+static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
+  SDLoc DL(In);
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+                           DAG.getIntPtrConstant(0, DL));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
+                           DAG.getIntPtrConstant(1, DL));
+  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
+                                    MVT::Untyped, Hi, Lo);
+  return SDValue(Pair, 0);
+}
+
+static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
+  SDLoc DL(In);
+  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
+                                          DL, MVT::i64, In);
+  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
+                                          DL, MVT::i64, In);
+  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
+}
+
+static bool isInlineAsm(const Value *V) {
+  if (const CallInst *CI = dyn_cast<CallInst>(V))
+    return CI->isInlineAsm();
+  return false;
+}
+
+bool SystemZTargetLowering::splitValueIntoRegisterParts(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
+    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
+  EVT ValueVT = Val.getValueType();
+  if (ValueVT == MVT::i128 && V && isInlineAsm(V)) {
+    assert((PartVT == MVT::Untyped && NumParts == 1) && "Unknown i128 op.");
+    Parts[0] = lowerI128ToGR128(DAG, Val);
+    return true;
+  }
+  return false;
+}
+
+SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
+    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
+    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC,
+    const Value *V) const {
+  if (ValueVT == MVT::i128 && V && isInlineAsm(V)) {
+    assert((PartVT == MVT::Untyped && NumParts == 1) && "Unknown i128 op.");
+    return lowerGR128ToI128(DAG, Parts[0]);
+  }
+  return SDValue();
+}
+
 SDValue SystemZTargetLowering::LowerFormalArguments(
     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
@@ -5488,27 +5538,6 @@
 // Lower operations with invalid operand or result types (currently used
 // only for 128-bit integer types).
-
-static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
-  SDLoc DL(In);
-  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
-                           DAG.getIntPtrConstant(0, DL));
-  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
-                           DAG.getIntPtrConstant(1, DL));
-  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
-                                    MVT::Untyped, Hi, Lo);
-  return SDValue(Pair, 0);
-}
-
-static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
-  SDLoc DL(In);
-  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
-                                          DL, MVT::i64, In);
-  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
-                                          DL, MVT::i64, In);
-  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
-}
-
 void SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                                   SmallVectorImpl<SDValue> &Results,
Index: llvm/test/CodeGen/SystemZ/inline-asm-i128.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/SystemZ/inline-asm-i128.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=s390x-linux-gnu < %s | FileCheck %s
+;
+; Test i128 (tied) operands.
+
+define i32 @clcl(i8* %p1, i32 signext %l1, i8* %p2, i32 signext %l2, i8 zeroext %pad) {
+; CHECK-LABEL: clcl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lgr %r0, %r5
+; CHECK-NEXT:    # kill: def $r4d killed $r4d def $r4q
+; CHECK-NEXT:    lgr %r1, %r3
+; CHECK-NEXT:    # kill: def $r2d killed $r2d def $r2q
+; CHECK-NEXT:    sllg %r5, %r6, 24
+; CHECK-NEXT:    rosbg %r5, %r0, 40, 63, 0
+; CHECK-NEXT:    risbg %r3, %r1, 40, 191, 0
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    clcl %r2, %r4
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ogr %r3, %r5
+; CHECK-NEXT:    risbg %r0, %r3, 40, 191, 0
+; CHECK-NEXT:    ipm %r2
+; CHECK-NEXT:    afi %r2, -268435456
+; CHECK-NEXT:    srl %r2, 31
+; CHECK-NEXT:    br %r14
+entry:
+  %0 = ptrtoint i8* %p1 to i64
+  %1 = ptrtoint i8* %p2 to i64
+  %and5 = and i32 %l2, 16777215
+  %2 = zext i32 %and5 to i64
+  %conv7 = zext i8 %pad to i64
+  %shl = shl nuw nsw i64 %conv7, 24
+  %or = or i64 %shl, %2
+  %u1.sroa.0.0.insert.ext = zext i64 %0 to i128
+  %u1.sroa.0.0.insert.shift = shl nuw i128 %u1.sroa.0.0.insert.ext, 64
+  %3 = and i32 %l1, 16777215
+  %u1.sroa.0.0.insert.mask = zext i32 %3 to i128
+  %u1.sroa.0.0.insert.insert = or i128 %u1.sroa.0.0.insert.shift, %u1.sroa.0.0.insert.mask
+  %u2.sroa.5.0.insert.ext = zext i64 %or to i128
+  %u2.sroa.0.0.insert.ext = zext i64 %1 to i128
+  %u2.sroa.0.0.insert.shift = shl nuw i128 %u2.sroa.0.0.insert.ext, 64
+  %u2.sroa.0.0.insert.insert = or i128 %u2.sroa.0.0.insert.shift, %u2.sroa.5.0.insert.ext
+  %4 = tail call { i128, i128 } asm "CLCL $0,$1", "=r,=r,0,1"(i128 %u1.sroa.0.0.insert.insert, i128 %u2.sroa.0.0.insert.insert)
+  %asmresult = extractvalue { i128, i128 } %4, 0
+  %asmresult11 = extractvalue { i128, i128 } %4, 1
+  %5 = or i128 %asmresult, %asmresult11
+  %6 = and i128 %5, 16777215
+  %7 = icmp eq i128 %6, 0
+  %land.ext = zext i1 %7 to i32
+  ret i32 %land.ext
+}
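
For illustration only (this note and the IR below are not part of the patch): a reduced form of the pattern the new hooks handle. With getNumRegistersForInlineAsmOp returning 1 for i128, splitValueIntoRegisterParts wraps the value into a single MVT::Untyped GR128 operand via PAIR128, and joinRegisterPartsIntoValue recovers the i128 through subreg_h64/subreg_l64, so an i128 operand under the "r" constraint stays one register-pair operand instead of being split into two i64 halves. The function name and asm body below are invented for the sketch; the clcl test above is the test actually added by the patch.

; Hypothetical reduced example, same shape as the clcl test but with one
; untied i128 input and one i128 output (not part of the patch):
define i128 @i128_roundtrip(i128 %x) {
entry:
  ; The asm body is only an assembler comment; the point is that $0 and $1
  ; are each carried as a single 128-bit register-pair operand.
  %res = tail call i128 asm "# def $0, use $1", "=r,r"(i128 %x)
  ret i128 %res
}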