diff --git a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp --- a/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp +++ b/llvm/lib/Target/VE/InstPrinter/VEInstPrinter.cpp @@ -60,7 +60,6 @@ switch (MI->getOpcode()) { default: // Expects signed 32bit literals - assert(isInt<32>(MO.getImm()) && "Immediate too large"); int32_t TruncatedImm = static_cast(MO.getImm()); O << TruncatedImm; return; diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h --- a/llvm/lib/Target/VE/VE.h +++ b/llvm/lib/Target/VE/VE.h @@ -94,16 +94,5 @@ llvm_unreachable("Invalid cond code"); } -// Different to Hi_32/Lo_32 the HI32 and LO32 functions -// preserve the correct numerical value -// on the LLVM data type for MC immediates (int64_t). -inline static int64_t HI32(int64_t imm) { - return (int32_t)(imm >> 32); -} - -inline static int64_t LO32(int64_t imm) { - return (int32_t)(imm); -} - } // namespace llvm #endif diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td --- a/llvm/lib/Target/VE/VECallingConv.td +++ b/llvm/lib/Target/VE/VECallingConv.td @@ -14,6 +14,20 @@ // Aurora VE //===----------------------------------------------------------------------===// +def CC_VE : CallingConv<[ + // All arguments get passed in generic registers if there is space. + + // long long/double --> generic 64 bit registers + CCIfType<[i64], + CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, +]>; + +def RetCC_VE : CallingConv<[ + // long long/double --> generic 64 bit registers + CCIfType<[i64], + CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>, +]>; + // Callee-saved registers def CSR : CalleeSavedRegs<(add (sequence "SX%u", 18, 33))>; def CSR_NoRegs : CalleeSavedRegs<(add)>; diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp --- a/llvm/lib/Target/VE/VEFrameLowering.cpp +++ b/llvm/lib/Target/VE/VEFrameLowering.cpp @@ -124,14 +124,14 @@ // and %s13,%s13,(32)0 // lea.sl %sp,%hi(NumBytes)(%sp, %s13) BuildMI(MBB, MBBI, dl, TII.get(VE::LEAzzi), VE::SX13) - .addImm(LO32(NumBytes)); + .addImm(Lo_32(NumBytes)); BuildMI(MBB, MBBI, dl, TII.get(VE::ANDrm0), VE::SX13) .addReg(VE::SX13) .addImm(32); BuildMI(MBB, MBBI, dl, TII.get(VE::LEASLrri), VE::SX11) .addReg(VE::SX11) .addReg(VE::SX13) - .addImm(HI32(NumBytes)); + .addImm(Hi_32(NumBytes)); } void VEFrameLowering::emitSPExtend(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp --- a/llvm/lib/Target/VE/VEISelLowering.cpp +++ b/llvm/lib/Target/VE/VEISelLowering.cpp @@ -41,9 +41,10 @@ bool VETargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg, const SmallVectorImpl &Outs, LLVMContext &Context) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - return true; // TODO support more than 'ret void' + CCAssignFn *RetCC = RetCC_VE; + SmallVector RVLocs; + CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC); } SDValue @@ -52,12 +53,55 @@ const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { - assert(!IsVarArg && "TODO implement var args"); - assert(Outs.empty() && "TODO implement return values"); - assert(OutVals.empty() && "TODO implement return values"); + // CCValAssign - represent the assignment of the return value to locations. + SmallVector RVLocs; + + // CCState - Info about the registers and stack slot. + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs, + *DAG.getContext()); + // Analyze return values. + CCInfo.AnalyzeReturn(Outs, RetCC_VE); + + SDValue Flag; SmallVector RetOps(1, Chain); + + // Copy the result values into the output registers. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + SDValue OutVal = OutVals[i]; + + // Integer return values must be sign or zero extended by the callee. + switch (VA.getLocInfo()) { + case CCValAssign::Full: + break; + case CCValAssign::SExt: + OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::ZExt: + OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal); + break; + case CCValAssign::AExt: + OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal); + break; + default: + llvm_unreachable("Unknown loc info!"); + } + + Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag); + + // Guarantee that all emitted copies are stuck together with flags. + Flag = Chain.getValue(1); + RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + } + RetOps[0] = Chain; // Update chain. + + // Add the flag if we have it. + if (Flag.getNode()) + RetOps.push_back(Flag); + return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps); } @@ -65,8 +109,61 @@ SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + + // Get the size of the preserved arguments area + unsigned ArgsPreserved = 64; + + // Analyze arguments according to CC_VE. + SmallVector ArgLocs; + CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs, + *DAG.getContext()); + // Allocate the preserved area first. + CCInfo.AllocateStack(ArgsPreserved, 8); + // We already allocated the preserved area, so the stack offset computed + // by CC_VE would be correct now. + CCInfo.AnalyzeFormalArguments(Ins, CC_VE); + + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + assert(VA.isRegLoc() && "TODO implement argument passing on stack"); + if (VA.isRegLoc()) { + // This argument is passed in a register. + // All integer register arguments are promoted by the caller to i64. + + // Create a virtual register for the promoted live-in value. + unsigned VReg = + MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT())); + SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT()); + + assert((VA.getValVT() == MVT::i64) && + "TODO implement other argument types than i64"); + + // The caller promoted the argument, so insert an Assert?ext SDNode so we + // won't promote the value again in this function. + switch (VA.getLocInfo()) { + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg, + DAG.getValueType(VA.getValVT())); + break; + default: + break; + } + + // Truncate the register down to the argument type. + if (VA.isExtInLoc()) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); + + InVals.push_back(Arg); + continue; + } + } + assert(!IsVarArg && "TODO implement var args"); - assert(Ins.empty() && "TODO implement input arguments"); return Chain; } diff --git a/llvm/lib/Target/VE/VEInstrInfo.h b/llvm/lib/Target/VE/VEInstrInfo.h --- a/llvm/lib/Target/VE/VEInstrInfo.h +++ b/llvm/lib/Target/VE/VEInstrInfo.h @@ -37,6 +37,10 @@ /// const VERegisterInfo &getRegisterInfo() const { return RI; } + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const override; + // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -38,6 +38,23 @@ : VEGenInstrInfo(VE::ADJCALLSTACKDOWN, VE::ADJCALLSTACKUP), RI(), Subtarget(ST) {} +void VEInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, const DebugLoc &DL, + MCRegister DestReg, MCRegister SrcReg, + bool KillSrc) const { + + if (VE::I64RegClass.contains(SrcReg) && VE::I64RegClass.contains(DestReg)) { + BuildMI(MBB, I, DL, get(VE::ORri), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + } else { + const TargetRegisterInfo *TRI = &getRegisterInfo(); + dbgs() << "Impossible reg-to-reg copy from " << printReg(SrcReg, TRI) + << " to " << printReg(DestReg, TRI) << "\n"; + llvm_unreachable("Impossible reg-to-reg copy"); + } +} + bool VEInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { switch (MI.getOpcode()) { case VE::EXTEND_STACK: { diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -26,7 +26,23 @@ def simm7 : PatLeaf<(imm), [{ return isInt<7>(N->getSExtValue()); }]>; def simm32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>; +def uimm32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>; def uimm6 : PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>; +def lomsbzero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0x80000000) + == 0; }]>; +def lozero : PatLeaf<(imm), [{ return (N->getZExtValue() & 0xffffffff) + == 0; }]>; + +def LO32 : SDNodeXFormgetTargetConstant(Lo_32(N->getZExtValue()), + SDLoc(N), MVT::i64); +}]>; + +def HI32 : SDNodeXFormgetTargetConstant(Hi_32(N->getZExtValue()), + SDLoc(N), MVT::i32); +}]>; // ASX format of memory address def MEMri : Operand { @@ -119,6 +135,14 @@ let cz = 1; let hasSideEffects = 0; } + def rzi : RM< + opc, (outs RC:$sx), (ins RC:$sz, immOp2:$imm32), + !strconcat(opcStr, " $sx, ${imm32}(${sz})")> { + let cy = 0; + let sy = 0; + let cz = 1; + let hasSideEffects = 0; + } def zzi : RM< opc, (outs RC:$sx), (ins immOp2:$imm32), !strconcat(opcStr, " $sx, $imm32")> { @@ -172,6 +196,18 @@ } } +multiclass RRNDmimopc, + RegisterClass RCo, ValueType Tyo, + RegisterClass RCi, ValueType Tyi, + Operand immOp, Operand immOp2> { + def im1 : RR { + let cy = 0; + let cz = 0; + let hasSideEffects = 0; + } +} + // Used by add, mul, div, and similar commutative instructions // The order of operands are "$sx, $sy, $sz" @@ -180,7 +216,8 @@ RRmrr, RRmri, RRmiz, - RRNDmrm; + RRNDmrm, + RRNDmim; // Branch multiclass let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in @@ -265,6 +302,24 @@ 0x3F, (outs), (ins), "monc">; +//===----------------------------------------------------------------------===// +// Pattern Matchings +//===----------------------------------------------------------------------===// + +// Small immediates. +def : Pat<(i64 simm7:$val), (ORim1 imm:$val, 0)>; +// Medium immediates. +def : Pat<(i64 simm32:$val), (LEAzzi imm:$val)>; +def : Pat<(i64 uimm32:$val), (ANDrm0 (LEAzzi imm:$val), 32)>; +// Arbitrary immediates. +def : Pat<(i64 lozero:$val), + (LEASLzzi (HI32 imm:$val))>; +def : Pat<(i64 lomsbzero:$val), + (LEASLrzi (LEAzzi (LO32 imm:$val)), (HI32 imm:$val))>; +def : Pat<(i64 imm:$val), + (LEASLrzi (ANDrm0 (LEAzzi (LO32 imm:$val)), 32), + (HI32 imm:$val))>; + //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/VE/constants_i64.ll b/llvm/test/CodeGen/VE/constants_i64.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/constants_i64.ll @@ -0,0 +1,157 @@ +; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s + +define i64 @p0i64() { +; CHECK-LABEL: p0i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 0 +} + +define signext i64 @p0si64() { +; CHECK-LABEL: p0si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 0 +} + +define zeroext i64 @p0zi64() { +; CHECK-LABEL: p0zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: or %s0, 0, (0)1 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 0 +} + +define i64 @p128i64() { +; CHECK-LABEL: p128i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 128 +} + +define signext i64 @p128si64() { +; CHECK-LABEL: p128si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 128 +} + +define zeroext i64 @p128zi64() { +; CHECK-LABEL: p128zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 128 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 128 +} + +define i64 @p2264924160i64() { +; CHECK-LABEL: p2264924160i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2030043136 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2264924160 +} + +define signext i64 @p2264924160si64() { +; CHECK-LABEL: p2264924160si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2030043136 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2264924160 +} + +define zeroext i64 @p2264924160zi64() { +; CHECK-LABEL: p2264924160zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2030043136 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2264924160 +} + +define i64 @p2147483647i64() { +; CHECK-LABEL: p2147483647i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2147483647 +} + +define signext i64 @p2147483647si64() { +; CHECK-LABEL: p2147483647si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2147483647 +} + +define zeroext i64 @p2147483647zi64() { +; CHECK-LABEL: p2147483647zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 2147483647 +} + +define i64 @p15032385535i64() { +; CHECK-LABEL: p15032385535i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385535 +} + +define signext i64 @p15032385535si64() { +; CHECK-LABEL: p15032385535si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385535 +} + +define zeroext i64 @p15032385535zi64() { +; CHECK-LABEL: p15032385535zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, 2147483647 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385535 +} + +define i64 @p15032385536i64() { +; CHECK-LABEL: p15032385536i64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2147483648 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385536 +} + +define signext i64 @p15032385536si64() { +; CHECK-LABEL: p15032385536si64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2147483648 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385536 +} + +define zeroext i64 @p15032385536zi64() { +; CHECK-LABEL: p15032385536zi64: +; CHECK: .LBB{{[0-9]+}}_2: +; CHECK-NEXT: lea %s0, -2147483648 +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lea.sl %s0, 3(%s0) +; CHECK-NEXT: or %s11, 0, %s9 + ret i64 15032385536 +} diff --git a/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll b/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll --- a/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll +++ b/llvm/test/CodeGen/VE/simple_prologue_epilogue.ll @@ -30,3 +30,66 @@ ; CHECK-NEXT: b.l (,%lr) ret void } + +define i64 @func1(i64) { +; CHECK-LABEL: func1: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (,%s11) +; CHECK-NEXT: st %s10, 8(,%s11) +; CHECK-NEXT: st %s15, 24(,%s11) +; CHECK-NEXT: st %s16, 32(,%s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -176 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s11, %s13) +; CHECK-NEXT: brge.l %s11, %s8, .LBB1_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(,%s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(,%s11) +; CHECK-NEXT: ld %s15, 24(,%s11) +; CHECK-NEXT: ld %s10, 8(,%s11) +; CHECK-NEXT: ld %s9, (,%s11) +; CHECK-NEXT: b.l (,%lr) + ret i64 %0 +} + +define i64 @func2(i64, i64, i64, i64, i64) { +; CHECK-LABEL: func2: +; CHECK: # %bb.0: +; CHECK-NEXT: st %s9, (,%s11) +; CHECK-NEXT: st %s10, 8(,%s11) +; CHECK-NEXT: st %s15, 24(,%s11) +; CHECK-NEXT: st %s16, 32(,%s11) +; CHECK-NEXT: or %s9, 0, %s11 +; CHECK-NEXT: lea %s13, -176 +; CHECK-NEXT: and %s13, %s13, (32)0 +; CHECK-NEXT: lea.sl %s11, -1(%s11, %s13) +; CHECK-NEXT: brge.l %s11, %s8, .LBB2_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: ld %s61, 24(,%s14) +; CHECK-NEXT: or %s62, 0, %s0 +; CHECK-NEXT: lea %s63, 315 +; CHECK-NEXT: shm.l %s63, (%s61) +; CHECK-NEXT: shm.l %s8, 8(%s61) +; CHECK-NEXT: shm.l %s11, 16(%s61) +; CHECK-NEXT: monc +; CHECK-NEXT: or %s0, 0, %s62 +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: or %s0, 0, %s4 +; CHECK-NEXT: or %s11, 0, %s9 +; CHECK-NEXT: ld %s16, 32(,%s11) +; CHECK-NEXT: ld %s15, 24(,%s11) +; CHECK-NEXT: ld %s10, 8(,%s11) +; CHECK-NEXT: ld %s9, (,%s11) +; CHECK-NEXT: b.l (,%lr) + ret i64 %4 +}