diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1197,6 +1197,7 @@
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -166,6 +166,10 @@
   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
 
+  // Custom lower inline assembly to check for special registers.
+  setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
+  setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
+
   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -3625,6 +3629,57 @@
   return Op.getOperand(0);
 }
 
+SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
+
+  assert((Op.getOpcode() == ISD::INLINEASM ||
+          Op.getOpcode() == ISD::INLINEASM_BR) &&
+         "Expecting Inline ASM node.");
+
+  // If an LR store is already known to be required then there is no point in
+  // checking this ASM as well.
+  if (MFI.isLRStoreRequired())
+    return Op;
+
+  // Inline ASM nodes have an optional last operand that is an incoming Flag of
+  // type MVT::Glue. We want to ignore this last operand if that is the case.
+  unsigned NumOps = Op.getNumOperands();
+  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
+    --NumOps;
+
+  // Check all operands that may contain the LR.
+  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
+    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
+    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
+    ++i; // Skip the ID value.
+
+    switch (InlineAsm::getKind(Flags)) {
+    default:
+      llvm_unreachable("Bad flags!");
+    case InlineAsm::Kind_RegUse:
+    case InlineAsm::Kind_Imm:
+    case InlineAsm::Kind_Mem:
+      i += NumVals;
+      break;
+    case InlineAsm::Kind_Clobber:
+    case InlineAsm::Kind_RegDef:
+    case InlineAsm::Kind_RegDefEarlyClobber: {
+      for (; NumVals; --NumVals, ++i) {
+        Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
+        if (Reg != PPC::LR && Reg != PPC::LR8)
+          continue;
+        MFI.setLRStoreRequired();
+        return Op;
+      }
+      break;
+    }
+    }
+  }
+
+  return Op;
+}
+
 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                 SelectionDAG &DAG) const {
   if (Subtarget.isAIXABI())
@@ -10735,6 +10790,8 @@
   case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
   case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
+  case ISD::INLINEASM:
+  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
 
   // Variable argument lowering.
   case ISD::VASTART: return LowerVASTART(Op, DAG);
   case ISD::VAARG: return LowerVAARG(Op, DAG);
@@ -15583,6 +15640,11 @@
       return std::make_pair(0U, &PPC::VSSRCRegClass);
     else
       return std::make_pair(0U, &PPC::VSFRCRegClass);
+  } else if (Constraint == "lr") {
+    if (VT == MVT::i64)
+      return std::make_pair(0U, &PPC::LR8RCRegClass);
+    else
+      return std::make_pair(0U, &PPC::LRRCRegClass);
   }
 
   // Handle special cases of physical registers that are not properly handled
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -409,6 +409,13 @@
   let isAllocatable = 0;
 }
 
+def LRRC : RegisterClass<"PPC", [i32], 32, (add LR)> {
+  let isAllocatable = 0;
+}
+def LR8RC : RegisterClass<"PPC", [i64], 64, (add LR8)> {
+  let isAllocatable = 0;
+}
+
 def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
 def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY, XER)> {
   let CopyCost = -1;
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc64-inlineasm-clobber.ll
@@ -0,0 +1,264 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=powerpc64le-unknown-linux-unknown -verify-machineinstrs %s \
+; RUN:   -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64LE
+; RUN: llc -mtriple=powerpc64-unknown-linux-unknown -verify-machineinstrs %s \
+; RUN:   -ppc-asm-full-reg-names -o - | FileCheck %s --check-prefix=PPC64BE
+
+define dso_local void @ClobberLR() local_unnamed_addr #0 {
+; PPC64LE-LABEL: ClobberLR:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mflr r0
+; PPC64LE-NEXT: std r0, 16(r1)
+; PPC64LE-NEXT: stdu r1, -32(r1)
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: addi r1, r1, 32
+; PPC64LE-NEXT: ld r0, 16(r1)
+; PPC64LE-NEXT: mtlr r0
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberLR:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: mflr r0
+; PPC64BE-NEXT: std r0, 16(r1)
+; PPC64BE-NEXT: stdu r1, -48(r1)
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: addi r1, r1, 48
+; PPC64BE-NEXT: ld r0, 16(r1)
+; PPC64BE-NEXT: mtlr r0
+; PPC64BE-NEXT: blr
+entry:
+  tail call void asm sideeffect "", "~{lr}"()
+  ret void
+}
+
+define dso_local void @ClobberR5() local_unnamed_addr #0 {
+; PPC64LE-LABEL: ClobberR5:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberR5:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: blr
+entry:
+  tail call void asm sideeffect "", "~{r5}"()
+  ret void
+}
+
+define dso_local void @ClobberR15() local_unnamed_addr #0 {
+; PPC64LE-LABEL: ClobberR15:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberR15:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; PPC64BE-NEXT: blr
+entry:
+  tail call void asm sideeffect "", "~{r15}"()
+  ret void
+}
+
+;; Test for INLINEASM_BR
+define dso_local signext i32 @ClobberLR_BR(i32 signext %in) #0 {
+; PPC64LE-LABEL: ClobberLR_BR:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: nop
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: # %bb.1: # %return
+; PPC64LE-NEXT: extsw r3, r3
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .Ltmp0: # Block address taken
+; PPC64LE-NEXT: .LBB3_2: # %return_early
+; PPC64LE-NEXT: mflr r0
+; PPC64LE-NEXT: std r0, 16(r1)
+; PPC64LE-NEXT: stdu r1, -32(r1)
+; PPC64LE-NEXT: li r3, 0
+; PPC64LE-NEXT: addi r1, r1, 32
+; PPC64LE-NEXT: ld r0, 16(r1)
+; PPC64LE-NEXT: mtlr r0
+; PPC64LE-NEXT: extsw r3, r3
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberLR_BR:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: nop
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: # %bb.1: # %return
+; PPC64BE-NEXT: extsw r3, r3
+; PPC64BE-NEXT: blr
+; PPC64BE-NEXT: .Ltmp0: # Block address taken
+; PPC64BE-NEXT: .LBB3_2: # %return_early
+; PPC64BE-NEXT: mflr r0
+; PPC64BE-NEXT: std r0, 16(r1)
+; PPC64BE-NEXT: stdu r1, -48(r1)
+; PPC64BE-NEXT: li r3, 0
+; PPC64BE-NEXT: addi r1, r1, 48
+; PPC64BE-NEXT: ld r0, 16(r1)
+; PPC64BE-NEXT: mtlr r0
+; PPC64BE-NEXT: extsw r3, r3
+; PPC64BE-NEXT: blr
+entry:
+  callbr void asm sideeffect "nop", "X,~{lr}"(i8* blockaddress(@ClobberLR_BR, %return_early))
+          to label %return [label %return_early]
+
+return_early:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ]
+  ret i32 %retval.0
+}
+
+define dso_local signext i32 @ClobberR5_BR(i32 signext %in) #0 {
+; PPC64LE-LABEL: ClobberR5_BR:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: nop
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: # %bb.1: # %return
+; PPC64LE-NEXT: extsw r3, r3
+; PPC64LE-NEXT: blr
+; PPC64LE-NEXT: .Ltmp1: # Block address taken
+; PPC64LE-NEXT: .LBB4_2: # %return_early
+; PPC64LE-NEXT: li r3, 0
+; PPC64LE-NEXT: extsw r3, r3
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberR5_BR:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: nop
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: # %bb.1: # %return
+; PPC64BE-NEXT: extsw r3, r3
+; PPC64BE-NEXT: blr
+; PPC64BE-NEXT: .Ltmp1: # Block address taken
+; PPC64BE-NEXT: .LBB4_2: # %return_early
+; PPC64BE-NEXT: li r3, 0
+; PPC64BE-NEXT: extsw r3, r3
+; PPC64BE-NEXT: blr
+entry:
+  callbr void asm sideeffect "nop", "X,~{r5}"(i8* blockaddress(@ClobberR5_BR, %return_early))
+          to label %return [label %return_early]
+
+return_early:
+  br label %return
+
+return:
+  %retval.0 = phi i32 [ 0, %return_early ], [ %in, %entry ]
+  ret i32 %retval.0
+}
+
+
+
+define dso_local void @DefLR() local_unnamed_addr #0 {
+; PPC64LE-LABEL: DefLR:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mflr r0
+; PPC64LE-NEXT: std r0, 16(r1)
+; PPC64LE-NEXT: stdu r1, -32(r1)
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: addi r1, r1, 32
+; PPC64LE-NEXT: ld r0, 16(r1)
+; PPC64LE-NEXT: mtlr r0
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: DefLR:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: mflr r0
+; PPC64BE-NEXT: std r0, 16(r1)
+; PPC64BE-NEXT: stdu r1, -48(r1)
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: addi r1, r1, 48
+; PPC64BE-NEXT: ld r0, 16(r1)
+; PPC64BE-NEXT: mtlr r0
+; PPC64BE-NEXT: blr
+entry:
+  tail call i64 asm sideeffect "", "={lr}"()
+  ret void
+}
+
+define dso_local void @EarlyClobberLR() local_unnamed_addr #0 {
+; PPC64LE-LABEL: EarlyClobberLR:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mflr r0
+; PPC64LE-NEXT: std r0, 16(r1)
+; PPC64LE-NEXT: stdu r1, -32(r1)
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: addi r1, r1, 32
+; PPC64LE-NEXT: ld r0, 16(r1)
+; PPC64LE-NEXT: mtlr r0
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: EarlyClobberLR:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: mflr r0
+; PPC64BE-NEXT: std r0, 16(r1)
+; PPC64BE-NEXT: stdu r1, -48(r1)
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: addi r1, r1, 48
+; PPC64BE-NEXT: ld r0, 16(r1)
+; PPC64BE-NEXT: mtlr r0
+; PPC64BE-NEXT: blr
+entry:
+  tail call i64 asm sideeffect "", "=&{lr}"()
+  ret void
+}
+
+define dso_local void @ClobberMulti() local_unnamed_addr #0 {
+; PPC64LE-LABEL: ClobberMulti:
+; PPC64LE: # %bb.0: # %entry
+; PPC64LE-NEXT: mflr r0
+; PPC64LE-NEXT: std r15, -136(r1) # 8-byte Folded Spill
+; PPC64LE-NEXT: std r16, -128(r1) # 8-byte Folded Spill
+; PPC64LE-NEXT: std r0, 16(r1)
+; PPC64LE-NEXT: stdu r1, -176(r1)
+; PPC64LE-NEXT: #APP
+; PPC64LE-NEXT: #NO_APP
+; PPC64LE-NEXT: addi r1, r1, 176
+; PPC64LE-NEXT: ld r0, 16(r1)
+; PPC64LE-NEXT: ld r16, -128(r1) # 8-byte Folded Reload
+; PPC64LE-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
+; PPC64LE-NEXT: mtlr r0
+; PPC64LE-NEXT: blr
+;
+; PPC64BE-LABEL: ClobberMulti:
+; PPC64BE: # %bb.0: # %entry
+; PPC64BE-NEXT: mflr r0
+; PPC64BE-NEXT: std r0, 16(r1)
+; PPC64BE-NEXT: stdu r1, -192(r1)
+; PPC64BE-NEXT: std r15, 56(r1) # 8-byte Folded Spill
+; PPC64BE-NEXT: std r16, 64(r1) # 8-byte Folded Spill
+; PPC64BE-NEXT: #APP
+; PPC64BE-NEXT: #NO_APP
+; PPC64BE-NEXT: ld r16, 64(r1) # 8-byte Folded Reload
+; PPC64BE-NEXT: ld r15, 56(r1) # 8-byte Folded Reload
+; PPC64BE-NEXT: addi r1, r1, 192
+; PPC64BE-NEXT: ld r0, 16(r1)
+; PPC64BE-NEXT: mtlr r0
+; PPC64BE-NEXT: blr
+entry:
+  tail call void asm sideeffect "", "~{lr},~{r15},~{r16}"()
+  ret void
+}
+
+attributes #0 = { nounwind }
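
Usage note (not part of the patch): the ~{lr} clobbers exercised by the tests above typically originate from source-level inline assembly such as the sketch below. This assumes the front end (e.g. clang targeting powerpc64) accepts "lr" as a clobber name and lowers it to the "~{lr}" constraint seen in the IR.

  /* Hypothetical C example: the empty asm declares the link register as
     clobbered, so with this patch the prologue saves LR even though the
     function makes no calls. */
  void clobber_lr(void) {
    __asm__ volatile("" ::: "lr");
  }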