Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1,1239 +1,1243 @@ -//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that PPC uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H -#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H - -#include "PPCInstrInfo.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/TargetLowering.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/InlineAsm.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Type.h" -#include "llvm/Support/MachineValueType.h" -#include - -namespace llvm { - - namespace PPCISD { - - // When adding a NEW PPCISD node please add it to the correct position in - // the enum. The order of elements in this enum matters! - // Values that are added after this entry: - // STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE - // are considered memory opcodes and are treated differently than entries - // that come before it. For example, ADD or MUL should be placed before - // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come - // after it. - enum NodeType : unsigned { - // Start the numbering where the builtin ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// FSEL - Traditional three-operand fsel node. - /// - FSEL, + //===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===// + // + // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + // See https://llvm.org/LICENSE.txt for license information. + // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + // + //===----------------------------------------------------------------------===// + // + // This file defines the interfaces that PPC uses to lower LLVM code into a + // selection DAG. 
+ // + //===----------------------------------------------------------------------===// + + #ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H + #define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H + + #include "PPCInstrInfo.h" + #include "llvm/CodeGen/CallingConvLower.h" + #include "llvm/CodeGen/MachineFunction.h" + #include "llvm/CodeGen/MachineMemOperand.h" + #include "llvm/CodeGen/SelectionDAG.h" + #include "llvm/CodeGen/SelectionDAGNodes.h" + #include "llvm/CodeGen/TargetLowering.h" + #include "llvm/CodeGen/ValueTypes.h" + #include "llvm/IR/Attributes.h" + #include "llvm/IR/CallingConv.h" + #include "llvm/IR/Function.h" + #include "llvm/IR/InlineAsm.h" + #include "llvm/IR/Metadata.h" + #include "llvm/IR/Type.h" + #include "llvm/Support/MachineValueType.h" + #include + + namespace llvm { + + namespace PPCISD { + + // When adding a NEW PPCISD node please add it to the correct position in + // the enum. The order of elements in this enum matters! + // Values that are added after this entry: + // STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE + // are considered memory opcodes and are treated differently than entries + // that come before it. For example, ADD or MUL should be placed before + // the ISD::FIRST_TARGET_MEMORY_OPCODE while a LOAD or STORE should come + // after it. + enum NodeType : unsigned { + // Start the numbering where the builtin ops and target ops leave off. + FIRST_NUMBER = ISD::BUILTIN_OP_END, + + /// FSEL - Traditional three-operand fsel node. + /// + FSEL, + + /// FCFID - The FCFID instruction, taking an f64 operand and producing + /// and f64 value containing the FP representation of the integer that + /// was temporarily in the f64 operand. + FCFID, + + /// Newer FCFID[US] integer-to-floating-point conversion instructions for + /// unsigned integers and single-precision outputs. + FCFIDU, FCFIDS, FCFIDUS, + + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + /// operand, producing an f64 value containing the integer representation + /// of that FP value. + FCTIDZ, FCTIWZ, + + /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for + /// unsigned integers with round toward zero. + FCTIDUZ, FCTIWUZ, + + /// Floating-point-to-interger conversion instructions + FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR, + + /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in + /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. + VEXTS, + + /// SExtVElems, takes an input vector of a smaller type and sign + /// extends to an output vector of a larger type. + SExtVElems, + + /// Reciprocal estimate instructions (unary FP ops). + FRE, FRSQRTE, + + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking + // three v4f32 operands and producing a v4f32 result. + VMADDFP, VNMSUBFP, + + /// VPERM - The PPC VPERM Instruction. + /// + VPERM, + + /// XXSPLT - The PPC VSX splat instructions + /// + XXSPLT, + + /// VECINSERT - The PPC vector insert instruction + /// + VECINSERT, + + /// XXREVERSE - The PPC VSX reverse instruction + /// + XXREVERSE, + + /// VECSHL - The PPC vector shift left instruction + /// + VECSHL, + + /// XXPERMDI - The PPC XXPERMDI instruction + /// + XXPERMDI, + + /// The CMPB instruction (takes two operands of i32 or i64). + CMPB, + + /// Hi/Lo - These represent the high and low 16-bit parts of a global + /// address respectively. These nodes have two operands, the first of + /// which must be a TargetGlobalAddress, and the second of which must be a + /// Constant. 
Selected naively, these turn into 'lis G+C' and 'li G+C', + /// though these are usually folded into other nodes. + Hi, Lo, + + /// The following two target-specific nodes are used for calls through + /// function pointers in the 64-bit SVR4 ABI. + + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an allocation on the stack. + DYNALLOC, + + /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to + /// compute an offset from native SP to the address of the most recent + /// dynamic alloca. + DYNAREAOFFSET, + + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr + /// at function entry, used for PIC code. + GlobalBaseReg, + + /// These nodes represent PPC shifts. + /// + /// For scalar types, only the last `n + 1` bits of the shift amounts + /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. + /// for exact behaviors. + /// + /// For vector types, only the last n bits are used. See vsld. + SRL, SRA, SHL, + + /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign + /// word and shift left immediate. + EXTSWSLI, + + /// The combination of sra[wd]i and addze used to implemented signed + /// integer division by a power of 2. The first operand is the dividend, + /// and the second is the constant shift amount (representing the + /// divisor). + SRA_ADDZE, + + /// CALL - A direct function call. + /// CALL_NOP is a call with the special NOP which follows 64-bit + /// SVR4 calls and 32-bit/64-bit AIX calls. + CALL, CALL_NOP, + + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a + /// MTCTR instruction. + MTCTR, + + /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a + /// BCTRL instruction. + BCTRL, + + /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl + /// instruction and the TOC reload required on SVR4 PPC64. + BCTRL_LOAD_TOC, + + /// Return with a flag operand, matched by 'blr' + RET_FLAG, + + /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. + /// This copies the bits corresponding to the specified CRREG into the + /// resultant GPR. Bits corresponding to other CR regs are undefined. + MFOCRF, + + /// Direct move from a VSX register to a GPR + MFVSR, + + /// Direct move from a GPR to a VSX register (algebraic) + MTVSRA, + + /// Direct move from a GPR to a VSX register (zero) + MTVSRZ, + + /// Direct move of 2 consecutive GPR to a VSX register. + BUILD_FP128, + + /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and + /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is + /// unsupported for this target. + /// Merge 2 GPRs to a single SPE register. + BUILD_SPE64, + + /// Extract SPE register component, second argument is high or low. + EXTRACT_SPE, + + /// Extract a subvector from signed integer vector and convert to FP. + /// It is primarily used to convert a (widened) illegal integer vector + /// type to a legal floating point vector type. + /// For example v2i32 -> widened to v4i32 -> v2f64 + SINT_VEC_TO_FP, + + /// Extract a subvector from unsigned integer vector and convert to FP. + /// As with SINT_VEC_TO_FP, used for converting illegal types. + UINT_VEC_TO_FP, + + // FIXME: Remove these once the ANDI glue bug is fixed: + /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the + /// eq or gt bit of CR0 after executing andi. x, 1. This is used to + /// implement truncation of i32 or i64 to i1. 
+ ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, + + // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit + // target (returns (Lo, Hi)). It takes a chain operand. + READ_TIME_BASE, + + // EH_SJLJ_SETJMP - SjLj exception handling setjmp. + EH_SJLJ_SETJMP, + + // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. + EH_SJLJ_LONGJMP, + + /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* + /// instructions. For lack of better number, we use the opcode number + /// encoding for the OPC field to identify the compare. For example, 838 + /// is VCMPGTSH. + VCMP, + + /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the + /// altivec VCMP*o instructions. For lack of better number, we use the + /// opcode number encoding for the OPC field to identify the compare. For + /// example, 838 is VCMPGTSH. + VCMPo, + + /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This + /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the + /// condition register to branch on, OPC is the branch opcode to use (e.g. + /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is + /// an optional input flag argument. + COND_BRANCH, + + /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based + /// loops. + BDNZ, BDZ, + + /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding + /// towards zero. Used only as part of the long double-to-int + /// conversion sequence. + FADDRTZ, + + /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. + MFFS, + + /// TC_RETURN - A tail call return. + /// operand #0 chain + /// operand #1 callee (register or absolute) + /// operand #2 stack adjustment + /// operand #3 optional in flag + TC_RETURN, + + /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls + CR6SET, + CR6UNSET, + + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS + /// for non-position independent code on PPC32. + PPC32_GOT, + + /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and + /// local dynamic TLS and position indendepent code on PPC32. + PPC32_PICGOT, + + /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec + /// TLS model, produces an ADDIS8 instruction that adds the GOT + /// base to sym\@got\@tprel\@ha. + ADDIS_GOT_TPREL_HA, + + /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec + /// TLS model, produces a LD instruction with base register G8RReg + /// and offset sym\@got\@tprel\@l. This completes the addition that + /// finds the offset of "sym" relative to the thread pointer. + LD_GOT_TPREL_L, + + /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS + /// model, produces an ADD instruction that adds the contents of + /// G8RReg to the thread pointer. Symbol contains a relocation + /// sym\@tls which is to be replaced by the thread pointer and + /// identifies to the linker that the instruction is part of a + /// TLS sequence. + ADD_TLS, + + /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym\@got\@tlsgd\@ha. + ADDIS_TLSGD_HA, + + /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by + /// ADDIS_TLSGD_L_ADDR until after register assignment. 
+ ADDI_TLSGD_L, + + /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by + /// ADDIS_TLSGD_L_ADDR until after register assignment. + GET_TLS_ADDR, + + /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following + /// register assignment. + ADDI_TLSGD_L_ADDR, + + /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds the GOT base + /// register to sym\@got\@tlsld\@ha. + ADDIS_TLSLD_HA, + + /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by + /// ADDIS_TLSLD_L_ADDR until after register assignment. + ADDI_TLSLD_L, + + /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS + /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by + /// ADDIS_TLSLD_L_ADDR until after register assignment. + GET_TLSLD_ADDR, + + /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that + /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion + /// following register assignment. + ADDI_TLSLD_L_ADDR, + + /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS + /// model, produces an ADDIS8 instruction that adds X3 to + /// sym\@dtprel\@ha. + ADDIS_DTPREL_HA, + + /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS + /// model, produces an ADDI8 instruction that adds G8RReg to + /// sym\@got\@dtprel\@l. + ADDI_DTPREL_L, + + /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded + /// during instruction selection to optimize a BUILD_VECTOR into + /// operations on splats. This is necessary to avoid losing these + /// optimizations due to constant folding. + VADD_SPLAT, + + /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned + /// operand identifies the operating system entry point. + SC, + + /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. + CLRBHRB, + + /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch + /// history rolling buffer entry. + MFBHRBE, + + /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. + RFEBB, + + /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little + /// endian. Maps to an xxswapd instruction that corrects an lxvd2x + /// or stxvd2x instruction. The chain is necessary because the + /// sequence replaces a load and needs to provide the same number + /// of outputs. + XXSWAPD, + + /// An SDNode for swaps that are not associated with any loads/stores + /// and thereby have no chain. + SWAP_NO_CHAIN, + + /// An SDNode for Power9 vector absolute value difference. + /// operand #0 vector + /// operand #1 vector + /// operand #2 constant i32 0 or 1, to indicate whether needs to patch + /// the most significant bit for signed i32 + /// + /// Power9 VABSD* instructions are designed to support unsigned integer + /// vectors (byte/halfword/word), if we want to make use of them for signed + /// integer vectors, we have to flip their sign bits first. To flip sign bit + /// for byte/halfword integer vector would become inefficient, but for word + /// integer vector, we can leverage XVNEGSP to make it efficiently. eg: + /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) + /// => VABSDUW((XVNEGSP a), (XVNEGSP b)) + VABSD, + + /// QVFPERM = This corresponds to the QPX qvfperm instruction. 
+ QVFPERM, + + /// QVGPCI = This corresponds to the QPX qvgpci instruction. + QVGPCI, + + /// QVALIGNI = This corresponds to the QPX qvaligni instruction. + QVALIGNI, + + /// QVESPLATI = This corresponds to the QPX qvesplati instruction. + QVESPLATI, + + /// QBFLT = Access the underlying QPX floating-point boolean + /// representation. + QBFLT, + + /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or + /// lower (IDX=1) half of v4f32 to v2f64. + FP_EXTEND_HALF, + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a + /// byte-swapping store instruction. It byte-swaps the low "Type" bits of + /// the GPRC input, then stores it through Ptr. Type can be either i16 or + /// i32. + STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a + /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, + /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 + /// or i32. + LBRX, + + /// STFIWX - The STFIWX instruction. The first operand is an input token + /// chain, then an f64 value to store, then an address to store it to. + STFIWX, + + /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point + /// load which sign-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWAX, + + /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point + /// load which zero-extends from a 32-bit integer value into the + /// destination 64-bit register. + LFIWZX, + + /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an + /// integer smaller than 64 bits into a VSR. The integer is zero-extended. + /// This can be used for converting loaded integers to floating point. + LXSIZX, + + /// STXSIX - The STXSI[bh]X instruction. The first operand is an input + /// chain, then an f64 value to store, then an address to store it to, + /// followed by a byte-width for the store. + STXSIX, + + /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. + /// Maps directly to an lxvd2x instruction that will be followed by + /// an xxswapd. + LXVD2X, + + /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. + /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on + /// the vector type to load vector in big-endian element order. + LOAD_VEC_BE, + + /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a + /// v2f32 value into the lower half of a VSR register. + LD_VSX_LH, + + /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory + /// instructions such as LXVDSX, LXVWSX. + LD_SPLAT, + + /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. + /// Maps directly to an stxvd2x instruction that will be preceded by + /// an xxswapd. + STXVD2X, + + /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. + /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on + /// the vector type to store vector in big-endian element order. + STORE_VEC_BE, + + /// Store scalar integers from VSR. + ST_VSR_SCAL_INT, + + /// QBRC, CHAIN = QVLFSb CHAIN, Ptr + /// The 4xf32 load used for v4i1 constants. + QVLFSb, + + /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes + /// except they ensure that the compare input is zero-extended for + /// sub-word versions because the atomic loads zero-extend. + ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16, + + /// GPRC = TOC_ENTRY GA, TOC + /// Loads the entry for GA from the TOC, where the TOC base is given by + /// the last operand. 
+ TOC_ENTRY + }; + + } // end namespace PPCISD + + /// Define some predicates that are used for node matching. + namespace PPC { + + /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUHUM instruction. + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); + + /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUWUM instruction. + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); + + /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a + /// VPKUDUM instruction. + bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); + + /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for + /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + unsigned ShuffleKind, SelectionDAG &DAG); + + /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for + /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). + bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, + unsigned ShuffleKind, SelectionDAG &DAG); + + /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for + /// a VMRGEW or VMRGOW instruction + bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, + unsigned ShuffleKind, SelectionDAG &DAG); + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. + bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + + /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRH instruction. + bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRW instruction. + bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRD instruction. + bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); + + /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXBRQ instruction. + bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); + + /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXPERMDI instruction. + bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the + /// shift amount, otherwise return -1. + int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, + SelectionDAG &DAG); - /// FCFID - The FCFID instruction, taking an f64 operand and producing - /// and f64 value containing the FP representation of the integer that - /// was temporarily in the f64 operand. - FCFID, + /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand + /// specifies a splat of a single element that is suitable for input to + /// VSPLTB/VSPLTH/VSPLTW. + bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); + + /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by + /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any + /// shuffle of v4f32/v4i32 vectors that just inserts one element from one + /// vector into the other. 
This function will also set a couple of + /// output parameters for how much the source vector needs to be shifted and + /// what byte number needs to be specified for the instruction to put the + /// element in the desired location of the target vector. + bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE); + + /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is + /// appropriate for PPC mnemonics (which have a big endian bias - namely + /// elements are counted from the left of the vector register). + unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, + SelectionDAG &DAG); + + /// get_VSPLTI_elt - If this is a build_vector of constants which can be + /// formed by using a vspltis[bhw] instruction of the specified element + /// size, return the constant being splatted. The ByteSize field indicates + /// the number of bytes of each element [124] -> [bhw]. + SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + + /// If this is a qvaligni shuffle mask, return the shift + /// amount, otherwise return -1. + int isQVALIGNIShuffleMask(SDNode *N); + + } // end namespace PPC + + class PPCTargetLowering : public TargetLowering { + const PPCSubtarget &Subtarget; + + public: + explicit PPCTargetLowering(const PPCTargetMachine &TM, + const PPCSubtarget &STI); + + /// getTargetNodeName() - This method returns the name of a target specific + /// DAG node. + const char *getTargetNodeName(unsigned Opcode) const override; + + bool isSelectSupported(SelectSupportKind Kind) const override { + // PowerPC does not support scalar condition selects on vectors. + return (Kind != SelectSupportKind::ScalarCondVectorVal); + } - /// Newer FCFID[US] integer-to-floating-point conversion instructions for - /// unsigned integers and single-precision outputs. - FCFIDU, FCFIDS, FCFIDUS, + /// getPreferredVectorAction - The code we generate when vector types are + /// legalized by promoting the integer element type is often much worse + /// than code we generate if we widen the type for applicable vector types. + /// The issue with promoting is that the vector is scalaraized, individual + /// elements promoted and then the vector is rebuilt. So say we load a pair + /// of v4i8's and shuffle them. This will turn into a mess of 8 extending + /// loads, moves back into VSR's (or memory ops if we don't have moves) and + /// then the VPERM for the shuffle. All in all a very slow sequence. + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) + const override { + if (VT.getScalarSizeInBits() % 8 == 0) + return TypeWidenVector; + return TargetLoweringBase::getPreferredVectorAction(VT); + } - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 - /// operand, producing an f64 value containing the integer representation - /// of that FP value. - FCTIDZ, FCTIWZ, + bool useSoftFloat() const override; - /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for - /// unsigned integers with round toward zero. - FCTIDUZ, FCTIWUZ, + bool hasSPE() const; - /// Floating-point-to-interger conversion instructions - FP_TO_UINT_IN_VSR, FP_TO_SINT_IN_VSR, + MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { + return MVT::i32; + } - /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in - /// VSFRC that is sign-extended from ByteWidth to a 64-byte integer. 
- VEXTS, + bool isCheapToSpeculateCttz() const override { + return true; + } - /// SExtVElems, takes an input vector of a smaller type and sign - /// extends to an output vector of a larger type. - SExtVElems, + bool isCheapToSpeculateCtlz() const override { + return true; + } - /// Reciprocal estimate instructions (unary FP ops). - FRE, FRSQRTE, + bool isCtlzFast() const override { + return true; + } - // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking - // three v4f32 operands and producing a v4f32 result. - VMADDFP, VNMSUBFP, + bool hasAndNotCompare(SDValue) const override { + return true; + } - /// VPERM - The PPC VPERM Instruction. - /// - VPERM, + bool preferIncOfAddToSubOfNot(EVT VT) const override; - /// XXSPLT - The PPC VSX splat instructions - /// - XXSPLT, + bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { + return VT.isScalarInteger(); + } - /// VECINSERT - The PPC vector insert instruction - /// - VECINSERT, + bool supportSplitCSR(MachineFunction *MF) const override { + return + MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && + MF->getFunction().hasFnAttribute(Attribute::NoUnwind); + } - /// XXREVERSE - The PPC VSX reverse instruction + void initializeSplitCSR(MachineBasicBlock *Entry) const override; + + void insertCopiesSplitCSR( + MachineBasicBlock *Entry, + const SmallVectorImpl &Exits) const override; + + /// getSetCCResultType - Return the ISD::SETCC ValueType + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, + EVT VT) const override; + + /// Return true if target always beneficiates from combining into FMA for a + /// given value type. This must typically return false on targets where FMA + /// takes more cycles to execute than FADD. + bool enableAggressiveFMAFusion(EVT VT) const override; + + /// getPreIndexedAddressParts - returns true by value, base pointer and + /// offset pointer and addressing mode by reference if the node's address + /// can be legally represented as pre-indexed load / store address. + bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const override; + + /// SelectAddressEVXRegReg - Given the specified addressed, check to see if + /// it can be more efficiently represented as [r+imm]. + bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + /// SelectAddressRegReg - Given the specified addressed, check to see if it + /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment + /// is non-zero, only accept displacement which is not suitable for [r+imm]. + /// Returns false if it can be represented by [r+imm], which are preferred. + bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG, + unsigned EncodingAlignment = 0) const; + + /// SelectAddressRegImm - Returns true if the address N can be represented + /// by a base register plus a signed 16-bit displacement [r+imm], and if it + /// is not better represented as reg+reg. If \p EncodingAlignment is + /// non-zero, only accept displacements suitable for instruction encoding + /// requirement, i.e. multiples of 4 for DS form. + bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, + SelectionDAG &DAG, + unsigned EncodingAlignment) const; + + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be + /// represented as an indexed [r+r] operation. 
+ bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, + SelectionDAG &DAG) const; + + Sched::Preference getSchedulingPreference(SDNode *N) const override; + + /// LowerOperation - Provide custom lowering hooks for some operations. /// - XXREVERSE, + SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - /// VECSHL - The PPC vector shift left instruction + /// ReplaceNodeResults - Replace the results of node with an illegal result + /// type with new values built out of custom code. /// - VECSHL, + void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, + SelectionDAG &DAG) const override; - /// XXPERMDI - The PPC XXPERMDI instruction - /// - XXPERMDI, + SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; - /// The CMPB instruction (takes two operands of i32 or i64). - CMPB, + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - /// Hi/Lo - These represent the high and low 16-bit parts of a global - /// address respectively. These nodes have two operands, the first of - /// which must be a TargetGlobalAddress, and the second of which must be a - /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', - /// though these are usually folded into other nodes. - Hi, Lo, + SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, + SmallVectorImpl &Created) const override; - /// The following two target-specific nodes are used for calls through - /// function pointers in the 64-bit SVR4 ABI. + Register getRegisterByName(const char* RegName, EVT VT, + const MachineFunction &MF) const override; - /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an allocation on the stack. - DYNALLOC, + void computeKnownBitsForTargetNode(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth = 0) const override; - /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to - /// compute an offset from native SP to the address of the most recent - /// dynamic alloca. - DYNAREAOFFSET, + Align getPrefLoopAlignment(MachineLoop *ML) const override; - /// GlobalBaseReg - On Darwin, this node represents the result of the mflr - /// at function entry, used for PIC code. - GlobalBaseReg, + bool shouldInsertFencesForAtomic(const Instruction *I) const override { + return true; + } - /// These nodes represent PPC shifts. - /// - /// For scalar types, only the last `n + 1` bits of the shift amounts - /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc. - /// for exact behaviors. - /// - /// For vector types, only the last n bits are used. See vsld. - SRL, SRA, SHL, - - /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign - /// word and shift left immediate. - EXTSWSLI, - - /// The combination of sra[wd]i and addze used to implemented signed - /// integer division by a power of 2. The first operand is the dividend, - /// and the second is the constant shift amount (representing the - /// divisor). - SRA_ADDZE, - - /// CALL - A direct function call. - /// CALL_NOP is a call with the special NOP which follows 64-bit - /// SVR4 calls and 32-bit/64-bit AIX calls. - CALL, CALL_NOP, - - /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a - /// MTCTR instruction. 
- MTCTR, - - /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a - /// BCTRL instruction. - BCTRL, - - /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl - /// instruction and the TOC reload required on SVR4 PPC64. - BCTRL_LOAD_TOC, - - /// Return with a flag operand, matched by 'blr' - RET_FLAG, - - /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction. - /// This copies the bits corresponding to the specified CRREG into the - /// resultant GPR. Bits corresponding to other CR regs are undefined. - MFOCRF, - - /// Direct move from a VSX register to a GPR - MFVSR, - - /// Direct move from a GPR to a VSX register (algebraic) - MTVSRA, - - /// Direct move from a GPR to a VSX register (zero) - MTVSRZ, - - /// Direct move of 2 consecutive GPR to a VSX register. - BUILD_FP128, - - /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and - /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is - /// unsupported for this target. - /// Merge 2 GPRs to a single SPE register. - BUILD_SPE64, - - /// Extract SPE register component, second argument is high or low. - EXTRACT_SPE, - - /// Extract a subvector from signed integer vector and convert to FP. - /// It is primarily used to convert a (widened) illegal integer vector - /// type to a legal floating point vector type. - /// For example v2i32 -> widened to v4i32 -> v2f64 - SINT_VEC_TO_FP, - - /// Extract a subvector from unsigned integer vector and convert to FP. - /// As with SINT_VEC_TO_FP, used for converting illegal types. - UINT_VEC_TO_FP, - - // FIXME: Remove these once the ANDI glue bug is fixed: - /// i1 = ANDIo_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the - /// eq or gt bit of CR0 after executing andi. x, 1. This is used to - /// implement truncation of i32 or i64 to i1. - ANDIo_1_EQ_BIT, ANDIo_1_GT_BIT, - - // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit - // target (returns (Lo, Hi)). It takes a chain operand. - READ_TIME_BASE, - - // EH_SJLJ_SETJMP - SjLj exception handling setjmp. - EH_SJLJ_SETJMP, - - // EH_SJLJ_LONGJMP - SjLj exception handling longjmp. - EH_SJLJ_LONGJMP, - - /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* - /// instructions. For lack of better number, we use the opcode number - /// encoding for the OPC field to identify the compare. For example, 838 - /// is VCMPGTSH. - VCMP, - - /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*o instructions. For lack of better number, we use the - /// opcode number encoding for the OPC field to identify the compare. For - /// example, 838 is VCMPGTSH. - VCMPo, - - /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the - /// condition register to branch on, OPC is the branch opcode to use (e.g. - /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is - /// an optional input flag argument. - COND_BRANCH, - - /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based - /// loops. - BDNZ, BDZ, - - /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding - /// towards zero. Used only as part of the long double-to-int - /// conversion sequence. - FADDRTZ, - - /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register. - MFFS, - - /// TC_RETURN - A tail call return. 
- /// operand #0 chain - /// operand #1 callee (register or absolute) - /// operand #2 stack adjustment - /// operand #3 optional in flag - TC_RETURN, - - /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls - CR6SET, - CR6UNSET, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS - /// for non-position independent code on PPC32. - PPC32_GOT, - - /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and - /// local dynamic TLS and position indendepent code on PPC32. - PPC32_PICGOT, - - /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec - /// TLS model, produces an ADDIS8 instruction that adds the GOT - /// base to sym\@got\@tprel\@ha. - ADDIS_GOT_TPREL_HA, - - /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec - /// TLS model, produces a LD instruction with base register G8RReg - /// and offset sym\@got\@tprel\@l. This completes the addition that - /// finds the offset of "sym" relative to the thread pointer. - LD_GOT_TPREL_L, - - /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS - /// model, produces an ADD instruction that adds the contents of - /// G8RReg to the thread pointer. Symbol contains a relocation - /// sym\@tls which is to be replaced by the thread pointer and - /// identifies to the linker that the instruction is part of a - /// TLS sequence. - ADD_TLS, - - /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsgd\@ha. - ADDIS_TLSGD_HA, - - /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - ADDI_TLSGD_L, - - /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by - /// ADDIS_TLSGD_L_ADDR until after register assignment. - GET_TLS_ADDR, - - /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following - /// register assignment. - ADDI_TLSGD_L_ADDR, - - /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds the GOT base - /// register to sym\@got\@tlsld\@ha. - ADDIS_TLSLD_HA, - - /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@tlsld\@l and stores the result in X3. Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - ADDI_TLSLD_L, - - /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS - /// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by - /// ADDIS_TLSLD_L_ADDR until after register assignment. - GET_TLSLD_ADDR, - - /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that - /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion - /// following register assignment. - ADDI_TLSLD_L_ADDR, - - /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS - /// model, produces an ADDIS8 instruction that adds X3 to - /// sym\@dtprel\@ha. - ADDIS_DTPREL_HA, - - /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS - /// model, produces an ADDI8 instruction that adds G8RReg to - /// sym\@got\@dtprel\@l. 
- ADDI_DTPREL_L, - - /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded - /// during instruction selection to optimize a BUILD_VECTOR into - /// operations on splats. This is necessary to avoid losing these - /// optimizations due to constant folding. - VADD_SPLAT, - - /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned - /// operand identifies the operating system entry point. - SC, - - /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer. - CLRBHRB, - - /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch - /// history rolling buffer entry. - MFBHRBE, - - /// CHAIN = RFEBB CHAIN, State - Return from event-based branch. - RFEBB, - - /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little - /// endian. Maps to an xxswapd instruction that corrects an lxvd2x - /// or stxvd2x instruction. The chain is necessary because the - /// sequence replaces a load and needs to provide the same number - /// of outputs. - XXSWAPD, - - /// An SDNode for swaps that are not associated with any loads/stores - /// and thereby have no chain. - SWAP_NO_CHAIN, - - /// An SDNode for Power9 vector absolute value difference. - /// operand #0 vector - /// operand #1 vector - /// operand #2 constant i32 0 or 1, to indicate whether needs to patch - /// the most significant bit for signed i32 - /// - /// Power9 VABSD* instructions are designed to support unsigned integer - /// vectors (byte/halfword/word), if we want to make use of them for signed - /// integer vectors, we have to flip their sign bits first. To flip sign bit - /// for byte/halfword integer vector would become inefficient, but for word - /// integer vector, we can leverage XVNEGSP to make it efficiently. eg: - /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000) - /// => VABSDUW((XVNEGSP a), (XVNEGSP b)) - VABSD, - - /// QVFPERM = This corresponds to the QPX qvfperm instruction. - QVFPERM, - - /// QVGPCI = This corresponds to the QPX qvgpci instruction. - QVGPCI, - - /// QVALIGNI = This corresponds to the QPX qvaligni instruction. - QVALIGNI, - - /// QVESPLATI = This corresponds to the QPX qvesplati instruction. - QVESPLATI, - - /// QBFLT = Access the underlying QPX floating-point boolean - /// representation. - QBFLT, - - /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or - /// lower (IDX=1) half of v4f32 to v2f64. - FP_EXTEND_HALF, - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a - /// byte-swapping store instruction. It byte-swaps the low "Type" bits of - /// the GPRC input, then stores it through Ptr. Type can be either i16 or - /// i32. - STBRX = ISD::FIRST_TARGET_MEMORY_OPCODE, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a - /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, - /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 - /// or i32. - LBRX, - - /// STFIWX - The STFIWX instruction. The first operand is an input token - /// chain, then an f64 value to store, then an address to store it to. - STFIWX, - - /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point - /// load which sign-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWAX, - - /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point - /// load which zero-extends from a 32-bit integer value into the - /// destination 64-bit register. - LFIWZX, - - /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an - /// integer smaller than 64 bits into a VSR. The integer is zero-extended. 
- /// This can be used for converting loaded integers to floating point. - LXSIZX, - - /// STXSIX - The STXSI[bh]X instruction. The first operand is an input - /// chain, then an f64 value to store, then an address to store it to, - /// followed by a byte-width for the store. - STXSIX, - - /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to an lxvd2x instruction that will be followed by - /// an xxswapd. - LXVD2X, - - /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on - /// the vector type to load vector in big-endian element order. - LOAD_VEC_BE, - - /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a - /// v2f32 value into the lower half of a VSR register. - LD_VSX_LH, - - /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory - /// instructions such as LXVDSX, LXVWSX. - LD_SPLAT, - - /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to an stxvd2x instruction that will be preceded by - /// an xxswapd. - STXVD2X, - - /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on - /// the vector type to store vector in big-endian element order. - STORE_VEC_BE, - - /// Store scalar integers from VSR. - ST_VSR_SCAL_INT, - - /// QBRC, CHAIN = QVLFSb CHAIN, Ptr - /// The 4xf32 load used for v4i1 constants. - QVLFSb, - - /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes - /// except they ensure that the compare input is zero-extended for - /// sub-word versions because the atomic loads zero-extend. - ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16, - - /// GPRC = TOC_ENTRY GA, TOC - /// Loads the entry for GA from the TOC, where the TOC base is given by - /// the last operand. - TOC_ENTRY - }; - - } // end namespace PPCISD - - /// Define some predicates that are used for node matching. - namespace PPC { - - /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a - /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, - SelectionDAG &DAG); + Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, + AtomicOrdering Ord) const override; + + MachineBasicBlock * + EmitInstrWithCustomInserter(MachineInstr &MI, + MachineBasicBlock *MBB) const override; + MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, + MachineBasicBlock *MBB, + unsigned AtomicSize, + unsigned BinOpcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, + MachineBasicBlock *MBB, + bool is8bit, + unsigned Opcode, + unsigned CmpOpcode = 0, + unsigned CmpPred = 0) const; + + MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + + MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; + + ConstraintType getConstraintType(StringRef Constraint) const override; + + /// Examine constraint string and operand type and determine a weight value. + /// The operand object must already have been set up with the operand type. 
+ ConstraintWeight getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const override; + + std::pair + getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, + StringRef Constraint, MVT VT) const override; + + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate + /// function arguments in the caller parameter area. This is the actual + /// alignment, not its logarithm. + unsigned getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const override; + + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops + /// vector. If it is invalid, don't add anything to Ops. + void LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector &Ops, + SelectionDAG &DAG) const override; + + unsigned + getInlineAsmMemConstraint(StringRef ConstraintCode) const override { + if (ConstraintCode == "es") + return InlineAsm::Constraint_es; + else if (ConstraintCode == "o") + return InlineAsm::Constraint_o; + else if (ConstraintCode == "Q") + return InlineAsm::Constraint_Q; + else if (ConstraintCode == "Z") + return InlineAsm::Constraint_Z; + else if (ConstraintCode == "Zy") + return InlineAsm::Constraint_Zy; + return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); + } - /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a - /// VPKUWUM instruction. - bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, - SelectionDAG &DAG); + /// isLegalAddressingMode - Return true if the addressing mode represented + /// by AM is legal for this target, for a load/store of the specified type. + bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, + Type *Ty, unsigned AS, + Instruction *I = nullptr) const override; + + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. + bool isLegalICmpImmediate(int64_t Imm) const override; + + /// isLegalAddImmediate - Return true if the specified immediate is legal + /// add immediate, that is the target has add instructions which can + /// add a register and the immediate without having to materialize + /// the immediate into a register. + bool isLegalAddImmediate(int64_t Imm) const override; + + /// isTruncateFree - Return true if it's free to truncate a value of + /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in + /// register X1 to i32 by referencing its sub-register R1. + bool isTruncateFree(Type *Ty1, Type *Ty2) const override; + bool isTruncateFree(EVT VT1, EVT VT2) const override; + + bool isZExtFree(SDValue Val, EVT VT2) const override; + + bool isFPExtFree(EVT DestVT, EVT SrcVT) const override; + + /// Returns true if it is beneficial to convert a load of a constant + /// to just the constant itself. + bool shouldConvertConstantLoadToIntImm(const APInt &Imm, + Type *Ty) const override; + + bool convertSelectOfConstantsToMath(EVT VT) const override { + return true; + } - /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a - /// VPKUDUM instruction. - bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, - SelectionDAG &DAG); + bool isDesirableToTransformToIntegerOp(unsigned Opc, + EVT VT) const override { + // Only handle float load/store pair because float(fpr) load/store + // instruction has more cycles than integer(gpr) load/store in PPC. 
+ if (Opc != ISD::LOAD && Opc != ISD::STORE) + return false; + if (VT != MVT::f32 && VT != MVT::f64) + return false; - /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for - /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - unsigned ShuffleKind, SelectionDAG &DAG); - - /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for - /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). - bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - unsigned ShuffleKind, SelectionDAG &DAG); - - /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for - /// a VMRGEW or VMRGOW instruction - bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, - unsigned ShuffleKind, SelectionDAG &DAG); - /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXSLDWI instruction. - bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - bool &Swap, bool IsLE); - - /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXBRH instruction. - bool isXXBRHShuffleMask(ShuffleVectorSDNode *N); - - /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXBRW instruction. - bool isXXBRWShuffleMask(ShuffleVectorSDNode *N); - - /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXBRD instruction. - bool isXXBRDShuffleMask(ShuffleVectorSDNode *N); - - /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXBRQ instruction. - bool isXXBRQShuffleMask(ShuffleVectorSDNode *N); - - /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable - /// for a XXPERMDI instruction. - bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - bool &Swap, bool IsLE); - - /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the - /// shift amount, otherwise return -1. - int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, - SelectionDAG &DAG); - - /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand - /// specifies a splat of a single element that is suitable for input to - /// VSPLTB/VSPLTH/VSPLTW. - bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - - /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by - /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any - /// shuffle of v4f32/v4i32 vectors that just inserts one element from one - /// vector into the other. This function will also set a couple of - /// output parameters for how much the source vector needs to be shifted and - /// what byte number needs to be specified for the instruction to put the - /// element in the desired location of the target vector. - bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE); - - /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is - /// appropriate for PPC mnemonics (which have a big endian bias - namely - /// elements are counted from the left of the vector register). - unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, - SelectionDAG &DAG); - - /// get_VSPLTI_elt - If this is a build_vector of constants which can be - /// formed by using a vspltis[bhw] instruction of the specified element - /// size, return the constant being splatted. 
The ByteSize field indicates - /// the number of bytes of each element [124] -> [bhw]. - SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); - - /// If this is a qvaligni shuffle mask, return the shift - /// amount, otherwise return -1. - int isQVALIGNIShuffleMask(SDNode *N); - - } // end namespace PPC - - class PPCTargetLowering : public TargetLowering { - const PPCSubtarget &Subtarget; - - public: - explicit PPCTargetLowering(const PPCTargetMachine &TM, - const PPCSubtarget &STI); - - /// getTargetNodeName() - This method returns the name of a target specific - /// DAG node. - const char *getTargetNodeName(unsigned Opcode) const override; - - bool isSelectSupported(SelectSupportKind Kind) const override { - // PowerPC does not support scalar condition selects on vectors. - return (Kind != SelectSupportKind::ScalarCondVectorVal); - } - - /// getPreferredVectorAction - The code we generate when vector types are - /// legalized by promoting the integer element type is often much worse - /// than code we generate if we widen the type for applicable vector types. - /// The issue with promoting is that the vector is scalaraized, individual - /// elements promoted and then the vector is rebuilt. So say we load a pair - /// of v4i8's and shuffle them. This will turn into a mess of 8 extending - /// loads, moves back into VSR's (or memory ops if we don't have moves) and - /// then the VPERM for the shuffle. All in all a very slow sequence. - TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) - const override { - if (VT.getScalarSizeInBits() % 8 == 0) - return TypeWidenVector; - return TargetLoweringBase::getPreferredVectorAction(VT); - } - - bool useSoftFloat() const override; - - bool hasSPE() const; - - MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { - return MVT::i32; - } - - bool isCheapToSpeculateCttz() const override { - return true; - } - - bool isCheapToSpeculateCtlz() const override { - return true; - } - - bool isCtlzFast() const override { - return true; - } - - bool hasAndNotCompare(SDValue) const override { - return true; - } - - bool preferIncOfAddToSubOfNot(EVT VT) const override; - - bool convertSetCCLogicToBitwiseLogic(EVT VT) const override { - return VT.isScalarInteger(); - } - - bool supportSplitCSR(MachineFunction *MF) const override { - return - MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS && - MF->getFunction().hasFnAttribute(Attribute::NoUnwind); - } - - void initializeSplitCSR(MachineBasicBlock *Entry) const override; - - void insertCopiesSplitCSR( - MachineBasicBlock *Entry, - const SmallVectorImpl &Exits) const override; - - /// getSetCCResultType - Return the ISD::SETCC ValueType - EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, - EVT VT) const override; - - /// Return true if target always beneficiates from combining into FMA for a - /// given value type. This must typically return false on targets where FMA - /// takes more cycles to execute than FADD. - bool enableAggressiveFMAFusion(EVT VT) const override; - - /// getPreIndexedAddressParts - returns true by value, base pointer and - /// offset pointer and addressing mode by reference if the node's address - /// can be legally represented as pre-indexed load / store address. 
- bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, - SDValue &Offset, - ISD::MemIndexedMode &AM, - SelectionDAG &DAG) const override; - - /// SelectAddressEVXRegReg - Given the specified addressed, check to see if - /// it can be more efficiently represented as [r+imm]. - bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) const; - - /// SelectAddressRegReg - Given the specified addressed, check to see if it - /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment - /// is non-zero, only accept displacement which is not suitable for [r+imm]. - /// Returns false if it can be represented by [r+imm], which are preferred. - bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG, - unsigned EncodingAlignment = 0) const; - - /// SelectAddressRegImm - Returns true if the address N can be represented - /// by a base register plus a signed 16-bit displacement [r+imm], and if it - /// is not better represented as reg+reg. If \p EncodingAlignment is - /// non-zero, only accept displacements suitable for instruction encoding - /// requirement, i.e. multiples of 4 for DS form. - bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, - SelectionDAG &DAG, - unsigned EncodingAlignment) const; - - /// SelectAddressRegRegOnly - Given the specified addressed, force it to be - /// represented as an indexed [r+r] operation. - bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, - SelectionDAG &DAG) const; - - Sched::Preference getSchedulingPreference(SDNode *N) const override; - - /// LowerOperation - Provide custom lowering hooks for some operations. - /// - SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; - - /// ReplaceNodeResults - Replace the results of node with an illegal result - /// type with new values built out of custom code. 
- /// - void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, - SelectionDAG &DAG) const override; - - SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - - SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, - SmallVectorImpl &Created) const override; - - Register getRegisterByName(const char* RegName, EVT VT, - const MachineFunction &MF) const override; - - void computeKnownBitsForTargetNode(const SDValue Op, - KnownBits &Known, - const APInt &DemandedElts, - const SelectionDAG &DAG, - unsigned Depth = 0) const override; - - Align getPrefLoopAlignment(MachineLoop *ML) const override; - - bool shouldInsertFencesForAtomic(const Instruction *I) const override { - return true; - } - - Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, - AtomicOrdering Ord) const override; - Instruction *emitTrailingFence(IRBuilder<> &Builder, Instruction *Inst, - AtomicOrdering Ord) const override; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr &MI, - MachineBasicBlock *MBB) const override; - MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI, - MachineBasicBlock *MBB, - unsigned AtomicSize, - unsigned BinOpcode, - unsigned CmpOpcode = 0, - unsigned CmpPred = 0) const; - MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI, - MachineBasicBlock *MBB, - bool is8bit, - unsigned Opcode, - unsigned CmpOpcode = 0, - unsigned CmpPred = 0) const; - - MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI, - MachineBasicBlock *MBB) const; - - MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI, - MachineBasicBlock *MBB) const; - - ConstraintType getConstraintType(StringRef Constraint) const override; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const override; - - std::pair - getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - StringRef Constraint, MVT VT) const override; - - /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate - /// function arguments in the caller parameter area. This is the actual - /// alignment, not its logarithm. - unsigned getByValTypeAlignment(Type *Ty, - const DataLayout &DL) const override; - - /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops - /// vector. If it is invalid, don't add anything to Ops. - void LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, - std::vector &Ops, - SelectionDAG &DAG) const override; + return true; + } - unsigned - getInlineAsmMemConstraint(StringRef ConstraintCode) const override { - if (ConstraintCode == "es") - return InlineAsm::Constraint_es; - else if (ConstraintCode == "o") - return InlineAsm::Constraint_o; - else if (ConstraintCode == "Q") - return InlineAsm::Constraint_Q; - else if (ConstraintCode == "Z") - return InlineAsm::Constraint_Z; - else if (ConstraintCode == "Zy") - return InlineAsm::Constraint_Zy; - return TargetLowering::getInlineAsmMemConstraint(ConstraintCode); - } - - /// isLegalAddressingMode - Return true if the addressing mode represented - /// by AM is legal for this target, for a load/store of the specified type. 
- bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, - Type *Ty, unsigned AS, - Instruction *I = nullptr) const override; - - /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can - /// compare a register against the immediate without having to materialize - /// the immediate into a register. - bool isLegalICmpImmediate(int64_t Imm) const override; - - /// isLegalAddImmediate - Return true if the specified immediate is legal - /// add immediate, that is the target has add instructions which can - /// add a register and the immediate without having to materialize - /// the immediate into a register. - bool isLegalAddImmediate(int64_t Imm) const override; - - /// isTruncateFree - Return true if it's free to truncate a value of - /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate a i64 value in - /// register X1 to i32 by referencing its sub-register R1. - bool isTruncateFree(Type *Ty1, Type *Ty2) const override; - bool isTruncateFree(EVT VT1, EVT VT2) const override; - - bool isZExtFree(SDValue Val, EVT VT2) const override; - - bool isFPExtFree(EVT DestVT, EVT SrcVT) const override; - - /// Returns true if it is beneficial to convert a load of a constant - /// to just the constant itself. - bool shouldConvertConstantLoadToIntImm(const APInt &Imm, - Type *Ty) const override; - - bool convertSelectOfConstantsToMath(EVT VT) const override { - return true; - } - - bool isDesirableToTransformToIntegerOp(unsigned Opc, - EVT VT) const override { - // Only handle float load/store pair because float(fpr) load/store - // instruction has more cycles than integer(gpr) load/store in PPC. - if (Opc != ISD::LOAD && Opc != ISD::STORE) - return false; - if (VT != MVT::f32 && VT != MVT::f64) - return false; - - return true; - } - - // Returns true if the address of the global is stored in TOC entry. - bool isAccessedAsGotIndirect(SDValue N) const; - - bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, - MachineFunction &MF, - unsigned Intrinsic) const override; - - /// getOptimalMemOpType - Returns the target specific optimal type for load - /// and store operations as a result of memset, memcpy, and memmove - /// lowering. If DstAlign is zero that means it's safe to destination - /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it - /// means there isn't a need to check it against alignment requirement, - /// probably because the source does not need to be loaded. If 'IsMemset' is - /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that - /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy - /// source is constant so it does not need to be loaded. - /// It returns EVT::Other if the type should be determined using generic - /// target-independent logic. - EVT - getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, - const AttributeList &FuncAttributes) const override; - - /// Is unaligned memory access allowed for the given type, and is it fast - /// relative to software emulation. 
- bool allowsMisalignedMemoryAccesses( - EVT VT, unsigned AddrSpace, unsigned Align = 1, - MachineMemOperand::Flags Flags = MachineMemOperand::MONone, - bool *Fast = nullptr) const override; - - /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster - /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be - /// expanded to FMAs when this method returns true, otherwise fmuladd is - /// expanded to fmul + fadd. - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - - const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; - - // Should we expand the build vector with shuffles? - bool - shouldExpandBuildVectorWithShuffles(EVT VT, - unsigned DefinedValues) const override; - - /// createFastISel - This method returns a target-specific FastISel object, - /// or null if the target does not support "fast" instruction selection. - FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, - const TargetLibraryInfo *LibInfo) const override; - - /// Returns true if an argument of type Ty needs to be passed in a - /// contiguous block of registers in calling convention CallConv. - bool functionArgumentNeedsConsecutiveRegisters( - Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { - // We support any array type as "consecutive" block in the parameter - // save area. The element type defines the alignment requirement and - // whether the argument should go in GPRs, FPRs, or VRs if available. - // - // Note that clang uses this capability both to implement the ELFv2 - // homogeneous float/vector aggregate ABI, and to avoid having to use - // "byval" when passing aggregates that might fully fit in registers. - return Ty->isArrayTy(); - } - - /// If a physical register, this returns the register that receives the - /// exception address on entry to an EH pad. - unsigned - getExceptionPointerRegister(const Constant *PersonalityFn) const override; - - /// If a physical register, this returns the register that receives the - /// exception typeid on entry to a landing pad. - unsigned - getExceptionSelectorRegister(const Constant *PersonalityFn) const override; - - /// Override to support customized stack guard loading. - bool useLoadStackGuardNode() const override; - void insertSSPDeclarations(Module &M) const override; - - bool isFPImmLegal(const APFloat &Imm, EVT VT, - bool ForCodeSize) const override; - - unsigned getJumpTableEncoding() const override; - bool isJumpTableRelative() const override; - SDValue getPICJumpTableRelocBase(SDValue Table, - SelectionDAG &DAG) const override; - const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF, - unsigned JTI, - MCContext &Ctx) const override; - - private: - struct ReuseLoadInfo { - SDValue Ptr; - SDValue Chain; - SDValue ResChain; - MachinePointerInfo MPI; - bool IsDereferenceable = false; - bool IsInvariant = false; - unsigned Alignment = 0; - AAMDNodes AAInfo; - const MDNode *Ranges = nullptr; - - ReuseLoadInfo() = default; - - MachineMemOperand::Flags MMOFlags() const { - MachineMemOperand::Flags F = MachineMemOperand::MONone; - if (IsDereferenceable) - F |= MachineMemOperand::MODereferenceable; - if (IsInvariant) - F |= MachineMemOperand::MOInvariant; - return F; + // Returns true if the address of the global is stored in TOC entry. 
+ bool isAccessedAsGotIndirect(SDValue N) const; + + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + + bool getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + MachineFunction &MF, + unsigned Intrinsic) const override; + + /// getOptimalMemOpType - Returns the target specific optimal type for load + /// and store operations as a result of memset, memcpy, and memmove + /// lowering. If DstAlign is zero that means it's safe to destination + /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it + /// means there isn't a need to check it against alignment requirement, + /// probably because the source does not need to be loaded. If 'IsMemset' is + /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that + /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy + /// source is constant so it does not need to be loaded. + /// It returns EVT::Other if the type should be determined using generic + /// target-independent logic. + EVT + getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + const AttributeList &FuncAttributes) const override; + + /// Is unaligned memory access allowed for the given type, and is it fast + /// relative to software emulation. + bool allowsMisalignedMemoryAccesses( + EVT VT, unsigned AddrSpace, unsigned Align = 1, + MachineMemOperand::Flags Flags = MachineMemOperand::MONone, + bool *Fast = nullptr) const override; + + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; + + // Should we expand the build vector with shuffles? + bool + shouldExpandBuildVectorWithShuffles(EVT VT, + unsigned DefinedValues) const override; + + /// createFastISel - This method returns a target-specific FastISel object, + /// or null if the target does not support "fast" instruction selection. + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo) const override; + + /// Returns true if an argument of type Ty needs to be passed in a + /// contiguous block of registers in calling convention CallConv. + bool functionArgumentNeedsConsecutiveRegisters( + Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override { + // We support any array type as "consecutive" block in the parameter + // save area. The element type defines the alignment requirement and + // whether the argument should go in GPRs, FPRs, or VRs if available. + // + // Note that clang uses this capability both to implement the ELFv2 + // homogeneous float/vector aggregate ABI, and to avoid having to use + // "byval" when passing aggregates that might fully fit in registers. + return Ty->isArrayTy(); } - }; - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } + /// If a physical register, this returns the register that receives the + /// exception address on entry to an EH pad. 
+ unsigned + getExceptionPointerRegister(const Constant *PersonalityFn) const override; - bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, - SelectionDAG &DAG, - ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; - void spliceIntoChain(SDValue ResChain, SDValue NewResChain, - SelectionDAG &DAG) const; + /// If a physical register, this returns the register that receives the + /// exception typeid on entry to a landing pad. + unsigned + getExceptionSelectorRegister(const Constant *PersonalityFn) const override; - void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, - SelectionDAG &DAG, const SDLoc &dl) const; - SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, - const SDLoc &dl) const; + /// Override to support customized stack guard loading. + bool useLoadStackGuardNode() const override; + void insertSSPDeclarations(Module &M) const override; - bool directMoveIsProfitable(const SDValue &Op) const; - SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, - const SDLoc &dl) const; + bool isFPImmLegal(const APFloat &Imm, EVT VT, + bool ForCodeSize) const override; + + unsigned getJumpTableEncoding() const override; + bool isJumpTableRelative() const override; + SDValue getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const override; + const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI, + MCContext &Ctx) const override; + + private: + struct ReuseLoadInfo { + SDValue Ptr; + SDValue Chain; + SDValue ResChain; + MachinePointerInfo MPI; + bool IsDereferenceable = false; + bool IsInvariant = false; + unsigned Alignment = 0; + AAMDNodes AAInfo; + const MDNode *Ranges = nullptr; + + ReuseLoadInfo() = default; + + MachineMemOperand::Flags MMOFlags() const { + MachineMemOperand::Flags F = MachineMemOperand::MONone; + if (IsDereferenceable) + F |= MachineMemOperand::MODereferenceable; + if (IsInvariant) + F |= MachineMemOperand::MOInvariant; + return F; + } + }; + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. 
+ return true; + } - SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, - const SDLoc &dl) const; + bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, + SelectionDAG &DAG, + ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; + void spliceIntoChain(SDValue ResChain, SDValue NewResChain, + SelectionDAG &DAG) const; - SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const; + void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, + SelectionDAG &DAG, const SDLoc &dl) const; + SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; - SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; - SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + bool directMoveIsProfitable(const SDValue &Op) const; + SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; - bool - IsEligibleForTailCallOptimization(SDValue Callee, + SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; + + SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const; + + SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const; + SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const; + + bool + IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + const SmallVectorImpl &Ins, + SelectionDAG& DAG) const; + + bool + IsEligibleForTailCallOptimization_64SVR4( + SDValue Callee, CallingConv::ID CalleeCC, + ImmutableCallSite CS, bool isVarArg, + const SmallVectorImpl &Outs, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; - bool - IsEligibleForTailCallOptimization_64SVR4( - SDValue Callee, - CallingConv::ID CalleeCC, - ImmutableCallSite CS, - bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &Ins, - SelectionDAG& DAG) const; - - SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff, - SDValue Chain, SDValue &LROpOut, - SDValue &FPOpOut, - const SDLoc &dl) const; - - SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; - - SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, - const SDLoc &dl) const; - SDValue LowerINT_TO_FP(SDValue Op, 
SelectionDAG &DAG) const; - SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff, + SDValue Chain, SDValue &LROpOut, + SDValue &FPOpOut, + const SDLoc &dl) const; + + SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const; + + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; + SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + SDValue 
LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const; + + SDValue LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const; + SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl, + bool isTailCall, bool isVarArg, bool isPatchPoint, + bool hasNest, SelectionDAG &DAG, + SmallVector, 8> &RegsToPass, + SDValue InFlag, SDValue Chain, SDValue CallSeqStart, + SDValue &Callee, int SPDiff, unsigned NumBytes, + const SmallVectorImpl &Ins, + SmallVectorImpl &InVals, + ImmutableCallSite CS) const; + + SDValue + LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const override; + + SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + + bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, + bool isVarArg, + const SmallVectorImpl &Outs, + LLVMContext &Context) const override; + + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SDLoc &dl, SelectionDAG &DAG) const override; + + SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, + SelectionDAG &DAG, SDValue ArgVal, + const SDLoc &dl) const; + + SDValue LowerFormalArguments_AIX( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; + SDValue LowerFormalArguments_Darwin( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; + SDValue LowerFormalArguments_64SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; + SDValue LowerFormalArguments_32SVR4( + SDValue Chain, CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl &Ins, const SDLoc &dl, + SelectionDAG &DAG, SmallVectorImpl &InVals) const; + + SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, + SDValue CallSeqStart, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + const SDLoc &dl) const; + + SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + ImmutableCallSite CS) 
const; + SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + ImmutableCallSite CS) const; + SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, + const SmallVectorImpl &Ins, + const SDLoc &dl, SelectionDAG &DAG, + SmallVectorImpl &InVals, + ImmutableCallSite CS) const; + SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, + bool isTailCall, bool isPatchPoint, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - SDValue FinishCall(CallingConv::ID CallConv, const SDLoc &dl, - bool isTailCall, bool isVarArg, bool isPatchPoint, - bool hasNest, SelectionDAG &DAG, - SmallVector, 8> &RegsToPass, - SDValue InFlag, SDValue Chain, SDValue CallSeqStart, - SDValue &Callee, int SPDiff, unsigned NumBytes, - const SmallVectorImpl &Ins, - SmallVectorImpl &InVals, - ImmutableCallSite CS) const; - - SDValue - LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const override; - - SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, - SmallVectorImpl &InVals) const override; - - bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, - bool isVarArg, - const SmallVectorImpl &Outs, - LLVMContext &Context) const override; - - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SDLoc &dl, SelectionDAG &DAG) const override; - - SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, - SelectionDAG &DAG, SDValue ArgVal, - const SDLoc &dl) const; - - SDValue LowerFormalArguments_Darwin( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const; - SDValue LowerFormalArguments_64SVR4( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const; - SDValue LowerFormalArguments_32SVR4( - SDValue Chain, CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, const SDLoc &dl, - SelectionDAG &DAG, SmallVectorImpl &InVals) const; - - SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, - SDValue CallSeqStart, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - const SDLoc &dl) const; - - SDValue LowerCall_Darwin(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool isPatchPoint, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - ImmutableCallSite CS) const; - SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool isPatchPoint, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - ImmutableCallSite CS) const; - 
SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool isPatchPoint, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - ImmutableCallSite CS) const; - SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, bool isPatchPoint, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - const SDLoc &dl, SelectionDAG &DAG, - SmallVectorImpl &InVals, - ImmutableCallSite CS) const; - - SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; - - SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, - DAGCombinerInfo &DCI) const; - - /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces - /// SETCC with integer subtraction when (1) there is a legal way of doing it - /// (2) keeping the result of comparison in GPR has performance benefit. - SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const; - - SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps, bool &UseOneConstNR, - bool Reciprocal) const override; - SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps) const override; - unsigned combineRepeatedFPDivisors() const override; - - SDValue - combineElementTruncationToVectorTruncation(SDNode *N, - DAGCombinerInfo &DCI) const; - - /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be - /// handled by the VINSERTH instruction introduced in ISA 3.0. This is - /// essentially any shuffle of v8i16 vectors that just inserts one element - /// from one vector into the other. - SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; - - /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be - /// handled by the VINSERTB instruction introduced in ISA 3.0. This is - /// essentially v16i8 vector version of VINSERTH. - SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; - - // Return whether the call instruction can potentially be optimized to a - // tail call. This will cause the optimizers to attempt to move, or - // duplicate return instructions to help enable tail call optimizations. 
- bool mayBeEmittedAsTailCall(const CallInst *CI) const override; - bool hasBitPreservingFPLogic(EVT VT) const override; - bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; - }; // end class PPCTargetLowering - - namespace PPC { - - FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, - const TargetLibraryInfo *LibInfo); - - } // end namespace PPC - - bool isIntS16Immediate(SDNode *N, int16_t &Imm); - bool isIntS16Immediate(SDValue Op, int16_t &Imm); - -} // end namespace llvm - -#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H + SmallVectorImpl &InVals, + ImmutableCallSite CS) const; + + SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + + SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, + DAGCombinerInfo &DCI) const; + + /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces + /// SETCC with integer subtraction when (1) there is a legal way of doing it + /// (2) keeping the result of comparison in GPR has performance benefit. + SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const; + + SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps, bool &UseOneConstNR, + bool Reciprocal) const override; + SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, + int &RefinementSteps) const override; + unsigned combineRepeatedFPDivisors() const override; + + SDValue + combineElementTruncationToVectorTruncation(SDNode *N, + DAGCombinerInfo &DCI) const; + + /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be + /// handled by the VINSERTH instruction introduced in ISA 3.0. This is + /// essentially any shuffle of v8i16 vectors that just inserts one element + /// from one vector into the other. + SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + + /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be + /// handled by the VINSERTB instruction introduced in ISA 3.0. This is + /// essentially v16i8 vector version of VINSERTH. + SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const; + + // Return whether the call instruction can potentially be optimized to a + // tail call. This will cause the optimizers to attempt to move, or + // duplicate return instructions to help enable tail call optimizations. 
+ bool mayBeEmittedAsTailCall(const CallInst *CI) const override; + bool hasBitPreservingFPLogic(EVT VT) const override; + bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override; + }; // end class PPCTargetLowering + + namespace PPC { + + FastISel *createFastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo); + + } // end namespace PPC + + bool isIntS16Immediate(SDNode *N, int16_t &Imm); + bool isIntS16Immediate(SDValue Op, int16_t &Imm); + + } // end namespace llvm + + #endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3394,15 +3394,15 @@ SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { + if (Subtarget.isAIXABI()) + return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG, + InVals); if (Subtarget.is64BitELFABI()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - else if (Subtarget.is32BitELFABI()) + if (Subtarget.is32BitELFABI()) return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - - // FIXME: We are using this for both AIX and Darwin. We should add appropriate - // AIX testing, and rename it appropriately. return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } @@ -6695,6 +6695,22 @@ NumBytes, Ins, InVals, CS); } +static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, + bool IsPPC64) { + switch (SVT) { + default: + report_fatal_error("Unexpected value type for formal argument"); + case MVT::i1: + case MVT::i32: + case MVT::i64: + return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; + case MVT::f32: + return &PPC::F4RCRegClass; + case MVT::f64: + return &PPC::F8RCRegClass; + } +} + static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { @@ -6755,7 +6771,7 @@ // This includes f64 in 64-bit mode for ABI compatibility. State.AllocateStack(IsPPC64 ? 
8 : StoreSize, 4);
     if (unsigned Reg = State.AllocateReg(FPR))
-      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::f64, LocInfo));
+      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
     else
       report_fatal_error("Handling of placing parameters on the stack is "
                          "unimplemented!");
@@ -6769,6 +6785,87 @@
   }
 }
 
+SDValue PPCTargetLowering::LowerFormalArguments_AIX(
+    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
+    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
+          CallConv == CallingConv::Fast) &&
+         "Unexpected calling convention!");
+
+  if (isVarArg)
+    report_fatal_error("This call type is unimplemented on AIX.");
+
+  if (getTargetMachine().Options.GuaranteedTailCallOpt)
+    report_fatal_error("Tail call support is unimplemented on AIX.");
+
+  if (useSoftFloat())
+    report_fatal_error("Soft float support is unimplemented on AIX.");
+
+  const PPCSubtarget &Subtarget =
+      static_cast<const PPCSubtarget &>(DAG.getSubtarget());
+  if (Subtarget.hasQPX())
+    report_fatal_error("QPX support is not supported on AIX.");
+  if (Subtarget.hasAltivec())
+    report_fatal_error("Altivec support is unimplemented on AIX.");
+
+  const bool IsPPC64 = Subtarget.isPPC64();
+  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
+
+  // Reserve space for the linkage area on the stack.
+  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
+  // On AIX a minimum of 8 words is saved to the parameter save area.
+  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
+  CCInfo.AllocateStack(LinkageSize + MinParameterSaveArea, PtrByteSize);
+  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue ArgValue;
+    ISD::ArgFlagsTy Flags = Ins[i].Flags;
+    if (VA.isRegLoc()) {
+      EVT ValVT = VA.getValVT();
+      MVT LocVT = VA.getLocVT();
+      MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
+      unsigned VReg =
+          MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
+      ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
+      if (ValVT == MVT::i1)
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
+      // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
+      // value to MVT::i64 and then truncate to the correct size.
+      if (IsPPC64 && ValVT == MVT::i32)
+        ArgValue = extendArgForPPC64(Flags, ValVT, DAG, ArgValue, dl);
+
+      InVals.push_back(ArgValue);
+    } else {
+      report_fatal_error("Handling of formal arguments on the stack is "
+                         "unimplemented!");
+    }
+  }
+
+  // Area that is at least reserved in the caller of this function.
+  unsigned MinReservedArea = LinkageSize + MinParameterSaveArea;
+
+  // Set the size that is at least reserved in caller of this function. Tail
+  // call optimized function's reserved stack space needs to be aligned so that
+  // taking the difference between two stack areas will result in an aligned
+  // stack.
+ // reserved for caller + MinReservedArea = + EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); + + return Chain; +} + SDValue PPCTargetLowering::LowerCall_AIX( SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, bool isPatchPoint, Index: llvm/test/CodeGen/PowerPC/aix_fpr_param.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix_fpr_param.ll +++ llvm/test/CodeGen/PowerPC/aix_fpr_param.ll @@ -1,8 +1,8 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,32BIT %s -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s @f1 = global float 0.000000e+00, align 4 @d1 = global double 0.000000e+00, align 8 @@ -14,10 +14,7 @@ ret void } -declare void @test_float(float) - -; CHECK: name: call_test_float{{.*}} -; CHECK-NOT: {{^}}name:{{.*}} +; CHECK-LABEL: name: call_test_float{{.*}} ; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) @@ -31,6 +28,22 @@ ; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 +define void @test_float(float %f) { +entry: + %f.addr = alloca float, align 4 + store float %f, float* %f.addr, align 4 + ret void +} + +; CHECK-LABEL: name: test_float{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1 +; CHECK: STFS killed renamable $f1, 0, %stack.0.f.addr :: (store 4 into %ir.f.addr) + define void @call_test_floats() { entry: %0 = load float, float* @f1, align 4 @@ -38,10 +51,7 @@ ret void } -declare void @test_floats(float, float, float) - -; CHECK: name: call_test_floats{{.*}} -; CHECK-NOT: {{^}}name:{{.*}} +; CHECK-LABEL: name: call_test_floats{{.*}} ; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) ; 32BIT-NEXT: renamable $f1 = LFS 0, killed renamable $r3 :: (dereferenceable load 4 from @f1) @@ -59,6 +69,30 @@ ; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 +define void @test_floats(float %f1, float %f2, float %f3) { +entry: + %f1.addr = alloca float, align 4 + %f2.addr = alloca float, align 4 + %f3.addr = alloca float, align 4 + store float %f1, float* %f1.addr, align 4 + store float %f2, float* %f2.addr, align 4 + store float %f3, float* %f3.addr, align 4 + ret void +} + +; CHECK-LABEL: name: test_floats{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1, $f2, $f3 +; CHECK: STFS killed renamable $f1, 0, %stack.0.f1.addr :: (store 4 into %ir.f1.addr) +; CHECK: STFS killed renamable $f2, 0, 
%stack.1.f2.addr :: (store 4 into %ir.f2.addr) +; CHECK: STFS killed renamable $f3, 0, %stack.2.f3.addr :: (store 4 into %ir.f3.addr) + define void @call_test_double() { entry: %0 = load double, double* @d1, align 8 @@ -66,10 +100,7 @@ ret void } -declare void @test_double(double) - -; CHECK: name: call_test_double{{.*}} -; CHECK-NOT: {{^}}name:{{.*}} +; CHECK-LABEL: name: call_test_double{{.*}} ; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) @@ -83,6 +114,22 @@ ; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 +define void @test_double(double %d) { +entry: + %d.addr = alloca double, align 8 + store double %d, double* %d.addr, align 8 + ret void +} + +; CHECK-LABEL: name: test_double{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1 +; CHECK: STFD killed renamable $f1, 0, %stack.0.d.addr :: (store 8 into %ir.d.addr) + define void @call_test_fpr_max() { entry: %0 = load double, double* @d1, align 8 @@ -90,10 +137,7 @@ ret void } -declare void @test_fpr_max(double, double, double, double, double, double, double, double, double, double, double, double, double) - -; CHECK: name: call_test_fpr_max{{.*}} -; CHECK-NOT: {{^}}name:{{.*}} +; CHECK-LABEL: name: call_test_fpr_max{{.*}} ; 32BIT: renamable $r3 = LWZtoc @d1, $r2 :: (load 4 from got) ; 32BIT-NEXT: renamable $f1 = LFD 0, killed renamable $r3 :: (dereferenceable load 8 from @d1) @@ -131,6 +175,70 @@ ; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit killed $f2, implicit killed $f3, implicit killed $f4, implicit killed $f5, implicit killed $f6, implicit killed $f7, implicit killed $f8, implicit killed $f9, implicit killed $f10, implicit killed $f11, implicit killed $f12, implicit killed $f13, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 +define void @test_fpr_max(double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10, double %d11, double %d12, double %d13) { +entry: + %d1.addr = alloca double, align 8 + %d2.addr = alloca double, align 8 + %d3.addr = alloca double, align 8 + %d4.addr = alloca double, align 8 + %d5.addr = alloca double, align 8 + %d6.addr = alloca double, align 8 + %d7.addr = alloca double, align 8 + %d8.addr = alloca double, align 8 + %d9.addr = alloca double, align 8 + %d10.addr = alloca double, align 8 + %d11.addr = alloca double, align 8 + %d12.addr = alloca double, align 8 + %d13.addr = alloca double, align 8 + store double %d1, double* %d1.addr, align 8 + store double %d2, double* %d2.addr, align 8 + store double %d3, double* %d3.addr, align 8 + store double %d4, double* %d4.addr, align 8 + store double %d5, double* %d5.addr, align 8 + store double %d6, double* %d6.addr, align 8 + store double %d7, double* %d7.addr, align 8 + store double %d8, double* %d8.addr, align 8 + store double %d9, double* %d9.addr, align 8 + store double %d10, double* %d10.addr, align 8 + store double %d11, double* %d11.addr, align 8 + store double %d12, double* %d12.addr, align 8 + store double %d13, double* %d13.addr, align 8 + ret void +} + +; CHECK-LABEL: name: test_fpr_max{{.*}} + +; CHECK: liveins: +; CHECK-NEXT: - { reg: '$f1', 
virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f2', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f3', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f4', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f5', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f6', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f7', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f8', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f9', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f10', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f11', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f12', virtual-reg: '' } +; CHECK-NEXT: - { reg: '$f13', virtual-reg: '' } +; CHECK: body: | +; CHECK-NEXT: bb.0.entry: +; CHECK-NEXT: liveins: $f1 +; CHECK: STFD killed renamable $f1, 0, %stack.0.d1.addr :: (store 8 into %ir.d1.addr) +; CHECK: STFD killed renamable $f2, 0, %stack.1.d2.addr :: (store 8 into %ir.d2.addr) +; CHECK: STFD killed renamable $f3, 0, %stack.2.d3.addr :: (store 8 into %ir.d3.addr) +; CHECK: STFD killed renamable $f4, 0, %stack.3.d4.addr :: (store 8 into %ir.d4.addr) +; CHECK: STFD killed renamable $f5, 0, %stack.4.d5.addr :: (store 8 into %ir.d5.addr) +; CHECK: STFD killed renamable $f6, 0, %stack.5.d6.addr :: (store 8 into %ir.d6.addr) +; CHECK: STFD killed renamable $f7, 0, %stack.6.d7.addr :: (store 8 into %ir.d7.addr) +; CHECK: STFD killed renamable $f8, 0, %stack.7.d8.addr :: (store 8 into %ir.d8.addr) +; CHECK: STFD killed renamable $f9, 0, %stack.8.d9.addr :: (store 8 into %ir.d9.addr) +; CHECK: STFD killed renamable $f10, 0, %stack.9.d10.addr :: (store 8 into %ir.d10.addr) +; CHECK: STFD killed renamable $f11, 0, %stack.10.d11.addr :: (store 8 into %ir.d11.addr) +; CHECK: STFD killed renamable $f12, 0, %stack.11.d12.addr :: (store 8 into %ir.d12.addr) +; CHECK: STFD killed renamable $f13, 0, %stack.12.d13.addr :: (store 8 into %ir.d13.addr) + define void @call_test_mix() { entry: %0 = load float, float* @f1, align 4 @@ -139,10 +247,7 @@ ret void } -declare void @test_mix(float, i32, double, i8 signext) - -; CHECK: name: call_test_mix{{.*}} -; CHECK-NOT: {{^}}name:{{.*}} +; CHECK-LABEL: name: call_test_mix{{.*}} ; 32BIT: renamable $r3 = LWZtoc @f1, $r2 :: (load 4 from got) ; 32BIT-NEXT: renamable $r4 = LWZtoc @d1, $r2 :: (load 4 from got) @@ -163,3 +268,44 @@ ; 64BIT-NEXT: $x6 = LI8 97 ; 64BIT-NEXT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $f1, implicit $x4, implicit $f2, implicit killed $x6, implicit $x2, implicit-def $r1 ; 64BIT-NEXT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 + +define void @test_mix(float %f, i32 signext %i, double %d, i8 signext %c) { +entry: + %f.addr = alloca float, align 4 + %i.addr = alloca i32, align 4 + %d.addr = alloca double, align 8 + %c.addr = alloca i8, align 1 + store float %f, float* %f.addr, align 4 + store i32 %i, i32* %i.addr, align 4 + store double %d, double* %d.addr, align 8 + store i8 %c, i8* %c.addr, align 1 + ret void +} + +; CHECK-LABEL: name: test_mix{{.*}} + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $f1, $f2, $r4, $r7 +; 32BIT: STFS killed renamable $f1, 0, %stack.0.f.addr :: (store 4 into %ir.f.addr) +; 32BIT: STW killed renamable $r4, 0, %stack.1.i.addr :: (store 4 into %ir.i.addr) +; 32BIT: STFD killed renamable $f2, 0, %stack.2.d.addr :: (store 8 into %ir.d.addr) +; 32BIT: STB killed renamable $r7, 0, 
%stack.3.c.addr :: (store 1 into %ir.c.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$f1', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$f2', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $f1, $f2, $x4, $x6 +; 64BIT: STFS killed renamable $f1, 0, %stack.0.f.addr :: (store 4 into %ir.f.addr) +; 64BIT: STW8 killed renamable $x4, 0, %stack.1.i.addr :: (store 4 into %ir.i.addr) +; 64BIT: STFD killed renamable $f2, 0, %stack.2.d.addr :: (store 8 into %ir.d.addr) +; 64BIT: STB8 killed renamable $x6, 0, %stack.3.c.addr :: (store 1 into %ir.c.addr) Index: llvm/test/CodeGen/PowerPC/aix_gpr_param.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aix_gpr_param.ll +++ llvm/test/CodeGen/PowerPC/aix_gpr_param.ll @@ -1,11 +1,17 @@ -; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=32BIT %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,32BIT %s -; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \ -; RUN: FileCheck --check-prefix=64BIT %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp -verify-machineinstrs < %s | \ +; RUN: FileCheck --check-prefixes=CHECK,64BIT %s define void @call_test_char() { entry: + call void @test_char(i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_char + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 97 ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 @@ -16,12 +22,37 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_char(i8 signext 97) +define void @test_char(i8 signext %c) { +entry: + %c.addr = alloca i8, align 1 + store i8 %c, i8* %c.addr, align 1 ret void } +; CHECK-LABEL: name: test_char + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: STB killed renamable $r3, 0, %stack.0.c.addr :: (store 1 into %ir.c.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: STB8 killed renamable $x3, 0, %stack.0.c.addr :: (store 1 into %ir.c.addr) + define void @call_test_chars() { entry: + call void @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) + ret void +} + +; CHECK-LABEL: name: call_test_chars + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 97 ; 32BIT: $r4 = LI 97 @@ -38,12 +69,55 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_chars(i8 signext 97, i8 signext 97, i8 signext 97, i8 signext 97) +define void @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 signext %c4) { +entry: + %c1.addr = alloca i8, align 1 + %c2.addr = alloca i8, align 1 + %c3.addr = alloca i8, align 1 + %c4.addr = alloca i8, align 1 + store i8 %c1, i8* %c1.addr, 
align 1 + store i8 %c2, i8* %c2.addr, align 1 + store i8 %c3, i8* %c3.addr, align 1 + store i8 %c4, i8* %c4.addr, align 1 ret void } +; CHECK-LABEL: name: test_chars + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 +; 32BIT: STB killed renamable $r3, 0, %stack.0.c1.addr :: (store 1 into %ir.c1.addr) +; 32BIT: STB killed renamable $r4, 0, %stack.1.c2.addr :: (store 1 into %ir.c2.addr) +; 32BIT: STB killed renamable $r5, 0, %stack.2.c3.addr :: (store 1 into %ir.c3.addr) +; 32BIT: STB killed renamable $r6, 0, %stack.3.c4.addr :: (store 1 into %ir.c4.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 +; 64BIT: STB8 killed renamable $x3, 0, %stack.0.c1.addr :: (store 1 into %ir.c1.addr) +; 64BIT: STB8 killed renamable $x4, 0, %stack.1.c2.addr :: (store 1 into %ir.c2.addr) +; 64BIT: STB8 killed renamable $x5, 0, %stack.2.c3.addr :: (store 1 into %ir.c3.addr) +; 64BIT: STB8 killed renamable $x6, 0, %stack.3.c4.addr :: (store 1 into %ir.c4.addr) + define void @call_test_chars_mix() { entry: + call void @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) + ret void +} + +; CHECK-LABEL: name: call_test_chars_mix + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 97 ; 32BIT: $r4 = LI 225 @@ -60,12 +134,55 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_chars_mix(i8 signext 97, i8 zeroext -31, i8 zeroext 97, i8 signext -31) +define void @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3, i8 signext %c4) { +entry: + %c1.addr = alloca i8, align 1 + %c2.addr = alloca i8, align 1 + %c3.addr = alloca i8, align 1 + %c4.addr = alloca i8, align 1 + store i8 %c1, i8* %c1.addr, align 1 + store i8 %c2, i8* %c2.addr, align 1 + store i8 %c3, i8* %c3.addr, align 1 + store i8 %c4, i8* %c4.addr, align 1 ret void } +; CHECK-LABEL: name: test_chars_mix + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT: body: +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 +; 32BIT: STB killed renamable $r3, 0, %stack.0.c1.addr :: (store 1 into %ir.c1.addr) +; 32BIT: STB killed renamable $r4, 0, %stack.1.c2.addr :: (store 1 into %ir.c2.addr) +; 32BIT: STB killed renamable $r5, 0, %stack.2.c3.addr :: (store 1 into %ir.c3.addr) +; 32BIT: STB killed renamable $r6, 0, %stack.3.c4.addr :: (store 1 into %ir.c4.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 +; 64BIT: STB8 killed renamable $x3, 0, 
%stack.0.c1.addr :: (store 1 into %ir.c1.addr) +; 64BIT: STB8 killed renamable $x4, 0, %stack.1.c2.addr :: (store 1 into %ir.c2.addr) +; 64BIT: STB8 killed renamable $x5, 0, %stack.2.c3.addr :: (store 1 into %ir.c3.addr) +; 64BIT: STB8 killed renamable $x6, 0, %stack.3.c4.addr :: (store 1 into %ir.c4.addr) + define void @call_test_int() { entry: + call void @test_int(i32 1) + ret void +} + +; CHECK-LABEL: name: call_test_int + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 1 ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 @@ -76,42 +193,134 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_int(i32 1) +define void @test_int(i32 signext %i) { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 ret void } +; CHECK-LABEL: name: test_int + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: STW killed renamable $r3, 0, %stack.0.i.addr :: (store 4 into %ir.i.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: STW8 killed renamable $x3, 0, %stack.0.i.addr :: (store 4 into %ir.i.addr) + define void @call_test_ints() { entry: + call void @test_ints(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8) + ret void +} + +; CHECK-LABEL: name: call_test_ints + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 1 -; 32BIT: $r4 = LI 1 -; 32BIT: $r5 = LI 1 -; 32BIT: $r6 = LI 1 -; 32BIT: $r7 = LI 1 -; 32BIT: $r8 = LI 1 -; 32BIT: $r9 = LI 1 -; 32BIT: $r10 = LI 1 +; 32BIT: $r4 = LI 2 +; 32BIT: $r5 = LI 3 +; 32BIT: $r6 = LI 4 +; 32BIT: $r7 = LI 5 +; 32BIT: $r8 = LI 6 +; 32BIT: $r9 = LI 7 +; 32BIT: $r10 = LI 8 ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 ; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: $x3 = LI8 1 -; 64BIT: $x4 = LI8 1 -; 64BIT: $x5 = LI8 1 -; 64BIT: $x6 = LI8 1 -; 64BIT: $x7 = LI8 1 -; 64BIT: $x8 = LI8 1 -; 64BIT: $x9 = LI8 1 -; 64BIT: $x10 = LI8 1 +; 64BIT: $x4 = LI8 2 +; 64BIT: $x5 = LI8 3 +; 64BIT: $x6 = LI8 4 +; 64BIT: $x7 = LI8 5 +; 64BIT: $x8 = LI8 6 +; 64BIT: $x9 = LI8 7 +; 64BIT: $x10 = LI8 8 ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_ints(i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) +define void @test_ints(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + %g.addr = alloca i32, align 4 + %h.addr = alloca i32, align 
4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + store i32 %g, i32* %g.addr, align 4 + store i32 %h, i32* %h.addr, align 4 ret void } +; CHECK-LABEL: name: test_ints + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr) +; 32BIT: STW killed renamable $r4, 0, %stack.1.b.addr :: (store 4 into %ir.b.addr) +; 32BIT: STW killed renamable $r5, 0, %stack.2.c.addr :: (store 4 into %ir.c.addr) +; 32BIT: STW killed renamable $r6, 0, %stack.3.d.addr :: (store 4 into %ir.d.addr) +; 32BIT: STW killed renamable $r7, 0, %stack.4.e.addr :: (store 4 into %ir.e.addr) +; 32BIT: STW killed renamable $r8, 0, %stack.5.f.addr :: (store 4 into %ir.f.addr) +; 32BIT: STW killed renamable $r9, 0, %stack.6.g.addr :: (store 4 into %ir.g.addr) +; 32BIT: STW killed renamable $r10, 0, %stack.7.h.addr :: (store 4 into %ir.h.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 +; 64BIT: STW8 killed renamable $x3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr) +; 64BIT: STW8 killed renamable $x4, 0, %stack.1.b.addr :: (store 4 into %ir.b.addr) +; 64BIT: STW8 killed renamable $x5, 0, %stack.2.c.addr :: (store 4 into %ir.c.addr) +; 64BIT: STW8 killed renamable $x6, 0, %stack.3.d.addr :: (store 4 into %ir.d.addr) +; 64BIT: STW8 killed renamable $x7, 0, %stack.4.e.addr :: (store 4 into %ir.e.addr) +; 64BIT: STW8 killed renamable $x8, 0, %stack.5.f.addr :: (store 4 into %ir.f.addr) +; 64BIT: STW8 killed renamable $x9, 0, %stack.6.g.addr :: (store 4 into %ir.g.addr) +; 64BIT: STW8 killed renamable $x10, 0, %stack.7.h.addr :: (store 4 into %ir.h.addr) + define void @call_test_ints_64bit() { entry: + call void @test_ints_64bit(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) + ret void +} + +; CHECK-LABEL: name: call_test_ints_64bit + ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: renamable $x3 = LI8 1 ; 64BIT: renamable $x5 = RLDICR killed renamable $x3, 31, 32 @@ -125,12 +334,58 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit killed $x4, implicit $x5, implicit killed $x6, implicit killed $x7, implicit killed $x8, implicit killed $x9, implicit killed $x10, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void 
@test_ints_64bit(i32 signext 1, i32 zeroext 1, i32 zeroext 2147483648, i32 signext -2147483648, i32 signext 1, i32 signext 1, i32 signext 1, i32 signext 1) +define void @test_ints_64bit(i32 signext %a, i32 zeroext %b, i32 zeroext %c, i32 signext %d, i32 signext %e, i32 signext %f, i32 signext %g, i32 signext %h) { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %d.addr = alloca i32, align 4 + %e.addr = alloca i32, align 4 + %f.addr = alloca i32, align 4 + %g.addr = alloca i32, align 4 + %h.addr = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + store i32 %d, i32* %d.addr, align 4 + store i32 %e, i32* %e.addr, align 4 + store i32 %f, i32* %f.addr, align 4 + store i32 %g, i32* %g.addr, align 4 + store i32 %h, i32* %h.addr, align 4 ret void } +; CHECK-LABEL: name: test_ints_64bit + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x7', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x8', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x9', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x10', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6, $x7, $x8, $x9, $x10 +; 64BIT: STW8 killed renamable $x3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr) +; 64BIT: STW8 killed renamable $x4, 0, %stack.1.b.addr :: (store 4 into %ir.b.addr) +; 64BIT: STW8 killed renamable $x5, 0, %stack.2.c.addr :: (store 4 into %ir.c.addr) +; 64BIT: STW8 killed renamable $x6, 0, %stack.3.d.addr :: (store 4 into %ir.d.addr) +; 64BIT: STW8 killed renamable $x7, 0, %stack.4.e.addr :: (store 4 into %ir.e.addr) +; 64BIT: STW8 killed renamable $x8, 0, %stack.5.f.addr :: (store 4 into %ir.f.addr) +; 64BIT: STW8 killed renamable $x9, 0, %stack.6.g.addr :: (store 4 into %ir.g.addr) +; 64BIT: STW8 killed renamable $x10, 0, %stack.7.h.addr :: (store 4 into %ir.h.addr) + define void @call_test_i1() { entry: + call void @test_i1(i1 1) + ret void +} + +; CHECK-LABEL: name: call_test_i1 + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 1 ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit $r2, implicit-def $r1 @@ -141,30 +396,118 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_i1(i1 1) +define void @test_i1(i1 %a) { +entry: + %a.addr = alloca i1, align 1 + store i1 %a, i1* %a.addr, align 1 ret void } +; CHECK-LABEL: name: test_i1 + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: renamable $r3 = RLWINM killed renamable $r3, 0, 31, 31 +; 32BIT-NEXT: STB killed renamable $r3, 0, %stack.0.a.addr :: (store 1 into %ir.a.addr) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: renamable $x3 = RLDICL killed renamable $x3, 0, 63 +; 64BIT-NEXT: STB8 killed renamable $x3, 0, %stack.0.a.addr :: (store 1 into %ir.a.addr) + define void @call_test_i64() { entry: + call void @test_i64(i64 1, i64 2, i64 3, i64 4) + ret void +} + +; 
CHECK-LABEL: name: call_test_i64 + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: $r3 = LI 0 ; 32BIT: $r4 = LI 1 -; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit $r2, implicit-def $r1 +; 32BIT: $r5 = LI 0 +; 32BIT: $r6 = LI 2 +; 32BIT: $r7 = LI 0 +; 32BIT: $r8 = LI 3 +; 32BIT: $r9 = LI 0 +; 32BIT: $r10 = LI 4 +; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit killed $r3, implicit killed $r4, implicit killed $r5, implicit killed $r6, implicit killed $r7, implicit killed $r8, implicit killed $r9, implicit killed $r10, implicit $r2, implicit-def $r1 ; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1 ; 64BIT: $x3 = LI8 1 -; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1 +; 64BIT: $x4 = LI8 2 +; 64BIT: $x5 = LI8 3 +; 64BIT: $x6 = LI8 4 +; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit killed $x5, implicit killed $x6, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - call void @test_i64(i64 1) +define void @test_i64(i64 %a, i64 %b, i64 %c, i64 %d) { +entry: + %a.addr = alloca i64, align 8 + %b.addr = alloca i64, align 8 + %c.addr = alloca i64, align 8 + %d.addr = alloca i64, align 8 + store i64 %a, i64* %a.addr, align 8 + store i64 %b, i64* %b.addr, align 8 + store i64 %c, i64* %c.addr, align 8 + store i64 %d, i64* %d.addr, align 8 ret void } +; CHECK-LABEL: name: test_i64 + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r4', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r5', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r6', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r7', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r8', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r9', virtual-reg: '' } +; 32BIT-NEXT: - { reg: '$r10', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10 +; 32BIT: STW killed renamable $r4, 4, %stack.0.a.addr :: (store 4 into %ir.a.addr + 4) +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr, align 8) +; 32BIT: STW killed renamable $r6, 4, %stack.1.b.addr :: (store 4 into %ir.b.addr + 4) +; 32BIT: STW killed renamable $r5, 0, %stack.1.b.addr :: (store 4 into %ir.b.addr, align 8) +; 32BIT: STW killed renamable $r8, 4, %stack.2.c.addr :: (store 4 into %ir.c.addr + 4) +; 32BIT: STW killed renamable $r7, 0, %stack.2.c.addr :: (store 4 into %ir.c.addr, align 8) +; 32BIT: STW killed renamable $r10, 4, %stack.3.d.addr :: (store 4 into %ir.d.addr + 4) +; 32BIT: STW killed renamable $r9, 0, %stack.3.d.addr :: (store 4 into %ir.d.addr, align 8) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x4', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x5', virtual-reg: '' } +; 64BIT-NEXT: - { reg: '$x6', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 +; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store 8 into %ir.a.addr) +; 64BIT: STD killed renamable $x4, 0, %stack.1.b.addr :: (store 8 into %ir.b.addr) +; 64BIT: STD killed renamable $x5, 0, %stack.2.c.addr :: (store 8 into %ir.c.addr) +; 64BIT: STD killed renamable $x6, 0, %stack.3.d.addr :: 
(store 8 into %ir.d.addr) + define void @call_test_int_ptr() { entry: %b = alloca i32, align 4 + store i32 0, i32* %b, align 4 + call void @test_int_ptr(i32* %b) + ret void +} + +; CHECK-LABEL: name: call_test_int_ptr + ; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1 ; 32BIT: renamable $r3 = ADDI %stack.0.b, 0 ; 32BIT: BL_NOP , csr_aix32, implicit-def dead $lr, implicit $rm, implicit $r3, implicit $r2, implicit-def $r1 @@ -175,25 +518,25 @@ ; 64BIT: BL8_NOP , csr_aix64, implicit-def dead $lr8, implicit $rm, implicit $x3, implicit $x2, implicit-def $r1 ; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1 - store i32 0, i32* %b, align 4 - call void @test_int_ptr(i32* %b) +define void @test_int_ptr(i32* %a) { +entry: + %a.addr = alloca i32*, align 8 + store i32* %a, i32** %a.addr, align 8 ret void } -declare void @test_char(i8 signext) - -declare void @test_chars(i8 signext, i8 signext, i8 signext, i8 signext) - -declare void @test_chars_mix(i8 signext, i8 zeroext, i8 zeroext, i8 signext) - -declare void @test_int(i32) - -declare void @test_ints(i32, i32, i32, i32, i32, i32, i32, i32) - -declare void @test_ints_64bit(i32 signext, i32 zeroext, i32 zeroext, i32 signext, i32 signext, i32 signext, i32 signext, i32 signext) - -declare void @test_i1(i1) - -declare void @test_i64(i64) - -declare void @test_int_ptr(i32*) +; CHECK-LABEL: name: test_int_ptr + +; 32BIT: liveins: +; 32BIT-NEXT: - { reg: '$r3', virtual-reg: '' } +; 32BIT: body: | +; 32BIT-NEXT: bb.0.entry: +; 32BIT-NEXT: liveins: $r3 +; 32BIT: STW killed renamable $r3, 0, %stack.0.a.addr :: (store 4 into %ir.a.addr, align 8) + +; 64BIT: liveins: +; 64BIT-NEXT: - { reg: '$x3', virtual-reg: '' } +; 64BIT: body: | +; 64BIT-NEXT: bb.0.entry: +; 64BIT-NEXT: liveins: $x3 +; 64BIT: STD killed renamable $x3, 0, %stack.0.a.addr :: (store 8 into %ir.a.addr)