Index: lib/Target/PowerPC/P9InstrResources.td =================================================================== --- lib/Target/PowerPC/P9InstrResources.td +++ lib/Target/PowerPC/P9InstrResources.td @@ -711,7 +711,8 @@ LXV, LXVX, LXSD, - DFLOADf64 + DFLOADf64, + XFLOADf64 )>; // 4 Cycle load uses a single slice. @@ -751,7 +752,10 @@ LXSSPX, LXSIWAX, LXSSP, - DFLOADf32 + DFLOADf32, + XFLOADf32, + LIWAX, + LIWZX )>; // Cracked Load that requires the PM resource. @@ -781,7 +785,10 @@ STXSSPX, STXSIWX, DFSTOREf32, - DFSTOREf64 + DFSTOREf64, + XFSTOREf32, + XFSTOREf64, + STIWX )>; // Store operation that requires the whole superslice. Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -282,6 +282,9 @@ ArrayRef<std::pair<unsigned, const char *>> getSerializableBitmaskMachineOperandTargetFlags() const override; + // Heuristic to choose between a VSX instruction and a FP instruction. + bool targetInstrHeuristic(MachineInstr &MI) const; + // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1984,29 +1984,13 @@ return makeArrayRef(TargetFlags); } -bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - auto &MBB = *MI.getParent(); - auto DL = MI.getDebugLoc(); - switch (MI.getOpcode()) { - case TargetOpcode::LOAD_STACK_GUARD: { - assert(Subtarget.isTargetLinux() && - "Only Linux target is expected to contain LOAD_STACK_GUARD"); - const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; - const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; - MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(Offset) - .addReg(Reg); - return true; - } - case PPC::DFLOADf32: - case PPC::DFLOADf64: - case PPC::DFSTOREf32: - case PPC::DFSTOREf64: { - assert(Subtarget.hasP9Vector() && - "Invalid D-Form Pseudo-ops on non-P9 target."); - assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && - "D-form op must have register and immediate operands"); +// Heuristic to choose between a VSX instruction and a FP instruction. The VSX +// versions have the advantage of a full 64-register target whereas the FP ones +// have the advantage of lower latency and higher throughput. So what we are +// after is using the faster instructions in low register pressure situations +// and using the larger register file in high register pressure situations. +// Rewrites MI in place to the selected opcode and returns true. +bool PPCInstrInfo::targetInstrHeuristic(MachineInstr &MI) const { unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: @@ -2025,7 +2009,38 @@ UpperOpcode = PPC::STXSD; LowerOpcode = PPC::STFD; break; + case PPC::XFLOADf32: + UpperOpcode = PPC::LXSSPX; + LowerOpcode = PPC::LFSX; + break; + case PPC::XFLOADf64: + UpperOpcode = PPC::LXSDX; + LowerOpcode = PPC::LFDX; + break; + case PPC::XFSTOREf32: + UpperOpcode = PPC::STXSSPX; + LowerOpcode = PPC::STFSX; + break; + case PPC::XFSTOREf64: + UpperOpcode = PPC::STXSDX; + LowerOpcode = PPC::STFDX; + break; + case PPC::LIWAX: + UpperOpcode = PPC::LXSIWAX; + LowerOpcode = PPC::LFIWAX; + break; + case PPC::LIWZX: + UpperOpcode = PPC::LXSIWZX; + LowerOpcode = PPC::LFIWZX; + break; + case PPC::STIWX: + UpperOpcode = PPC::STXSIWX; + LowerOpcode = PPC::STFIWX; + break; + default: + llvm_unreachable("Unknown Operation!"); } + unsigned TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || @@ -2035,6 +2050,46 @@ Opcode = UpperOpcode; MI.setDesc(get(Opcode)); 
return true; +} + +bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + auto &MBB = *MI.getParent(); + auto DL = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case TargetOpcode::LOAD_STACK_GUARD: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_STACK_GUARD"); + const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: { + assert(Subtarget.hasP9Vector() && + "Invalid D-Form Pseudo-ops on Pre-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); + return targetInstrHeuristic(MI); + } + case PPC::XFLOADf32: + case PPC::XFLOADf64: + case PPC::XFSTOREf32: + case PPC::XFSTOREf64: + case PPC::LIWAX: + case PPC::LIWZX: + case PPC::STIWX: { + assert(Subtarget.hasP8Vector() && + "Invalid X-Form Pseudo-ops on Pre-P8 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && + "X-form op must have register and register operands"); + return targetInstrHeuristic(MI); } case PPC::SPILLTOVSR_LD: { unsigned TargetReg = MI.getOperand(0).getReg(); Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -1994,7 +1994,7 @@ def STFIWX: XForm_28<31, 983, (outs), (ins f8rc:$frS, memrr:$dst), "stfiwx $frS, $dst", IIC_LdStSTFD, [(PPCstfiwx f64:$frS, xoaddr:$dst)]>; - + def STFSX : XForm_28<31, 663, (outs), (ins f4rc:$frS, memrr:$dst), "stfsx $frS, $dst", IIC_LdStSTFD, [(store f32:$frS, xaddr:$dst)]>; Index: lib/Target/PowerPC/PPCInstrVSX.td 
=================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1215,32 +1215,23 @@ let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), - "lxsspx $XT, $src", IIC_LdStLFD, - [(set f32:$XT, (load xoaddr:$src))]>; + "lxsspx $XT, $src", IIC_LdStLFD, []>; def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwax $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + "lxsiwax $XT, $src", IIC_LdStLFD, []>; def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwzx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + "lxsiwzx $XT, $src", IIC_LdStLFD, []>; } // mayLoad // VSX scalar stores introduced in ISA 2.07 let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), - "stxsspx $XT, $dst", IIC_LdStSTFD, - [(store f32:$XT, xoaddr:$dst)]>; + "stxsspx $XT, $dst", IIC_LdStSTFD, []>; def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), - "stxsiwx $XT, $dst", IIC_LdStSTFD, - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; } // mayStore } // UseVSXReg = 1 - def : Pat<(f64 (extloadf32 xoaddr:$src)), - (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; - def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))), - (f32 (LXSSPX xoaddr:$src))>; def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; @@ -2870,6 +2861,39 @@ (f32 (DFLOADf32 ixaddr:$src))>; } // end HasP9Vector, AddedComplexity +let AddedComplexity = 400, Predicates = [HasP8Vector] in { + let isPseudo = 1 in { + def XFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrr:$src), + "#XFLOADf32", + [(set f32:$XT, (load xoaddr:$src))]>; + def XFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrr:$dst), + "#XFSTOREf32", + [(store f32:$XT, xoaddr:$dst)]>; + def XFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins 
memrr:$src), + "#XFLOADf64", + [(set f64:$XT, (load xoaddr:$src))]>; + def XFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst), + "#XFSTOREf64", + [(store f64:$XT, xoaddr:$dst)]>; + + // Load VSX/Floating as Integer Word Algebraic/Zero Indexed + def LIWAX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWAX", + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + def LIWZX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWZX", + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + def STIWX : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst), + "#STIWX", + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + } // end isPseudo + + def : Pat<(f64 (extloadf32 xoaddr:$src)), + (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; + def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))), + (f32 (XFLOADf32 xoaddr:$src))>; +} // end HasP8Vector, AddedComplexity + let Predicates = [HasP9Vector] in { let isPseudo = 1 in { let mayStore = 1 in { @@ -3040,10 +3064,10 @@ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>; + (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>; + (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; @@ -3061,19 +3085,19 @@ } let Predicates = [HasVSX, NoP9Vector] in { - // Load-and-splat with fp-to-int conversion (using X-Form VSX loads). + // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). 
def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (LXSSPX xoaddr:$A), VSFRC)), 0))>; + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (LXSSPX xoaddr:$A), VSFRC)), 0))>; + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; } // Big endian, available on all targets with VSX Index: lib/Target/PowerPC/PPCVSXSwapRemoval.cpp =================================================================== --- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -353,6 +353,8 @@ break; case PPC::LXSDX: case PPC::LXSSPX: + case PPC::XFLOADf64: + case PPC::XFLOADf32: // A load of a floating-point value into the high-order half of // a vector register is safe, provided that we introduce a swap // following the load, which will be done by the SUBREG_TO_REG Index: test/CodeGen/PowerPC/branch_coalesce.ll =================================================================== --- test/CodeGen/PowerPC/branch_coalesce.ll +++ test/CodeGen/PowerPC/branch_coalesce.ll @@ -15,8 +15,8 @@ ; CHECK-NOT: beq ; CHECK-DAG: addi [[LD1BASE:[0-9]+]], [[LD1REG]] ; CHECK-DAG: addi [[LD2BASE:[0-9]+]], [[LD2REG]] -; CHECK-DAG: lxsdx 1, 0, [[LD1BASE]] -; CHECK-DAG: lxsdx 3, 0, [[LD2BASE]] +; CHECK-DAG: lfdx 1, 0, [[LD1BASE]] +; CHECK-DAG: lfdx 3, 0, [[LD2BASE]] ; CHECK: .LBB[[LAB1]] ; CHECK: xsadddp 0, 1, 2 ; CHECK: xsadddp 1, 0, 3 @@ -33,7 +33,7 @@ ; CHECK-NOCOALESCE-NEXT: .LBB0_3: # %entry ; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; CHECK-NOCOALESCE-NEXT: addi 3, 3, 
.LCPI0_1@toc@l -; CHECK-NOCOALESCE-NEXT: lxsdx 3, 0, 3 +; CHECK-NOCOALESCE-NEXT: lfdx 3, 0, 3 ; CHECK-NOCOALESCE-NEXT: .LBB0_4: # %entry ; CHECK-NOCOALESCE-NEXT: xsadddp 0, 1, 2 ; CHECK-NOCOALESCE-NEXT: xsadddp 1, 0, 3 @@ -41,7 +41,7 @@ ; CHECK-NOCOALESCE-NEXT: .LBB0_5: # %entry ; CHECK-NOCOALESCE-NEXT: addis 3, 2, .LCPI0_0@toc@ha ; CHECK-NOCOALESCE-NEXT: addi 3, 3, .LCPI0_0@toc@l -; CHECK-NOCOALESCE-NEXT: lxsdx 1, 0, 3 +; CHECK-NOCOALESCE-NEXT: lfdx 1, 0, 3 ; CHECK-NOCOALESCE-NEXT: beq 0, .LBB0_2 ; CHECK-NOCOALESCE-NEXT: .LBB0_6: # %entry ; CHECK-NOCOALESCE-NEXT: xxlxor 2, 2, 2 Index: test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- test/CodeGen/PowerPC/build-vector-tests.ll +++ test/CodeGen/PowerPC/build-vector-tests.ll @@ -1485,10 +1485,10 @@ ; P9BE: xvcvspsxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspsxws v2, [[REG1]] -; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } @@ -1667,20 +1667,20 @@ ; P9LE: xvcvdpsp ; P9LE: vmrgew ; P9LE: xvcvspsxws v2 -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp ; P8BE: xvcvdpsp ; P8BE: vmrgew ; P8BE: xvcvspsxws v2 -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -1741,9 +1741,9 @@ ; P9LE: vmrgew ; P9LE: xvcvspsxws v2 ; P8BE: lfdux -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp @@ -1751,9 +1751,9 @@ ; P8BE: vmrgew ; P8BE: xvcvspsxws v2 ; P8LE: lfdux -; P8LE: lxsdx 
-; P8LE: lxsdx -; P8LE: lxsdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -1814,9 +1814,9 @@ ; P9LE: vmrgew ; P9LE: xvcvspsxws v2 ; P8BE: lfdux -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp @@ -1824,9 +1824,9 @@ ; P8BE: vmrgew ; P8BE: xvcvspsxws v2 ; P8LE: lfdux -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -1880,11 +1880,11 @@ ; P9LE: xscvdpsxws ; P9LE: xxspltw ; P9LE: blr -; P8BE: lxsdx +; P8BE: lfdx ; P8BE: xscvdpsxws ; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsdx +; P8LE: lfdx ; P8LE: xscvdpsxws ; P8LE: xxspltw ; P8LE: blr @@ -2645,10 +2645,10 @@ ; P9BE: xvcvspuxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspuxws v2, [[REG1]] -; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } @@ -2827,20 +2827,20 @@ ; P9LE: xvcvdpsp ; P9LE: vmrgew ; P9LE: xvcvspuxws v2 -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp ; P8BE: xvcvdpsp ; P8BE: vmrgew ; P8BE: xvcvspuxws v2 -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -2901,9 +2901,9 @@ ; P9LE: vmrgew ; P9LE: xvcvspuxws v2 ; P8BE: lfdux -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp @@ -2911,9 +2911,9 @@ ; P8BE: vmrgew ; P8BE: xvcvspuxws v2 ; P8LE: lfdux -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx +; 
P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -2974,9 +2974,9 @@ ; P9LE: vmrgew ; P9LE: xvcvspuxws v2 ; P8BE: lfdux -; P8BE: lxsdx -; P8BE: lxsdx -; P8BE: lxsdx +; P8BE: lfdx +; P8BE: lfdx +; P8BE: lfdx ; P8BE: xxmrghd ; P8BE: xxmrghd ; P8BE: xvcvdpsp @@ -2984,9 +2984,9 @@ ; P8BE: vmrgew ; P8BE: xvcvspuxws v2 ; P8LE: lfdux -; P8LE: lxsdx -; P8LE: lxsdx -; P8LE: lxsdx +; P8LE: lfdx +; P8LE: lfdx +; P8LE: lfdx ; P8LE: xxmrghd ; P8LE: xxmrghd ; P8LE: xvcvdpsp @@ -3040,11 +3040,11 @@ ; P9LE: xscvdpuxws ; P9LE: xxspltw ; P9LE: blr -; P8BE: lxsdx +; P8BE: lfdx ; P8BE: xscvdpuxws ; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsdx +; P8LE: lfdx ; P8LE: xscvdpuxws ; P8LE: xxspltw ; P8LE: blr @@ -3508,13 +3508,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3546,13 +3546,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3591,13 +3591,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3636,13 +3636,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3693,11 +3693,11 @@ ; P9LE-NEXT: xscvdpsxds ; P9LE-NEXT: xxspltd v2 
; P9LE-NEXT: blr -; P8BE: lxsspx +; P8BE: lfsx ; P8BE-NEXT: xscvdpsxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr -; P8LE: lxsspx +; P8LE: lfsx ; P8LE-NEXT: xscvdpsxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr @@ -4412,13 +4412,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4450,13 +4450,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4495,13 +4495,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4540,13 +4540,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4597,11 +4597,11 @@ ; P9LE-NEXT: xscvdpuxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr -; P8BE: lxsspx +; P8BE: lfsx ; P8BE-NEXT: xscvdpuxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr -; P8LE: lxsspx +; P8LE: lfsx ; P8LE-NEXT: xscvdpuxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr Index: test/CodeGen/PowerPC/direct-move-profit.ll =================================================================== --- test/CodeGen/PowerPC/direct-move-profit.ll +++ test/CodeGen/PowerPC/direct-move-profit.ll @@ -17,7 +17,7 @@ ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa -; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}} +; 
CHECK: lfiwax [[REG:[0-9]+]], {{.*}} ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa ; CHECK: xscvsxdsp {{.*}}, [[REG]] @@ -40,7 +40,7 @@ ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa -; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}} +; CHECK: lfiwax [[REG:[0-9]+]], {{.*}} ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa ; CHECK: xscvsxdsp {{.*}}, [[REG]] Index: test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll =================================================================== --- test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -1034,10 +1034,10 @@ define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { entry: ; CHECK-LABEL: insertVarF -; CHECK: stxsspx 1, +; CHECK: stfsx 1, ; CHECK: lxv ; CHECK-BE-LABEL: insertVarF -; CHECK-BE: stxsspx 1, +; CHECK-BE: stfsx 1, ; CHECK-BE: lxv %vecins = insertelement <4 x float> %a, float %f, i32 %el ret <4 x float> %vecins Index: test/CodeGen/PowerPC/ppc64le-smallarg.ll =================================================================== --- test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -43,7 +43,7 @@ } ; CHECK: @callee2 ; CHECK: addi [[TOCREG:[0-9]+]], 1, 136 -; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]] +; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]] ; CHECK: blr define void @caller2() { @@ -54,7 +54,7 @@ } ; CHECK: @caller2 ; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136 -; CHECK: stxsspx {{[0-9]+}}, 0, [[TOCOFF]] +; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]] ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) Index: test/CodeGen/PowerPC/pr25157-peephole.ll =================================================================== --- test/CodeGen/PowerPC/pr25157-peephole.ll +++ test/CodeGen/PowerPC/pr25157-peephole.ll @@ -57,7 +57,7 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lxsspx +; CHECK: lfsx ; CHECK: xxspltd ; CHECK: stxvd2x ; CHECK-NOT: xxswapd Index: 
test/CodeGen/PowerPC/pr25157.ll =================================================================== --- test/CodeGen/PowerPC/pr25157.ll +++ test/CodeGen/PowerPC/pr25157.ll @@ -57,6 +57,6 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lxsspx +; CHECK: lfsx ; CHECK-P9-LABEL: @aercalc_ ; CHECK-P9: lfs Index: test/CodeGen/PowerPC/pr30715.ll =================================================================== --- test/CodeGen/PowerPC/pr30715.ll +++ test/CodeGen/PowerPC/pr30715.ll @@ -67,7 +67,7 @@ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body -; CHECK: stxsdx +; CHECK: stfdx ; CHECK: lxvd2x } Index: test/CodeGen/PowerPC/select-addrRegRegOnly.ll =================================================================== --- test/CodeGen/PowerPC/select-addrRegRegOnly.ll +++ test/CodeGen/PowerPC/select-addrRegRegOnly.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: testSingleAccess: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: addi 3, 3, 8 -; CHECK-NEXT: lxsiwax 0, 0, 3 +; CHECK-NEXT: lfiwax 0, 0, 3 ; CHECK-NEXT: xscvsxdsp 1, 0 ; CHECK-NEXT: blr entry: Index: test/CodeGen/PowerPC/select_const.ll =================================================================== --- test/CodeGen/PowerPC/select_const.ll +++ test/CodeGen/PowerPC/select_const.ll @@ -652,7 +652,7 @@ ; ISEL-NEXT: addi 4, 4, .LCPI34_0@toc@l ; ISEL-NEXT: addi 3, 3, .LCPI34_1@toc@l ; ISEL-NEXT: isel 3, 3, 4, 1 -; ISEL-NEXT: lxsdx 1, 0, 3 +; ISEL-NEXT: lfdx 1, 0, 3 ; ISEL-NEXT: blr ; ; NO_ISEL-LABEL: sel_constants_fadd_constant: @@ -667,7 +667,7 @@ ; NO_ISEL-NEXT: ori 3, 4, 0 ; NO_ISEL-NEXT: b .LBB34_2 ; NO_ISEL-NEXT: .LBB34_2: -; NO_ISEL-NEXT: lxsdx 1, 0, 3 +; NO_ISEL-NEXT: lfdx 1, 0, 3 ; NO_ISEL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = fadd double %sel, 5.1 @@ -683,7 +683,7 @@ ; ISEL-NEXT: addi 4, 4, .LCPI35_0@toc@l ; ISEL-NEXT: addi 3, 3, .LCPI35_1@toc@l ; ISEL-NEXT: isel 3, 3, 4, 1 -; ISEL-NEXT: 
lxsdx 1, 0, 3 +; ISEL-NEXT: lfdx 1, 0, 3 ; ISEL-NEXT: blr ; ; NO_ISEL-LABEL: sel_constants_fsub_constant: @@ -698,7 +698,7 @@ ; NO_ISEL-NEXT: ori 3, 4, 0 ; NO_ISEL-NEXT: b .LBB35_2 ; NO_ISEL-NEXT: .LBB35_2: -; NO_ISEL-NEXT: lxsdx 1, 0, 3 +; NO_ISEL-NEXT: lfdx 1, 0, 3 ; NO_ISEL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = fsub double %sel, 5.1 @@ -714,7 +714,7 @@ ; ISEL-NEXT: addi 4, 4, .LCPI36_0@toc@l ; ISEL-NEXT: addi 3, 3, .LCPI36_1@toc@l ; ISEL-NEXT: isel 3, 3, 4, 1 -; ISEL-NEXT: lxsdx 1, 0, 3 +; ISEL-NEXT: lfdx 1, 0, 3 ; ISEL-NEXT: blr ; ; NO_ISEL-LABEL: sel_constants_fmul_constant: @@ -729,7 +729,7 @@ ; NO_ISEL-NEXT: ori 3, 4, 0 ; NO_ISEL-NEXT: b .LBB36_2 ; NO_ISEL-NEXT: .LBB36_2: -; NO_ISEL-NEXT: lxsdx 1, 0, 3 +; NO_ISEL-NEXT: lfdx 1, 0, 3 ; NO_ISEL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = fmul double %sel, 5.1 @@ -745,7 +745,7 @@ ; ISEL-NEXT: addi 4, 4, .LCPI37_0@toc@l ; ISEL-NEXT: addi 3, 3, .LCPI37_1@toc@l ; ISEL-NEXT: isel 3, 3, 4, 1 -; ISEL-NEXT: lxsdx 1, 0, 3 +; ISEL-NEXT: lfdx 1, 0, 3 ; ISEL-NEXT: blr ; ; NO_ISEL-LABEL: sel_constants_fdiv_constant: @@ -760,7 +760,7 @@ ; NO_ISEL-NEXT: ori 3, 4, 0 ; NO_ISEL-NEXT: b .LBB37_2 ; NO_ISEL-NEXT: .LBB37_2: -; NO_ISEL-NEXT: lxsdx 1, 0, 3 +; NO_ISEL-NEXT: lfdx 1, 0, 3 ; NO_ISEL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = fdiv double %sel, 5.1 @@ -775,12 +775,12 @@ ; ALL-NEXT: # BB#1: ; ALL-NEXT: addis 3, 2, .LCPI38_0@toc@ha ; ALL-NEXT: addi 3, 3, .LCPI38_0@toc@l -; ALL-NEXT: lxsdx 1, 0, 3 +; ALL-NEXT: lfdx 1, 0, 3 ; ALL-NEXT: blr ; ALL-NEXT: .LBB38_2: ; ALL-NEXT: addis 3, 2, .LCPI38_1@toc@ha ; ALL-NEXT: addi 3, 3, .LCPI38_1@toc@l -; ALL-NEXT: lxsspx 1, 0, 3 +; ALL-NEXT: lfsx 1, 0, 3 ; ALL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = frem double %sel, 5.1 Index: test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- test/CodeGen/PowerPC/swaps-le-6.ll +++ 
test/CodeGen/PowerPC/swaps-le-6.ll @@ -27,7 +27,7 @@ ; CHECK-LABEL: @bar0 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-DAG: lfdx [[REG2:[0-9]+]] ; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 ; CHECK: stxvd2x [[REG5]] @@ -50,7 +50,7 @@ ; CHECK-LABEL: @bar1 ; CHECK-DAG: lxvd2x [[REG1:[0-9]+]] -; CHECK-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-DAG: lfdx [[REG2:[0-9]+]] ; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] ; CHECK: stxvd2x [[REG5]] Index: test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -16,9 +16,9 @@ ; CHECK-LABEL: testi0 ; CHECK: lxvd2x 0, 0, 3 -; CHECK: lxsdx 1, 0, 4 -; CHECK: xxswapd 0, 0 -; CHECK: xxspltd 1, 1, 0 +; CHECK: lfdx 1, 0, 4 +; CHECK-DAG: xxspltd 1, 1, 0 +; CHECK-DAG: xxswapd 0, 0 ; CHECK: xxpermdi 34, 0, 1, 1 ; CHECK-P9-LABEL: testi0 @@ -36,9 +36,9 @@ ; CHECK-LABEL: testi1 ; CHECK: lxvd2x 0, 0, 3 -; CHECK: lxsdx 1, 0, 4 -; CHECK: xxswapd 0, 0 -; CHECK: xxspltd 1, 1, 0 +; CHECK: lfdx 1, 0, 4 +; CHECK-DAG: xxspltd 1, 1, 0 +; CHECK-DAG: xxswapd 0, 0 ; CHECK: xxmrgld 34, 1, 0 ; CHECK-P9-LABEL: testi1 Index: test/CodeGen/PowerPC/vsx_scalar_ld_st.ll =================================================================== --- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -20,7 +20,7 @@ ret void ; CHECK-LABEL: @dblToInt ; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]], -; CHECK: stxsiwx [[REGCONV1]], +; CHECK: stfiwx [[REGCONV1]], } ; Function Attrs: nounwind @@ -33,7 +33,7 @@ ret void ; CHECK-LABEL: @fltToInt ; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]], -; CHECK: stxsiwx [[REGCONV2]], +; CHECK: stfiwx [[REGCONV2]], } ; Function Attrs: nounwind @@ -45,7 +45,7 @@ store volatile double %conv, double* %dd, align 8 ret void ; 
CHECK-LABEL: @intToDbl -; CHECK: lxsiwax [[REGLD1:[0-9]+]], +; CHECK: lfiwax [[REGLD1:[0-9]+]], ; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]] } @@ -58,7 +58,7 @@ store volatile float %conv, float* %ff, align 4 ret void ; CHECK-LABEL: @intToFlt -; CHECK: lxsiwax [[REGLD2:[0-9]+]], +; CHECK: lfiwax [[REGLD2:[0-9]+]], ; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]] } @@ -72,7 +72,7 @@ ret void ; CHECK-LABEL: @dblToUInt ; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]], -; CHECK: stxsiwx [[REGCONV3]], +; CHECK: stfiwx [[REGCONV3]], } ; Function Attrs: nounwind @@ -85,7 +85,7 @@ ret void ; CHECK-LABEL: @fltToUInt ; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]], -; CHECK: stxsiwx [[REGCONV4]], +; CHECK: stfiwx [[REGCONV4]], } ; Function Attrs: nounwind @@ -97,7 +97,7 @@ store volatile double %conv, double* %dd, align 8 ret void ; CHECK-LABEL: @uIntToDbl -; CHECK: lxsiwzx [[REGLD3:[0-9]+]], +; CHECK: lfiwzx [[REGLD3:[0-9]+]], ; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]] } @@ -110,7 +110,7 @@ store volatile float %conv, float* %ff, align 4 ret void ; CHECK-LABEL: @uIntToFlt -; CHECK: lxsiwzx [[REGLD4:[0-9]+]], +; CHECK: lfiwzx [[REGLD4:[0-9]+]], ; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]] } @@ -123,8 +123,8 @@ store volatile float %conv, float* %ff, align 4 ret void ; CHECK-LABEL: @dblToFloat -; CHECK: lxsdx [[REGLD5:[0-9]+]], -; CHECK: stxsspx [[REGLD5]], +; CHECK: lfdx [[REGLD5:[0-9]+]], +; CHECK: stfsx [[REGLD5]], ; CHECK-P9-LABEL: @dblToFloat ; CHECK-P9: lfd [[REGLD5:[0-9]+]], ; CHECK-P9: stfs [[REGLD5]], @@ -139,8 +139,8 @@ store volatile double %conv, double* %dd, align 8 ret void ; CHECK-LABEL: @floatToDbl -; CHECK: lxsspx [[REGLD5:[0-9]+]], -; CHECK: stxsdx [[REGLD5]], +; CHECK: lfsx [[REGLD5:[0-9]+]], +; CHECK: stfdx [[REGLD5]], ; CHECK-P9-LABEL: @floatToDbl ; CHECK-P9: lfs [[REGLD5:[0-9]+]], ; CHECK-P9: stfd [[REGLD5]],