Index: lib/Target/PowerPC/P9InstrResources.td =================================================================== --- lib/Target/PowerPC/P9InstrResources.td +++ lib/Target/PowerPC/P9InstrResources.td @@ -711,7 +711,8 @@ LXV, LXVX, LXSD, - DFLOADf64 + DFLOADf64, + XFLOADf64 )>; // 4 Cycle load uses a single slice. @@ -751,7 +752,10 @@ LXSSPX, LXSIWAX, LXSSP, - DFLOADf32 + DFLOADf32, + XFLOADf32, + LIWAX, + LIWZX )>; // Cracked Load that requires the PM resource. @@ -781,7 +785,10 @@ STXSSPX, STXSIWX, DFSTOREf32, - DFSTOREf64 + DFSTOREf64, + XFSTOREf32, + XFSTOREf64, + STIWX )>; // Store operation that requires the whole superslice. Index: lib/Target/PowerPC/PPCInstrInfo.h =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.h +++ lib/Target/PowerPC/PPCInstrInfo.h @@ -282,6 +282,9 @@ ArrayRef> getSerializableBitmaskMachineOperandTargetFlags() const override; + // Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. + bool expandVSXMemPseudo(MachineInstr &MI) const; + // Lower pseudo instructions after register allocation. bool expandPostRAPseudo(MachineInstr &MI) const override; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1968,29 +1968,13 @@ return makeArrayRef(TargetFlags); } -bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - auto &MBB = *MI.getParent(); - auto DL = MI.getDebugLoc(); - switch (MI.getOpcode()) { - case TargetOpcode::LOAD_STACK_GUARD: { - assert(Subtarget.isTargetLinux() && - "Only Linux target is expected to contain LOAD_STACK_GUARD"); - const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; - const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; - MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ)); - MachineInstrBuilder(*MI.getParent()->getParent(), MI) - .addImm(Offset) - .addReg(Reg); - return true; - } - case PPC::DFLOADf32: - case PPC::DFLOADf64: - case PPC::DFSTOREf32: - case PPC::DFSTOREf64: { - assert(Subtarget.hasP9Vector() && - "Invalid D-Form Pseudo-ops on non-P9 target."); - assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && - "D-form op must have register and immediate operands"); +// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction. +// The VSX versions have the advantage of a full 64-register target whereas +// the FP ones have the advantage of lower latency and higher throughput. So +// what we are after is using the faster instructions in low register pressure +// situations and using the larger register file in high register pressure +// situations. +bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const { unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: @@ -2009,7 +1993,38 @@ UpperOpcode = PPC::STXSD; LowerOpcode = PPC::STFD; break; + case PPC::XFLOADf32: + UpperOpcode = PPC::LXSSPX; + LowerOpcode = PPC::LFSX; + break; + case PPC::XFLOADf64: + UpperOpcode = PPC::LXSDX; + LowerOpcode = PPC::LFDX; + break; + case PPC::XFSTOREf32: + UpperOpcode = PPC::STXSSPX; + LowerOpcode = PPC::STFSX; + break; + case PPC::XFSTOREf64: + UpperOpcode = PPC::STXSDX; + LowerOpcode = PPC::STFDX; + break; + case PPC::LIWAX: + UpperOpcode = PPC::LXSIWAX; + LowerOpcode = PPC::LFIWAX; + break; + case PPC::LIWZX: + UpperOpcode = PPC::LXSIWZX; + LowerOpcode = PPC::LFIWZX; + break; + case PPC::STIWX: + UpperOpcode = PPC::STXSIWX; + LowerOpcode = PPC::STFIWX; + break; + default: + llvm_unreachable("Unknown Operation!"); } + unsigned TargetReg = MI.getOperand(0).getReg(); unsigned Opcode; if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || @@ -2019,6 +2034,52 @@ Opcode = UpperOpcode; MI.setDesc(get(Opcode)); return true; +} + +bool 
PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { + auto &MBB = *MI.getParent(); + auto DL = MI.getDebugLoc(); + + switch (MI.getOpcode()) { + case TargetOpcode::LOAD_STACK_GUARD: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_STACK_GUARD"); + const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(Subtarget.isPPC64() ? PPC::LD : PPC::LWZ)); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: { + assert(Subtarget.hasP9Vector() && + "Invalid D-Form Pseudo-ops on Pre-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); + return expandVSXMemPseudo(MI); + } + case PPC::XFLOADf32: + case PPC::XFSTOREf32: + case PPC::LIWAX: + case PPC::LIWZX: + case PPC::STIWX: { + assert(Subtarget.hasP8Vector() && + "Invalid X-Form Pseudo-ops on Pre-P8 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && + "X-form op must have register and register operands"); + return expandVSXMemPseudo(MI); + } + case PPC::XFLOADf64: + case PPC::XFSTOREf64: { + assert(Subtarget.hasVSX() && + "Invalid X-Form Pseudo-ops on target that has no VSX."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() && + "X-form op must have register and register operands"); + return expandVSXMemPseudo(MI); } case PPC::SPILLTOVSR_LD: { unsigned TargetReg = MI.getOperand(0).getReg(); Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -131,6 +131,14 @@ "lxsdx $XT, $src", IIC_LdStLFD, [(set f64:$XT, (load xoaddr:$src))]>; + let isPseudo = 1 in { + // Pseudo instruction XFLOADf64 will be 
expanded to LXSDX or LFDX later + let CodeSize = 3 in + def XFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrr:$src), + "#XFLOADf64", + [(set f64:$XT, (load xoaddr:$src))]>; + } + let Predicates = [HasVSX, HasOnlySwappingMemOps] in def LXVD2X : XX1Form<31, 844, (outs vsrc:$XT), (ins memrr:$src), @@ -156,6 +164,14 @@ "stxsdx $XT, $dst", IIC_LdStSTFD, [(store f64:$XT, xoaddr:$dst)]>; + let isPseudo = 1 in { + // Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later + let CodeSize = 3 in + def XFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst), + "#XFSTOREf64", + [(store f64:$XT, xoaddr:$dst)]>; + } + let Predicates = [HasVSX, HasOnlySwappingMemOps] in { // The behaviour of this instruction is endianness-specific so we provide no // pattern to match it without considering endianness. @@ -1215,32 +1231,53 @@ let mayLoad = 1, mayStore = 0 in { let CodeSize = 3 in def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), - "lxsspx $XT, $src", IIC_LdStLFD, - [(set f32:$XT, (load xoaddr:$src))]>; + "lxsspx $XT, $src", IIC_LdStLFD, []>; def LXSIWAX : XX1Form<31, 76, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwax $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + "lxsiwax $XT, $src", IIC_LdStLFD, []>; def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src), - "lxsiwzx $XT, $src", IIC_LdStLFD, - [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + "lxsiwzx $XT, $src", IIC_LdStLFD, []>; + let isPseudo = 1 in { + // Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later + let CodeSize = 3 in + def XFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrr:$src), + "#XFLOADf32", + [(set f32:$XT, (load xoaddr:$src))]>; + // Pseudo instruction LIWAX will be expanded to LXSIWAX or LFIWAX later + def LIWAX : Pseudo<(outs vsfrc:$XT), (ins memrr:$src), + "#LIWAX", + [(set f64:$XT, (PPClfiwax xoaddr:$src))]>; + // Pseudo instruction LIWZX will be expanded to LXSIWZX or LFIWZX later + def LIWZX : Pseudo<(outs vsfrc:$XT), (ins 
memrr:$src), + "#LIWZX", + [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>; + } } // mayLoad // VSX scalar stores introduced in ISA 2.07 let mayStore = 1, mayLoad = 0 in { let CodeSize = 3 in def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), - "stxsspx $XT, $dst", IIC_LdStSTFD, - [(store f32:$XT, xoaddr:$dst)]>; + "stxsspx $XT, $dst", IIC_LdStSTFD, []>; def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst), - "stxsiwx $XT, $dst", IIC_LdStSTFD, - [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + "stxsiwx $XT, $dst", IIC_LdStSTFD, []>; + let isPseudo = 1 in { + // Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later + let CodeSize = 3 in + def XFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrr:$dst), + "#XFSTOREf32", + [(store f32:$XT, xoaddr:$dst)]>; + // Pseudo instruction STIWX will be expanded to STXSIWX or STFIWX later + def STIWX : Pseudo<(outs), (ins vsfrc:$XT, memrr:$dst), + "#STIWX", + [(PPCstfiwx f64:$XT, xoaddr:$dst)]>; + } } // mayStore } // UseVSXReg = 1 def : Pat<(f64 (extloadf32 xoaddr:$src)), - (COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>; + (COPY_TO_REGCLASS (XFLOADf32 xoaddr:$src), VSFRC)>; def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))), - (f32 (LXSSPX xoaddr:$src))>; + (f32 (XFLOADf32 xoaddr:$src))>; def : Pat<(f64 (fpextend f32:$src)), (COPY_TO_REGCLASS $src, VSFRC)>; @@ -1414,7 +1451,7 @@ (f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>; } def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)), - (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>; + (v4i32 (XXSPLTWs (LIWAX xoaddr:$src), 1))>; } // AddedComplexity = 400 } // HasP8Vector @@ -3040,10 +3077,10 @@ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>; def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>; + (XSCVDPSXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWSs 
(LXSSPX xoaddr:$A)), VSRC), 1))>; + (XSCVDPUXWSs (XFLOADf32 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)), (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>; @@ -3061,19 +3098,19 @@ } let Predicates = [HasVSX, NoP9Vector] in { - // Load-and-splat with fp-to-int conversion (using X-Form VSX loads). + // Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads). def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + (XSCVDPSXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS - (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>; + (XSCVDPUXWS (XFLOADf64 xoaddr:$A)), VSRC), 1))>; def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS - (LXSSPX xoaddr:$A), VSFRC)), 0))>; + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS - (LXSSPX xoaddr:$A), VSFRC)), 0))>; + (XFLOADf32 xoaddr:$A), VSFRC)), 0))>; } // Big endian, available on all targets with VSX Index: lib/Target/PowerPC/PPCVSXSwapRemoval.cpp =================================================================== --- lib/Target/PowerPC/PPCVSXSwapRemoval.cpp +++ lib/Target/PowerPC/PPCVSXSwapRemoval.cpp @@ -353,6 +353,8 @@ break; case PPC::LXSDX: case PPC::LXSSPX: + case PPC::XFLOADf64: + case PPC::XFLOADf32: // A load of a floating-point value into the high-order half of // a vector register is safe, provided that we introduce a swap // following the load, which will be done by the SUBREG_TO_REG Index: test/CodeGen/PowerPC/VSX-XForm-Scalars.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/VSX-XForm-Scalars.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs | 
FileCheck %s --check-prefix=CHECK-P8 +; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s + +@a = external local_unnamed_addr global <4 x i32>, align 16 +@pb = external local_unnamed_addr global float*, align 8 + +define void @testExpandPostRAPseudo(i32* nocapture readonly %ptr) { +; CHECK-LABEL: testExpandPostRAPseudo: + +; CHECK-P8: lxsiwax 34, 0, 3 +; CHECK-P8-NEXT: xxspltw 34, 34, 1 +; CHECK-P8-NEXT: stvx 2, 0, 4 + +; CHECK: #APP +; CHECK-NEXT: #Clobber Registers +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lis 4, 1024 +; CHECK-NEXT: lfiwax 0, 0, 3 +; CHECK: xscvsxdsp 0, 0 +; CHECK-NEXT: ld 3, 0(3) +; CHECK-NEXT: stfsx 0, 3, 4 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* %ptr, align 4 + %splat.splatinsert = insertelement <4 x i32> undef, i32 %0, i32 0 + %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + store <4 x i32> %splat.splat, <4 x i32>* @a, align 16 + tail call void asm sideeffect "#Clobber Registers", "~{f0},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15},~{f16},~{f17},~{f18},~{f19},~{f20},~{f21},~{f22},~{f23},~{f24},~{f25},~{f26},~{f27},~{f28},~{f29},~{f30},~{f31}"() + %1 = load i32, i32* %ptr, align 4 + %conv = sitofp i32 %1 to float + %2 = load float*, float** @pb, align 8 + %add.ptr = getelementptr inbounds float, float* %2, i64 16777216 + store float %conv, float* %add.ptr, align 4 + ret void +} Index: test/CodeGen/PowerPC/build-vector-tests.ll =================================================================== --- test/CodeGen/PowerPC/build-vector-tests.ll +++ test/CodeGen/PowerPC/build-vector-tests.ll @@ -1485,10 +1485,10 @@ ; P9BE: xvcvspsxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspsxws v2, [[REG1]] -; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpsxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lxsspx 
[[REG1:f[0-9]+]], 0, r3 +; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpsxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } @@ -1880,11 +1880,11 @@ ; P9LE: xscvdpsxws ; P9LE: xxspltw ; P9LE: blr -; P8BE: lxsdx +; P8BE: lfdx ; P8BE: xscvdpsxws ; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsdx +; P8LE: lfdx ; P8LE: xscvdpsxws ; P8LE: xxspltw ; P8LE: blr @@ -2645,10 +2645,10 @@ ; P9BE: xvcvspuxws v2, [[REG1]] ; P9LE: [[REG1:[vs0-9]+]], 0, r3 ; P9LE: xvcvspuxws v2, [[REG1]] -; P8BE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8BE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8BE: xscvdpuxws f[[REG2:[0-9]+]], [[REG1]] ; P8BE: xxspltw v2, vs[[REG2]], 1 -; P8LE: lxsspx [[REG1:f[0-9]+]], 0, r3 +; P8LE: lfsx [[REG1:f[0-9]+]], 0, r3 ; P8LE: xscvdpuxws f[[REG2:[vs0-9]+]], [[REG1]] ; P8LE: xxspltw v2, vs[[REG2]], 1 } @@ -3040,11 +3040,11 @@ ; P9LE: xscvdpuxws ; P9LE: xxspltw ; P9LE: blr -; P8BE: lxsdx +; P8BE: lfdx ; P8BE: xscvdpuxws ; P8BE: xxspltw ; P8BE: blr -; P8LE: lxsdx +; P8LE: lfdx ; P8LE: xscvdpuxws ; P8LE: xxspltw ; P8LE: blr @@ -3508,13 +3508,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3546,13 +3546,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpsxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3591,13 +3591,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3636,13 +3636,13 @@ ; P9LE-NEXT: blr ; 
P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpsxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpsxds v2 ; P8LE-NEXT: blr @@ -3693,11 +3693,11 @@ ; P9LE-NEXT: xscvdpsxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr -; P8BE: lxsspx +; P8BE: lfsx ; P8BE-NEXT: xscvdpsxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr -; P8LE: lxsspx +; P8LE: lfsx ; P8LE-NEXT: xscvdpsxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr @@ -4412,13 +4412,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4450,13 +4450,13 @@ ; P9LE: xxmrghd ; P9LE-NEXT: xvcvdpuxds v2 ; P9LE-NEXT: blr -; P8BE: lxsspx -; P8BE: lxsspx +; P8BE: lfsx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr -; P8LE: lxsspx -; P8LE: lxsspx +; P8LE: lfsx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4495,13 +4495,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4540,13 +4540,13 @@ ; P9LE-NEXT: blr ; P8BE: sldi ; P8BE: lfsux -; P8BE: lxsspx +; P8BE: lfsx ; P8BE: xxmrghd ; P8BE-NEXT: xvcvdpuxds v2 ; P8BE-NEXT: blr ; P8LE: sldi ; P8LE: lfsux -; P8LE: lxsspx +; P8LE: lfsx ; P8LE: xxmrghd ; P8LE-NEXT: xvcvdpuxds v2 ; P8LE-NEXT: blr @@ -4597,11 +4597,11 @@ ; P9LE-NEXT: xscvdpuxds ; P9LE-NEXT: xxspltd v2 ; P9LE-NEXT: blr -; P8BE: lxsspx +; P8BE: lfsx ; P8BE-NEXT: xscvdpuxds ; P8BE-NEXT: xxspltd v2 ; P8BE-NEXT: blr -; P8LE: lxsspx +; P8LE: lfsx ; P8LE-NEXT: xscvdpuxds ; P8LE-NEXT: xxspltd v2 ; P8LE-NEXT: blr Index: 
test/CodeGen/PowerPC/direct-move-profit.ll =================================================================== --- test/CodeGen/PowerPC/direct-move-profit.ll +++ test/CodeGen/PowerPC/direct-move-profit.ll @@ -17,7 +17,7 @@ ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa -; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}} +; CHECK: lfiwax [[REG:[0-9]+]], {{.*}} ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa ; CHECK: xscvsxdsp {{.*}}, [[REG]] @@ -40,7 +40,7 @@ ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa -; CHECK: lxsiwax [[REG:[0-9]+]], {{.*}} +; CHECK: lfiwax [[REG:[0-9]+]], {{.*}} ; CHECK-NOT: mtvsrwa ; CHECK-NOT: mtfprwa ; CHECK: xscvsxdsp {{.*}}, [[REG]] Index: test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll =================================================================== --- test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -1034,10 +1034,10 @@ define <4 x float> @insertVarF(<4 x float> %a, float %f, i32 %el) { entry: ; CHECK-LABEL: insertVarF -; CHECK: stxsspx 1, +; CHECK: stfsx 1, ; CHECK: lxv ; CHECK-BE-LABEL: insertVarF -; CHECK-BE: stxsspx 1, +; CHECK-BE: stfsx 1, ; CHECK-BE: lxv %vecins = insertelement <4 x float> %a, float %f, i32 %el ret <4 x float> %vecins Index: test/CodeGen/PowerPC/ppc64le-smallarg.ll =================================================================== --- test/CodeGen/PowerPC/ppc64le-smallarg.ll +++ test/CodeGen/PowerPC/ppc64le-smallarg.ll @@ -43,7 +43,7 @@ } ; CHECK: @callee2 ; CHECK: addi [[TOCREG:[0-9]+]], 1, 136 -; CHECK: lxsspx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]] +; CHECK: lfsx {{[0-9]+}}, {{[0-9]+}}, [[TOCREG]] ; CHECK: blr define void @caller2() { @@ -54,7 +54,7 @@ } ; CHECK: @caller2 ; CHECK: addi [[TOCOFF:[0-9]+]], {{[0-9]+}}, 136 -; CHECK: stxsspx {{[0-9]+}}, 0, [[TOCOFF]] +; CHECK: stfsx {{[0-9]+}}, 0, [[TOCOFF]] ; CHECK: bl test2 declare float @test2(float, float, float, float, float, float, float, float, float, float, float, float, float, float) Index: 
test/CodeGen/PowerPC/pr25157-peephole.ll =================================================================== --- test/CodeGen/PowerPC/pr25157-peephole.ll +++ test/CodeGen/PowerPC/pr25157-peephole.ll @@ -57,7 +57,7 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lxsspx +; CHECK: lfsx ; CHECK: xxspltd ; CHECK: stxvd2x ; CHECK-NOT: xxswapd Index: test/CodeGen/PowerPC/pr25157.ll =================================================================== --- test/CodeGen/PowerPC/pr25157.ll +++ test/CodeGen/PowerPC/pr25157.ll @@ -57,6 +57,6 @@ } ; CHECK-LABEL: @aercalc_ -; CHECK: lxsspx +; CHECK: lfsx ; CHECK-P9-LABEL: @aercalc_ ; CHECK-P9: lfs Index: test/CodeGen/PowerPC/select-addrRegRegOnly.ll =================================================================== --- test/CodeGen/PowerPC/select-addrRegRegOnly.ll +++ test/CodeGen/PowerPC/select-addrRegRegOnly.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: testSingleAccess: ; CHECK: # BB#0: # %entry ; CHECK-NEXT: addi 3, 3, 8 -; CHECK-NEXT: lxsiwax 0, 0, 3 +; CHECK-NEXT: lfiwax 0, 0, 3 ; CHECK-NEXT: xscvsxdsp 1, 0 ; CHECK-NEXT: blr entry: Index: test/CodeGen/PowerPC/select_const.ll =================================================================== --- test/CodeGen/PowerPC/select_const.ll +++ test/CodeGen/PowerPC/select_const.ll @@ -780,7 +780,7 @@ ; ALL-NEXT: .LBB38_2: ; ALL-NEXT: addis 3, 2, .LCPI38_1@toc@ha ; ALL-NEXT: addi 3, 3, .LCPI38_1@toc@l -; ALL-NEXT: lxsspx 1, 0, 3 +; ALL-NEXT: lfsx 1, 0, 3 ; ALL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = frem double %sel, 5.1 Index: test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -17,8 +17,8 @@ ; CHECK-LABEL: testi0 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: lxsdx 1, 0, 4 -; CHECK: xxswapd 0, 0 -; CHECK: xxspltd 1, 1, 0 +; CHECK-DAG: xxspltd 1, 1, 0 +; CHECK-DAG: xxswapd 0, 0 ; CHECK: xxpermdi 34, 0, 1, 1 ; CHECK-P9-LABEL: 
testi0 @@ -37,8 +37,8 @@ ; CHECK-LABEL: testi1 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: lxsdx 1, 0, 4 -; CHECK: xxswapd 0, 0 -; CHECK: xxspltd 1, 1, 0 +; CHECK-DAG: xxspltd 1, 1, 0 +; CHECK-DAG: xxswapd 0, 0 ; CHECK: xxmrgld 34, 1, 0 ; CHECK-P9-LABEL: testi1 Index: test/CodeGen/PowerPC/vsx_scalar_ld_st.ll =================================================================== --- test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -20,7 +20,7 @@ ret void ; CHECK-LABEL: @dblToInt ; CHECK: xscvdpsxws [[REGCONV1:[0-9]+]], -; CHECK: stxsiwx [[REGCONV1]], +; CHECK: stfiwx [[REGCONV1]], } ; Function Attrs: nounwind @@ -33,7 +33,7 @@ ret void ; CHECK-LABEL: @fltToInt ; CHECK: xscvdpsxws [[REGCONV2:[0-9]+]], -; CHECK: stxsiwx [[REGCONV2]], +; CHECK: stfiwx [[REGCONV2]], } ; Function Attrs: nounwind @@ -45,7 +45,7 @@ store volatile double %conv, double* %dd, align 8 ret void ; CHECK-LABEL: @intToDbl -; CHECK: lxsiwax [[REGLD1:[0-9]+]], +; CHECK: lfiwax [[REGLD1:[0-9]+]], ; CHECK: xscvsxddp {{[0-9]+}}, [[REGLD1]] } @@ -58,7 +58,7 @@ store volatile float %conv, float* %ff, align 4 ret void ; CHECK-LABEL: @intToFlt -; CHECK: lxsiwax [[REGLD2:[0-9]+]], +; CHECK: lfiwax [[REGLD2:[0-9]+]], ; CHECK: xscvsxdsp {{[0-9]}}, [[REGLD2]] } @@ -72,7 +72,7 @@ ret void ; CHECK-LABEL: @dblToUInt ; CHECK: xscvdpuxws [[REGCONV3:[0-9]+]], -; CHECK: stxsiwx [[REGCONV3]], +; CHECK: stfiwx [[REGCONV3]], } ; Function Attrs: nounwind @@ -85,7 +85,7 @@ ret void ; CHECK-LABEL: @fltToUInt ; CHECK: xscvdpuxws [[REGCONV4:[0-9]+]], -; CHECK: stxsiwx [[REGCONV4]], +; CHECK: stfiwx [[REGCONV4]], } ; Function Attrs: nounwind @@ -97,7 +97,7 @@ store volatile double %conv, double* %dd, align 8 ret void ; CHECK-LABEL: @uIntToDbl -; CHECK: lxsiwzx [[REGLD3:[0-9]+]], +; CHECK: lfiwzx [[REGLD3:[0-9]+]], ; CHECK: xscvuxddp {{[0-9]+}}, [[REGLD3]] } @@ -110,7 +110,7 @@ store volatile float %conv, float* %ff, align 4 ret void ; CHECK-LABEL: @uIntToFlt -; CHECK: lxsiwzx [[REGLD4:[0-9]+]], +; 
CHECK: lfiwzx [[REGLD4:[0-9]+]], ; CHECK: xscvuxdsp {{[0-9]+}}, [[REGLD4]] } @@ -124,7 +124,7 @@ ret void ; CHECK-LABEL: @dblToFloat ; CHECK: lxsdx [[REGLD5:[0-9]+]], -; CHECK: stxsspx [[REGLD5]], +; CHECK: stfsx [[REGLD5]], ; CHECK-P9-LABEL: @dblToFloat ; CHECK-P9: lfd [[REGLD5:[0-9]+]], ; CHECK-P9: stfs [[REGLD5]], @@ -139,7 +139,7 @@ store volatile double %conv, double* %dd, align 8 ret void ; CHECK-LABEL: @floatToDbl -; CHECK: lxsspx [[REGLD5:[0-9]+]], +; CHECK: lfsx [[REGLD5:[0-9]+]], ; CHECK: stxsdx [[REGLD5]], ; CHECK-P9-LABEL: @floatToDbl ; CHECK-P9: lfs [[REGLD5:[0-9]+]],