Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -996,13 +996,15 @@ FrameIdx)); NonRI = true; } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX)) + unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf64 : PPC::STXSDX; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); NonRI = true; } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX)) + unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf32 : PPC::STXSSPX; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); @@ -1122,12 +1124,14 @@ FrameIdx)); NonRI = true; } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg), - FrameIdx)); + unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf64 : PPC::LXSDX; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc), + DestReg), FrameIdx)); NonRI = true; } else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg), - FrameIdx)); + unsigned Opc = Subtarget.hasP9Vector() ? 
PPC::DFLOADf32 : PPC::LXSSPX; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc), + DestReg), FrameIdx)); NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { assert(Subtarget.isDarwin() && @@ -1873,6 +1877,41 @@ .addReg(Reg); return true; } + case PPC::DFLOADf32: + case PPC::DFLOADf64: + case PPC::DFSTOREf32: + case PPC::DFSTOREf64: { + assert(Subtarget.hasP9Vector() && + "Invalid D-Form Pseudo-ops on non-P9 target."); + unsigned UpperOpcode, LowerOpcode; + switch (MI.getOpcode()) { + case PPC::DFLOADf32: + UpperOpcode = PPC::LXSSP; + LowerOpcode = PPC::LFS; + break; + case PPC::DFLOADf64: + UpperOpcode = PPC::LXSD; + LowerOpcode = PPC::LFD; + break; + case PPC::DFSTOREf32: + UpperOpcode = PPC::STXSSP; + LowerOpcode = PPC::STFS; + break; + case PPC::DFSTOREf64: + UpperOpcode = PPC::STXSD; + LowerOpcode = PPC::STFD; + break; + } + unsigned TargetReg = MI.getOperand(0).getReg(); + unsigned Opcode; + if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) || + (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31)) + Opcode = LowerOpcode; + else + Opcode = UpperOpcode; + MI.setDesc(get(Opcode)); + return true; + } } return false; } Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -112,6 +112,7 @@ // Load indexed instructions let mayLoad = 1 in { + let CodeSize = 3 in def LXSDX : XX1Form<31, 588, (outs vsfrc:$XT), (ins memrr:$src), "lxsdx $XT, $src", IIC_LdStLFD, @@ -134,6 +135,7 @@ // Store indexed instructions let mayStore = 1 in { + let CodeSize = 3 in def STXSDX : XX1Form<31, 716, (outs), (ins vsfrc:$XT, memrr:$dst), "stxsdx $XT, $dst", IIC_LdStSTFD, @@ -1128,6 +1130,7 @@ // VSX scalar loads introduced in ISA 2.07 let mayLoad = 1 in { + let CodeSize = 3 in def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src), "lxsspx $XT, $src", IIC_LdStLFD, [(set f32:$XT, (load xoaddr:$src))]>; @@ -1141,6 +1144,7 
@@ // VSX scalar stores introduced in ISA 2.07 let mayStore = 1 in { + let CodeSize = 3 in def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst), "stxsspx $XT, $dst", IIC_LdStSTFD, [(store f32:$XT, xoaddr:$dst)]>; @@ -2220,6 +2224,8 @@ //===--------------------------------------------------------------------===// // Vector/Scalar Load/Store Instructions + // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in + // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. let mayLoad = 1 in { // Load Vector def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src), @@ -2259,6 +2265,8 @@ def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>; } // mayLoad + // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in + // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging. let mayStore = 1 in { // Store Vector def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst), @@ -2492,4 +2500,20 @@ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>; def : Pat<(f64 (PPCVexts f64:$A, 2)), (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>; + let isPseudo = 1 in { + def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src), + "#DFLOADf32", + [(set f32:$XT, (load iaddr:$src))]>; + def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src), + "#DFLOADf64", + [(set f64:$XT, (load iaddr:$src))]>; + def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst), + "#DFSTOREf32", + [(store f32:$XT, iaddr:$dst)]>; + def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst), + "#DFSTOREf64", + [(store f64:$XT, iaddr:$dst)]>; + } + def : Pat<(f64 (extloadf32 iaddr:$src)), + (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>; } // HasP9Vector, AddedComplexity Index: lib/Target/PowerPC/PPCRegisterInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCRegisterInfo.cpp +++ lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -78,6 +78,18 @@ 
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8; ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX; ImmToIdxMap[PPC::ADDI8] = PPC::ADD8; + + // VSX + ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX; + ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX; + ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX; + ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX; + ImmToIdxMap[PPC::LXV] = PPC::LXVX; + ImmToIdxMap[PPC::LXSD] = PPC::LXSDX; + ImmToIdxMap[PPC::LXSSP] = PPC::LXSSPX; + ImmToIdxMap[PPC::STXV] = PPC::STXVX; + ImmToIdxMap[PPC::STXSD] = PPC::STXSDX; + ImmToIdxMap[PPC::STXSSP] = PPC::STXSSPX; } /// getPointerRegClass - Return the register class to use to hold pointers. Index: test/CodeGen/PowerPC/VSX-DForm-Scalars.ll =================================================================== --- test/CodeGen/PowerPC/VSX-DForm-Scalars.ll +++ test/CodeGen/PowerPC/VSX-DForm-Scalars.ll @@ -0,0 +1,73 @@ +; RUN: llc < %s -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -verify-machineinstrs | FileCheck %s + +@gd = external local_unnamed_addr global [500 x double], align 8 +@gf = external local_unnamed_addr global [500 x float], align 4 + +; Function Attrs: nounwind +define double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) local_unnamed_addr #0 { +entry: + %0 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 10), align 8 + %1 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 17), align 8 + %2 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 87), align 8 + %3 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 97), align 8 + %4 = load double, double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 77), align 8 + store double %3, 
double* getelementptr inbounds ([500 x double], [500 x double]* @gd, i64 0, i64 122), align 8 + %add = fadd double %a, %b + %add1 = fadd double %add, %c + %add2 = fadd double %add1, %d + %add3 = fadd double %add2, %e + %add4 = fadd double %add3, %f + %add5 = fadd double %add4, %g + %add6 = fadd double %add5, %h + %add7 = fadd double %add6, %i + %add8 = fadd double %add7, %j + %add9 = fadd double %add8, %k + %add10 = fadd double %add9, %l + %add11 = fadd double %add10, %m + %add12 = fadd double %add11, %0 + %add13 = fadd double %add12, %1 + %add14 = fadd double %add13, %2 + %add15 = fadd double %add14, %3 + %add16 = fadd double %add15, %4 + %call = tail call double @_Z7getLXSDddddddddddddd(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, double %j, double %k, double %l, double %m) + %add17 = fadd double %add16, %call + ret double %add17 +; CHECK-LABEL: _Z7getLXSDddddddddddddd +; CHECK: lxsd [[LD:[0-9]+]], 776(3) +; CHECK: stxsd [[LD]], 976(3) +} + +; Function Attrs: nounwind +define float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m) local_unnamed_addr #0 { +entry: + %0 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 10), align 4 + %1 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 17), align 4 + %2 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 87), align 4 + %3 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 97), align 4 + %4 = load float, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 77), align 4 + store float %3, float* getelementptr inbounds ([500 x float], [500 x float]* @gf, i64 0, i64 122), align 4 + %add = fadd float %a, %b + %add1 = fadd float %add, %c + %add2 = fadd float %add1, %d + %add3 = fadd 
float %add2, %e + %add4 = fadd float %add3, %f + %add5 = fadd float %add4, %g + %add6 = fadd float %add5, %h + %add7 = fadd float %add6, %i + %add8 = fadd float %add7, %j + %add9 = fadd float %add8, %k + %add10 = fadd float %add9, %l + %add11 = fadd float %add10, %m + %add12 = fadd float %add11, %0 + %add13 = fadd float %add12, %1 + %add14 = fadd float %add13, %2 + %add15 = fadd float %add14, %3 + %add16 = fadd float %add15, %4 + %call = tail call float @_Z8getLXSSPfffffffffffff(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i, float %j, float %k, float %l, float %m) + %add17 = fadd float %add16, %call + ret float %add17 +; CHECK-LABEL: _Z8getLXSSPfffffffffffff +; CHECK: lxssp [[LD:[0-9]+]], 388(3) +; CHECK: stxssp [[LD]], 488(3) +} Index: test/CodeGen/PowerPC/bitcasts-direct-move.ll =================================================================== --- test/CodeGen/PowerPC/bitcasts-direct-move.ll +++ test/CodeGen/PowerPC/bitcasts-direct-move.ll @@ -1,6 +1,7 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \ ; RUN: --check-prefix=CHECK-P7 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s define signext i32 @f32toi32(float %a) { entry: Index: test/CodeGen/PowerPC/float-to-int.ll =================================================================== --- test/CodeGen/PowerPC/float-to-int.ll +++ test/CodeGen/PowerPC/float-to-int.ll @@ -1,6 +1,11 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=g5 +; RUN: llc -verify-machineinstrs < %s 
-mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=a2 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=g5 +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -mattr=-direct-move | FileCheck -check-prefix=CHECK-P9 %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -19,6 +24,12 @@ ; CHECK-VSX: stxsdx [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr + +; CHECK-P9-LABEL: @foo +; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1 +; CHECK-P9: stfd [[REG]], +; CHECK-P9: ld 3, +; CHECK-P9: blr } define i64 @foo2(double %a) nounwind { @@ -36,6 +47,12 @@ ; CHECK-VSX: stxsdx [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr + +; CHECK-P9-LABEL: @foo2 +; CHECK-P9: xscvdpsxds [[REG:[0-9]+]], 1 +; CHECK-P9: stfd [[REG]], +; CHECK-P9: ld 3, +; CHECK-P9: blr } define i64 @foo3(float %a) nounwind { @@ -53,6 +70,12 @@ ; CHECK-VSX: stxsdx [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr + +; CHECK-P9-LABEL: @foo3 +; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1 +; CHECK-P9: stfd [[REG]], +; CHECK-P9: ld 3, +; CHECK-P9: blr } define i64 @foo4(double %a) nounwind { @@ -70,6 +93,12 @@ ; CHECK-VSX: stxsdx [[REG]], ; CHECK-VSX: ld 3, ; CHECK-VSX: blr + +; CHECK-P9-LABEL: @foo4 +; CHECK-P9: xscvdpuxds [[REG:[0-9]+]], 1 +; CHECK-P9: stfd [[REG]], +; CHECK-P9: ld 3, +; CHECK-P9: blr } define i32 @goo(float %a) nounwind { Index: test/CodeGen/PowerPC/i64-to-float.ll =================================================================== --- test/CodeGen/PowerPC/i64-to-float.ll +++ test/CodeGen/PowerPC/i64-to-float.ll @@ -1,5 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 | FileCheck %s -; RUN: llc
-verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=a2 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -19,6 +23,12 @@ ; CHECK-VSX: lxsdx [[REG:[0-9]+]], ; CHECK-VSX: fcfids 1, [[REG]] ; CHECK-VSX: blr + +; CHECK-P9: @foo +; CHECK-P9: std 3, +; CHECK-P9: lfd [[REG:[0-9]+]], +; CHECK-P9: xscvsxdsp 1, [[REG]] +; CHECK-P9: blr } define double @goo(i64 %a) nounwind { @@ -37,6 +47,12 @@ ; CHECK-VSX: lxsdx [[REG:[0-9]+]], ; CHECK-VSX: xscvsxddp 1, [[REG]] ; CHECK-VSX: blr + +; CHECK-P9: @goo +; CHECK-P9: std 3, +; CHECK-P9: lfd [[REG:[0-9]+]], +; CHECK-P9: xscvsxddp 1, [[REG]] +; CHECK-P9: blr } define float @foou(i64 %a) nounwind { @@ -55,6 +71,12 @@ ; CHECK-VSX: lxsdx [[REG:[0-9]+]], ; CHECK-VSX: fcfidus 1, [[REG]] ; CHECK-VSX: blr + +; CHECK-P9: @foou +; CHECK-P9: std 3, +; CHECK-P9: lfd [[REG:[0-9]+]], +; CHECK-P9: xscvuxdsp 1, [[REG]] +; CHECK-P9: blr } define double @goou(i64 %a) nounwind { @@ -73,5 +95,11 @@ ; CHECK-VSX: lxsdx [[REG:[0-9]+]], ; CHECK-VSX: xscvuxddp 1, [[REG]] ; CHECK-VSX: blr + +; CHECK-P9: @goou +; CHECK-P9: std 3, +; CHECK-P9: lfd [[REG:[0-9]+]], +; CHECK-P9: xscvuxddp 1, [[REG]] +; CHECK-P9: blr } Index: test/CodeGen/PowerPC/mcm-12.ll =================================================================== --- test/CodeGen/PowerPC/mcm-12.ll +++ test/CodeGen/PowerPC/mcm-12.ll @@ -1,5 +1,9 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 
-code-model=medium -mattr=-vsx < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \ +; RUN: -mattr=-vsx < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O1 -code-model=medium \ +; RUN: -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O1 -code-model=medium < %s | \ +; RUN: FileCheck -check-prefix=CHECK-P9 %s ; Test peephole optimization for medium code model (32-bit TOC offsets) ; for loading a value from the constant pool (TOC-relative). @@ -24,3 +28,10 @@ ; CHECK-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha ; CHECK-VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l ; CHECK-VSX: lxsdx {{[0-9]+}}, 0, [[REG1]] + +; CHECK-P9: [[VAR:[a-z0-9A-Z_.]+]]: +; CHECK-P9: .quad 4562098671269285104 +; CHECK-P9-LABEL: test_double_const: +; CHECK-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; CHECK-P9: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l +; CHECK-P9: lfd {{[0-9]+}}, 0([[REG1]]) Index: test/CodeGen/PowerPC/mcm-4.ll =================================================================== --- test/CodeGen/PowerPC/mcm-4.ll +++ test/CodeGen/PowerPC/mcm-4.ll @@ -1,7 +1,15 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \ +; RUN: -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s +; 
RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=medium \ +; RUN: -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \ +; RUN: -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -code-model=large \ +; RUN: -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=medium \ +; RUN: -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-P9 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -code-model=large \ +; RUN: -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-P9 %s ; Test correct code generation for medium and large code model ; for loading a value from the constant pool (TOC-relative). @@ -41,3 +49,17 @@ ; LARGE-VSX: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha ; LARGE-VSX: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]]) ; LARGE-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]] + +; MEDIUM-P9: [[VAR:[a-z0-9A-Z_.]+]]: +; MEDIUM-P9: .quad 4562098671269285104 +; MEDIUM-P9-LABEL: test_double_const: +; MEDIUM-P9: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; MEDIUM-P9: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; MEDIUM-P9: lfd {{[0-9]+}}, 0([[REG2]]) + +; LARGE-P9: [[VAR:[a-z0-9A-Z_.]+]]: +; LARGE-P9: .quad 4562098671269285104 +; LARGE-P9-LABEL: test_double_const: +; LARGE-P9: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha +; LARGE-P9: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]]) +; LARGE-P9: lfd {{[0-9]+}}, 0([[REG2]]) Index: test/CodeGen/PowerPC/ppc64-align-long-double.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-align-long-double.ll +++ test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -1,5 +1,6 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mcpu=pwr7 
-O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck %s ; Verify internal alignment of long double in a struct. The double ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain Index: test/CodeGen/PowerPC/ppc64le-aggregates.ll =================================================================== --- test/CodeGen/PowerPC/ppc64le-aggregates.ll +++ test/CodeGen/PowerPC/ppc64le-aggregates.ll @@ -1,5 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 -mattr=+altivec -mattr=-vsx | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec -mattr=-vsx | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr8 \ +; RUN: -mattr=+altivec -mattr=-vsx | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mattr=+altivec \ +; RUN: -mattr=-vsx | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -march=ppc64le -mcpu=pwr9 \ +; RUN: -mattr=-direct-move -mattr=+altivec | FileCheck %s ; Currently VSX support is disabled for this test because we generate lxsdx ; instead of lfd, and stxsdx instead of stfd. That is a poor choice when we Index: test/CodeGen/PowerPC/pr25157-peephole.ll =================================================================== --- test/CodeGen/PowerPC/pr25157-peephole.ll +++ test/CodeGen/PowerPC/pr25157-peephole.ll @@ -1,4 +1,6 @@ ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ +; RUN: %s --check-prefix=CHECK-P9 ; Verify peephole simplification of splats and swaps. Bugpoint-reduced ; test from Eric Schweitz. 
@@ -59,3 +61,9 @@ ; CHECK: xxspltd ; CHECK: stxvd2x ; CHECK-NOT: xxswapd + +; CHECK-P9-LABEL: @aercalc_ +; CHECK-P9: lfs +; CHECK-P9: xxspltd +; CHECK-P9: stxvd2x +; CHECK-P9-NOT: xxswapd Index: test/CodeGen/PowerPC/pr25157.ll =================================================================== --- test/CodeGen/PowerPC/pr25157.ll +++ test/CodeGen/PowerPC/pr25157.ll @@ -1,4 +1,6 @@ ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \ +; RUN: --check-prefix=CHECK-P9 %s ; Verify correct generation of an lxsspx rather than an invalid optimization ; to lxvdsx. Bugpoint-reduced test from Eric Schweitz. @@ -56,3 +58,5 @@ ; CHECK-LABEL: @aercalc_ ; CHECK: lxsspx +; CHECK-P9-LABEL: @aercalc_ +; CHECK-P9: lfs Index: test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- test/CodeGen/PowerPC/swaps-le-6.ll +++ test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,4 +1,5 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. 
Index: test/CodeGen/PowerPC/vsx-spill.ll =================================================================== --- test/CodeGen/PowerPC/vsx-spill.ll +++ test/CodeGen/PowerPC/vsx-spill.ll @@ -1,7 +1,14 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=CHECK-REG %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck %s -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | FileCheck -check-prefix=CHECK-FISL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx < %s | FileCheck \ +; RUN: -check-prefix=CHECK-REG %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \ +; RUN: FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mattr=+vsx -fast-isel -O0 < %s | \ +; RUN: FileCheck -check-prefix=CHECK-FISL %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 < %s | FileCheck \ +; RUN: -check-prefix=CHECK-P9-REG %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -fast-isel -O0 < %s | FileCheck \ +; RUN: -check-prefix=CHECK-P9-FISL %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -21,6 +28,15 @@ ; CHECK-FISL: stxsdx 1, 1, 0 ; CHECK-FISL: blr +; CHECK-P9-REG: @foo1 +; CHECK-P9-REG: xxlor [[R1:[0-9]+]], 1, 1 +; CHECK-P9-REG: xxlor 1, [[R1]], [[R1]] +; CHECK-P9-REG: blr + +; CHECK-P9-FISL: @foo1 +; CHECK-P9-FISL: stfd 31, -8(1) +; CHECK-P9-FISL: blr + return: ; preds = %entry ret double %a } @@ -42,6 +58,17 @@ ; CHECK-FISL: lxsdx [[R1]], [[R1]], 0 ; CHECK-FISL: blr +; CHECK-P9-REG: @foo2 +; CHECK-P9-REG: {{xxlor|xsadddp}} [[R1:[0-9]+]], 1, 1 +; CHECK-P9-REG: {{xxlor|xsadddp}} 1, [[R1]], [[R1]] +; CHECK-P9-REG: blr + +; CHECK-P9-FISL: @foo2 +; CHECK-P9-FISL: xsadddp [[R1:[0-9]+]], 1, 1 +; CHECK-P9-FISL: stfd [[R1]], [[OFF:[0-9\-]+]](1) +; CHECK-P9-FISL: lfd [[R1]], [[OFF]](1) +; CHECK-P9-FISL: blr + return: ; preds = %entry ret 
double %b } @@ -57,6 +84,15 @@ ; CHECK: xsadddp 1, [[R1]], [[R1]] ; CHECK: blr +; CHECK-P9-REG-LABEL: foo3 +; CHECK-P9-REG: stfd 1, [[OFF:[0-9\-]+]](1) +; CHECK-P9-REG: lfd [[FPR:[0-9]+]], [[OFF]](1) +; CHECK-P9-REG: xsadddp 1, [[FPR]], [[FPR]] + +; CHECK-P9-FISL-LABEL: foo3 +; CHECK-P9-FISL: stfd 1, [[OFF:[0-9\-]+]](1) +; CHECK-P9-FISL: lfd [[FPR:[0-9]+]], [[OFF]](1) +; CHECK-P9-FISL: xsadddp 1, [[FPR]], [[FPR]] return: ; preds = %entry %b = fadd double %a, %a ret double %b Index: test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -1,4 +1,8 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=CHECK-P9 define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { %v = load <2 x double>, <2 x double>* %p1 @@ -12,6 +16,13 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxpermdi 34, 0, 1, 1 + +; CHECK-P9-LABEL: testi0 +; CHECK-P9: lxvd2x 0, 0, 3 +; CHECK-P9: lfd 1, 0(4) +; CHECK-P9: xxswapd 0, 0 +; CHECK-P9: xxspltd 1, 1, 0 +; CHECK-P9: xxpermdi 34, 0, 1, 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { @@ -26,6 +37,13 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxmrgld 34, 1, 0 + +; CHECK-P9-LABEL: testi1 +; CHECK-P9: lxvd2x 0, 0, 3 +; CHECK-P9: lfd 1, 0(4) +; CHECK-P9: xxswapd 0, 0 +; CHECK-P9: xxspltd 1, 1, 0 +; CHECK-P9: xxmrgld 34, 1, 0 } define double @teste0(<2 x double>* %p1) { Index: test/CodeGen/PowerPC/vsx_scalar_ld_st.ll =================================================================== --- 
test/CodeGen/PowerPC/vsx_scalar_ld_st.ll +++ test/CodeGen/PowerPC/vsx_scalar_ld_st.ll @@ -1,5 +1,9 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-direct-move | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-direct-move | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -mattr=-direct-move | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -mattr=-direct-move | FileCheck %s -check-prefix=CHECK-P9 @d = common global double 0.000000e+00, align 8 @f = common global float 0.000000e+00, align 4 @@ -121,6 +125,9 @@ ; CHECK-LABEL: @dblToFloat ; CHECK: lxsdx [[REGLD5:[0-9]+]], ; CHECK: stxsspx [[REGLD5]], +; CHECK-P9-LABEL: @dblToFloat +; CHECK-P9: lfd [[REGLD5:[0-9]+]], +; CHECK-P9: stfs [[REGLD5]], } ; Function Attrs: nounwind @@ -134,4 +141,7 @@ ; CHECK-LABEL: @floatToDbl ; CHECK: lxsspx [[REGLD5:[0-9]+]], ; CHECK: stxsdx [[REGLD5]], +; CHECK-P9-LABEL: @floatToDbl +; CHECK-P9: lfs [[REGLD5:[0-9]+]], +; CHECK-P9: stfd [[REGLD5]], }