Index: lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp =================================================================== --- lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp +++ lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp @@ -33,6 +33,11 @@ FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false), cl::desc("Use full register names when printing assembly")); +// Useful for testing purposes. Prints vs{32-63} as v{0-31} respectively. +static cl::opt<bool> +ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false), + cl::desc("Prints full register names with vs{32-63} as v{0-31}")); + #define PRINT_ALIAS_INSTR #include "PPCGenAsmWriter.inc" @@ -434,6 +439,14 @@ const MCOperand &Op = MI->getOperand(OpNo); if (Op.isReg()) { const char *RegName = getRegisterName(Op.getReg()); + if (ShowVSRNumsAsVR) { + unsigned RegNum = Op.getReg(); + if (RegNum >= PPC::VSH0 && RegNum <= PPC::VSH31) + O << 'v' << RegNum - PPC::VSH0; + else + O << RegName; + return; + } // The linux and AIX assembler does not take register prefixes. if (!isDarwinSyntax()) RegName = stripRegisterPrefix(RegName); Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -10466,6 +10466,8 @@ DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); + bool NeedsSwapsForVSXMemOps = Subtarget.hasVSX() && + Subtarget.isLittleEndian() && !Subtarget.isISA3_0(); switch (N->getOpcode()) { default: break; case PPCISD::SHL: @@ -10545,10 +10547,11 @@ } // For little endian, VSX stores require generating xxswapd/lxvd2x. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
EVT VT = N->getOperand(1).getValueType(); if (VT.isSimple()) { MVT StoreVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (NeedsSwapsForVSXMemOps && (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 || StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32)) return expandVSXStoreForLE(N, DCI); @@ -10560,9 +10563,10 @@ EVT VT = LD->getValueType(0); // For little endian, VSX loads require generating lxvd2x/xxswapd. + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. if (VT.isSimple()) { MVT LoadVT = VT.getSimpleVT(); - if (Subtarget.hasVSX() && Subtarget.isLittleEndian() && + if (NeedsSwapsForVSXMemOps && (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 || LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32)) return expandVSXLoadForLE(N, DCI); @@ -10879,7 +10883,8 @@ break; case ISD::INTRINSIC_W_CHAIN: { // For little endian, VSX loads require generating lxvd2x/xxswapd. - if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting load. + if (NeedsSwapsForVSXMemOps) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; @@ -10892,7 +10897,8 @@ } case ISD::INTRINSIC_VOID: { // For little endian, VSX stores require generating xxswapd/stxvd2x. - if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) { + // Not needed on ISA 3.0 based CPUs since we have a non-permuting store. 
+ if (NeedsSwapsForVSXMemOps) { switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { default: break; Index: lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.cpp +++ lib/Target/PowerPC/PPCInstrInfo.cpp @@ -273,6 +273,7 @@ case PPC::RESTORE_CRBIT: case PPC::LVX: case PPC::LXVD2X: + case PPC::LXVX: case PPC::QVLFDX: case PPC::QVLFSXs: case PPC::QVLFDXb: @@ -302,6 +303,7 @@ case PPC::SPILL_CRBIT: case PPC::STVX: case PPC::STXVD2X: + case PPC::STXVX: case PPC::QVSTFDX: case PPC::QVSTFSXs: case PPC::QVSTFDXb: @@ -1004,7 +1006,8 @@ FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X)) + unsigned Op = Subtarget.isISA3_0() ? PPC::STXVX : PPC::STXVD2X; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op)) .addReg(SrcReg, getKillRegState(isKill)), FrameIdx)); @@ -1126,7 +1129,8 @@ FrameIdx)); NonRI = true; } else if (PPC::VSRCRegClass.hasSubClassEq(RC)) { - NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg), + unsigned Op = Subtarget.isISA3_0() ? 
PPC::LXVX : PPC::LXVD2X; + NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg), FrameIdx)); NonRI = true; } else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) { Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -2113,7 +2113,8 @@ def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>; // Load Vector Indexed - def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>; + def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, + [(set v2f64:$XT, (load xoaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>; @@ -2149,10 +2150,29 @@ def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>; // Store Vector Indexed - def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>; + def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, + [(store v2f64:$XT, xoaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>; def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>; } // end mayStore + + let AddedComplexity = 500 in { + def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; + def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, 
xoaddr:$dst), + (STXVX $rS, xoaddr:$dst)>; + } + } // end HasP9Vector Index: test/CodeGen/PowerPC/lxvw4x-bug.ll =================================================================== --- test/CodeGen/PowerPC/lxvw4x-bug.ll +++ test/CodeGen/PowerPC/lxvw4x-bug.ll @@ -1,4 +1,6 @@ ; RUN: llc -O0 -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd ; Function Attrs: nounwind define void @test() { entry: @@ -17,6 +19,8 @@ ; CHECK: lwa [[REG0:[0-9]+]], ; CHECK: lxvd2x [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] ; CHECK: xxswapd [[REG1]], [[REG1]] +; CHECK-P9: lwa [[REG0:[0-9]+]], +; CHECK-P9: lxvx [[REG1:[0-9]+]], {{[0-9]+}}, [[REG0]] store <4 x i32> %4, <4 x i32>* %j, align 16 ret void } Index: test/CodeGen/PowerPC/ppc64-i128-abi.ll =================================================================== --- test/CodeGen/PowerPC/ppc64-i128-abi.ll +++ test/CodeGen/PowerPC/ppc64-i128-abi.ll @@ -4,6 +4,10 @@ ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-NOVSX ; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-BE-NOVSX ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s -check-prefix=CHECK-LE-NOVSX +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-P9 --implicit-check-not xxswapd +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -mattr=-vsx < %s \ +; RUN: | FileCheck %s -check-prefix=CHECK-NOVSX --implicit-check-not xxswapd @x = common global <1 x i128> zeroinitializer, align 16 @y = common global <1 x i128> zeroinitializer, align 16 @@ -31,6 +35,11 @@ ; CHECK-LE: vadduqm 2, 2, 3 ; CHECK-LE: blr +; CHECK-P9-LABEL: @v1i128_increment_by_one +; CHECK-P9: lxvx +; CHECK-P9: vadduqm 2, 2, 3 +; 
CHECK-P9: blr + ; CHECK-BE-LABEL: @v1i128_increment_by_one ; CHECK-BE: lxvd2x 35, {{[0-9]+}}, {{[0-9]+}} ; CHECK-BE-NOT: xxswapd @@ -171,6 +180,11 @@ ; CHECK-LE: bl v1i128_increment_by_one ; CHECK-LE: blr +; CHECK-P9-LABEL: @call_v1i128_increment_by_one +; CHECK-P9: lxvx +; CHECK-P9: bl v1i128_increment_by_one +; CHECK-P9: blr + ; CHECK-BE-LABEL: @call_v1i128_increment_by_one ; CHECK-BE: lxvw4x 34, {{[0-9]+}}, {{[0-9]+}} ; CHECK-BE-NOT: xxswapd 34, {{[0-9]+}} @@ -198,6 +212,12 @@ ; CHECK-LE: bl v1i128_increment_by_val ; CHECK-LE: blr +; CHECK-P9-LABEL: @call_v1i128_increment_by_val +; CHECK-P9-DAG: lxvx 34 +; CHECK-P9-DAG: lxvx 35 +; CHECK-P9: bl v1i128_increment_by_val +; CHECK-P9: blr + ; CHECK-BE-LABEL: @call_v1i128_increment_by_val Index: test/CodeGen/PowerPC/swaps-le-1.ll =================================================================== --- test/CodeGen/PowerPC/swaps-le-1.ll +++ test/CodeGen/PowerPC/swaps-le-1.ll @@ -1,5 +1,8 @@ ; RUN: llc -O3 -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s ; RUN: llc -O3 -mcpu=pwr8 -disable-ppc-vsx-swap-removal -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck -check-prefix=NOOPTSWAP %s +; RUN: llc -O3 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck -check-prefix=CHECK-P9 \ +; RUN: --implicit-check-not xxswapd %s ; This test was generated from the following source: ; @@ -98,6 +101,7 @@ ; CHECK-LABEL: @foo ; CHECK-NOT: xxpermdi ; CHECK-NOT: xxswapd +; CHECK-P9-NOT: xxpermdi ; CHECK: lxvd2x ; CHECK: lxvd2x @@ -127,7 +131,6 @@ ; CHECK: vmuluwm ; CHECK: stxvd2x - ; NOOPTSWAP-LABEL: @foo ; NOOPTSWAP: lxvd2x @@ -145,3 +148,29 @@ ; NOOPTSWAP-DAG: stxvd2x ; NOOPTSWAP: stxvd2x +; CHECK-P9-LABEL: @foo +; CHECK-P9: lxvx [[R1:v[0-9]+]], +; CHECK-P9: lxvx [[R2:v[0-9]+]], +; CHECK-P9: lxvx [[R3:v[0-9]+]], +; CHECK-P9: lxvx [[R4:v[0-9]+]], +; CHECK-P9: lxvx [[R5:v[0-9]+]], +; CHECK-P9: lxvx [[R6:v[0-9]+]], +; CHECK-P9: lxvx [[R7:v[0-9]+]], +; CHECK-P9: lxvx 
[[R8:v[0-9]+]], +; CHECK-P9-DAG: vadduwm v{{[0-9]+}}, [[R2]], [[R1]] +; CHECK-P9: lxvx [[R9:v[0-9]+]], +; CHECK-P9: lxvx [[R10:v[0-9]+]], +; CHECK-P9-DAG: vadduwm v{{[0-9]+}}, [[R4]], [[R3]] +; CHECK-P9: lxvx [[R11:v[0-9]+]], +; CHECK-P9: lxvx [[R12:v[0-9]+]], +; CHECK-P9-DAG: vadduwm v{{[0-9]+}}, [[R6]], [[R5]] +; CHECK-P9-DAG: vadduwm v{{[0-9]+}}, [[R8]], [[R7]] +; CHECK-P9-DAG: vmuluwm v{{[0-9]+}}, v{{[0-9]+}}, [[R9]] +; CHECK-P9-DAG: vmuluwm v{{[0-9]+}}, v{{[0-9]+}}, [[R10]] +; CHECK-P9-DAG: vmuluwm v{{[0-9]+}}, v{{[0-9]+}}, [[R11]] +; CHECK-P9-DAG: vmuluwm v{{[0-9]+}}, v{{[0-9]+}}, [[R12]] +; CHECK-P9-DAG: stxvx +; CHECK-P9: stxvx +; CHECK-P9: stxvx +; CHECK-P9: stxvx + Index: test/CodeGen/PowerPC/swaps-le-6.ll =================================================================== --- test/CodeGen/PowerPC/swaps-le-6.ll +++ test/CodeGen/PowerPC/swaps-le-6.ll @@ -1,4 +1,6 @@ ; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd ; These tests verify that VSX swap optimization works when loading a scalar ; into a vector register. 
@@ -24,6 +26,13 @@ ; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 ; CHECK: stxvd2x [[REG5]] +; CHECK-P9-LABEL: @bar0 +; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] +; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 +; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1 +; CHECK-P9: stxvx [[REG5]] + define void @bar1() { entry: %0 = load <2 x double>, <2 x double>* @x, align 16 @@ -40,3 +49,10 @@ ; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] ; CHECK: stxvd2x [[REG5]] +; CHECK-P9-LABEL: @bar1 +; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]] +; CHECK-P9-DAG: lxsdx [[REG2:[0-9]+]] +; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0 +; CHECK-P9: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]] +; CHECK-P9: stxvx [[REG5]] + Index: test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll =================================================================== --- test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll +++ test/CodeGen/PowerPC/vsx-ldst-builtin-le.ll @@ -2,6 +2,10 @@ ; RUN: grep lxvd2x < %t | count 18 ; RUN: grep stxvd2x < %t | count 18 +; RUN: llc -mcpu=pwr9 -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t +; RUN: grep lxvx < %t | count 18 +; RUN: grep stxvx < %t | count 18 + @vf = global <4 x float> , align 16 @vd = global <2 x double> , align 16 @vsi = global <4 x i32> , align 16 Index: test/CodeGen/PowerPC/vsx-ldst.ll =================================================================== --- test/CodeGen/PowerPC/vsx-ldst.ll +++ test/CodeGen/PowerPC/vsx-ldst.ll @@ -13,6 +13,11 @@ ; RUN: grep lxvd2x < %t | count 6 ; RUN: grep stxvd2x < %t | count 6 +; RUN: llc -mcpu=pwr9 -O2 -mtriple=powerpc64le-unknown-linux-gnu < %s > %t +; RUN: grep lxvx < %t | count 6 +; RUN: grep stxvx < %t | count 6 + + @vsi = global <4 x i32> , align 16 @vui = global <4 x i32> , align 16 @vf = global <4 x float> , align 16 Index: test/CodeGen/PowerPC/vsx-p9.ll =================================================================== --- test/CodeGen/PowerPC/vsx-p9.ll +++ 
test/CodeGen/PowerPC/vsx-p9.ll @@ -0,0 +1,143 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s + +@uca = global <16 x i8> zeroinitializer, align 16 +@ucb = global <16 x i8> zeroinitializer, align 16 +@sca = global <16 x i8> zeroinitializer, align 16 +@scb = global <16 x i8> zeroinitializer, align 16 +@usa = global <8 x i16> zeroinitializer, align 16 +@usb = global <8 x i16> zeroinitializer, align 16 +@ssa = global <8 x i16> zeroinitializer, align 16 +@ssb = global <8 x i16> zeroinitializer, align 16 +@uia = global <4 x i32> zeroinitializer, align 16 +@uib = global <4 x i32> zeroinitializer, align 16 +@sia = global <4 x i32> zeroinitializer, align 16 +@sib = global <4 x i32> zeroinitializer, align 16 +@ulla = global <2 x i64> zeroinitializer, align 16 +@ullb = global <2 x i64> zeroinitializer, align 16 +@slla = global <2 x i64> zeroinitializer, align 16 +@sllb = global <2 x i64> zeroinitializer, align 16 +@uxa = global <1 x i128> zeroinitializer, align 16 +@uxb = global <1 x i128> zeroinitializer, align 16 +@sxa = global <1 x i128> zeroinitializer, align 16 +@sxb = global <1 x i128> zeroinitializer, align 16 +@vfa = global <4 x float> zeroinitializer, align 16 +@vfb = global <4 x float> zeroinitializer, align 16 +@vda = global <2 x double> zeroinitializer, align 16 +@vdb = global <2 x double> zeroinitializer, align 16 + +define void @_Z4testv() { +entry: +; CHECK-LABEL: @_Z4testv + %0 = load <16 x i8>, <16 x i8>* @uca, align 16 + %1 = load <16 x i8>, <16 x i8>* @ucb, align 16 + %add.i = add <16 x i8> %1, %0 + tail call void (...) @sink(<16 x i8> %add.i) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddubm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %2 = load <16 x i8>, <16 x i8>* @sca, align 16 + %3 = load <16 x i8>, <16 x i8>* @scb, align 16 + %add.i22 = add <16 x i8> %3, %2 + tail call void (...) 
@sink(<16 x i8> %add.i22) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddubm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %4 = load <8 x i16>, <8 x i16>* @usa, align 16 + %5 = load <8 x i16>, <8 x i16>* @usb, align 16 + %add.i21 = add <8 x i16> %5, %4 + tail call void (...) @sink(<8 x i16> %add.i21) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduhm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %6 = load <8 x i16>, <8 x i16>* @ssa, align 16 + %7 = load <8 x i16>, <8 x i16>* @ssb, align 16 + %add.i20 = add <8 x i16> %7, %6 + tail call void (...) @sink(<8 x i16> %add.i20) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduhm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %8 = load <4 x i32>, <4 x i32>* @uia, align 16 + %9 = load <4 x i32>, <4 x i32>* @uib, align 16 + %add.i19 = add <4 x i32> %9, %8 + tail call void (...) @sink(<4 x i32> %add.i19) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduwm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %10 = load <4 x i32>, <4 x i32>* @sia, align 16 + %11 = load <4 x i32>, <4 x i32>* @sib, align 16 + %add.i18 = add <4 x i32> %11, %10 + tail call void (...) @sink(<4 x i32> %add.i18) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduwm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %12 = load <2 x i64>, <2 x i64>* @ulla, align 16 + %13 = load <2 x i64>, <2 x i64>* @ullb, align 16 + %add.i17 = add <2 x i64> %13, %12 + tail call void (...) @sink(<2 x i64> %add.i17) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddudm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %14 = load <2 x i64>, <2 x i64>* @slla, align 16 + %15 = load <2 x i64>, <2 x i64>* @sllb, align 16 + %add.i16 = add <2 x i64> %15, %14 + tail call void (...) 
@sink(<2 x i64> %add.i16) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vaddudm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %16 = load <1 x i128>, <1 x i128>* @uxa, align 16 + %17 = load <1 x i128>, <1 x i128>* @uxb, align 16 + %add.i15 = add <1 x i128> %17, %16 + tail call void (...) @sink(<1 x i128> %add.i15) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduqm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %18 = load <1 x i128>, <1 x i128>* @sxa, align 16 + %19 = load <1 x i128>, <1 x i128>* @sxb, align 16 + %add.i14 = add <1 x i128> %19, %18 + tail call void (...) @sink(<1 x i128> %add.i14) +; CHECK: lxvx 34, 0, 3 +; CHECK: lxvx 35, 0, 4 +; CHECK: vadduqm 2, 3, 2 +; CHECK: stxvx 34, +; CHECK: bl sink + %20 = load <4 x float>, <4 x float>* @vfa, align 16 + %21 = load <4 x float>, <4 x float>* @vfb, align 16 + %add.i13 = fadd <4 x float> %20, %21 + tail call void (...) @sink(<4 x float> %add.i13) +; CHECK: lxvx 0, 0, 3 +; CHECK: lxvx 1, 0, 4 +; CHECK: xvaddsp 34, 0, 1 +; CHECK: stxvx 34, +; CHECK: bl sink + %22 = load <2 x double>, <2 x double>* @vda, align 16 + %23 = load <2 x double>, <2 x double>* @vdb, align 16 + %add.i12 = fadd <2 x double> %22, %23 + tail call void (...) @sink(<2 x double> %add.i12) +; CHECK: lxvx 0, 0, 3 +; CHECK: lxvx 1, 0, 4 +; CHECK: xvadddp 0, 0, 1 +; CHECK: stxvx 0, +; CHECK: bl sink + ret void +} + +declare void @sink(...) 
Index: test/CodeGen/PowerPC/vsx_insert_extract_le.ll =================================================================== --- test/CodeGen/PowerPC/vsx_insert_extract_le.ll +++ test/CodeGen/PowerPC/vsx_insert_extract_le.ll @@ -1,4 +1,6 @@ ; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @testi0(<2 x double>* %p1, double* %p2) { %v = load <2 x double>, <2 x double>* %p1 @@ -12,6 +14,12 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxpermdi 34, 0, 1, 1 + +; CHECK-P9-LABEL: testi0 +; CHECK-P9: lxsdx 0, 0, 4 +; CHECK-P9: lxvx 1, 0, 3 +; CHECK-P9: xxspltd 0, 0, 0 +; CHECK-P9: xxpermdi 34, 1, 0, 1 } define <2 x double> @testi1(<2 x double>* %p1, double* %p2) { @@ -26,6 +34,12 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxspltd 1, 1, 0 ; CHECK: xxmrgld 34, 1, 0 + +; CHECK-P9-LABEL: testi1 +; CHECK-P9: lxsdx 0, 0, 4 +; CHECK-P9: lxvx 1, 0, 3 +; CHECK-P9: xxspltd 0, 0, 0 +; CHECK-P9: xxmrgld 34, 0, 1 } define double @teste0(<2 x double>* %p1) { @@ -35,6 +49,9 @@ ; CHECK-LABEL: teste0 ; CHECK: lxvd2x 1, 0, 3 + +; CHECK-P9-LABEL: teste0 +; CHECK-P9: lxsdx 1, 0, 3 } define double @teste1(<2 x double>* %p1) { @@ -45,4 +62,8 @@ ; CHECK-LABEL: teste1 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxswapd 1, 0 + +; CHECK-P9-LABEL: teste1 +; CHECK-P9: li 4, 8 +; CHECK-P9: lxsdx 1, 3, 4 } Index: test/CodeGen/PowerPC/vsx_shuffle_le.ll =================================================================== --- test/CodeGen/PowerPC/vsx_shuffle_le.ll +++ test/CodeGen/PowerPC/vsx_shuffle_le.ll @@ -1,4 +1,6 @@ ; RUN: llc -mcpu=pwr8 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mattr=+vsx -mtriple=powerpc64le-unknown-linux-gnu < %s \ +; RUN: | FileCheck %s --check-prefix=CHECK-P9 --implicit-check-not xxswapd define <2 x double> @test00(<2 x double>* 
%p1, <2 x double>* %p2) { %v1 = load <2 x double>, <2 x double>* %p1 @@ -9,6 +11,10 @@ ; CHECK-LABEL: test00 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxspltd 34, 0, 0 + +; CHECK-P9-LABEL: test00 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: xxspltd 34, 0, 1 } define <2 x double> @test01(<2 x double>* %p1, <2 x double>* %p2) { @@ -20,6 +26,9 @@ ; CHECK-LABEL: test01 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxswapd 34, 0 + +; CHECK-P9-LABEL: test01 +; CHECK-P9: lxvx 34, 0, 3 } define <2 x double> @test02(<2 x double>* %p1, <2 x double>* %p2) { @@ -34,6 +43,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrgld 34, 1, 0 + +; CHECK-P9-LABEL: @test02 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrgld 34, 1, 0 } define <2 x double> @test03(<2 x double>* %p1, <2 x double>* %p2) { @@ -48,6 +62,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 1, 0, 1 + +; CHECK-P9-LABEL: @test03 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 1, 0, 1 } define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) { @@ -58,6 +77,10 @@ ; CHECK-LABEL: @test10 ; CHECK: lxvd2x 34, 0, 3 + +; CHECK-P9-LABEL: @test10 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) { @@ -69,6 +92,10 @@ ; CHECK-LABEL: @test11 ; CHECK: lxvd2x 0, 0, 3 ; CHECK: xxspltd 34, 0, 1 + +; CHECK-P9-LABEL: @test11 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: xxspltd 34, 0, 0 } define <2 x double> @test12(<2 x double>* %p1, <2 x double>* %p2) { @@ -83,6 +110,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 1, 0, 2 + +; CHECK-P9-LABEL: @test12 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 1, 0, 2 } define <2 x double> @test13(<2 x double>* %p1, <2 x double>* %p2) { @@ -97,6 +129,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrghd 34, 1, 0 + +; CHECK-P9-LABEL: @test13 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; 
CHECK-P9: xxmrghd 34, 1, 0 } define <2 x double> @test20(<2 x double>* %p1, <2 x double>* %p2) { @@ -111,6 +148,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrgld 34, 0, 1 + +; CHECK-P9-LABEL: @test20 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrgld 34, 0, 1 } define <2 x double> @test21(<2 x double>* %p1, <2 x double>* %p2) { @@ -125,6 +167,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 0, 1, 1 + +; CHECK-P9-LABEL: @test21 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 0, 1, 1 } define <2 x double> @test22(<2 x double>* %p1, <2 x double>* %p2) { @@ -136,6 +183,10 @@ ; CHECK-LABEL: @test22 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxspltd 34, 0, 0 + +; CHECK-P9-LABEL: @test22 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxspltd 34, 0, 1 } define <2 x double> @test23(<2 x double>* %p1, <2 x double>* %p2) { @@ -147,6 +198,9 @@ ; CHECK-LABEL: @test23 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxswapd 34, 0 + +; CHECK-P9-LABEL: @test23 +; CHECK-P9: lxvx 34, 0, 4 } define <2 x double> @test30(<2 x double>* %p1, <2 x double>* %p2) { @@ -161,6 +215,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxpermdi 34, 0, 1, 2 + +; CHECK-P9-LABEL: @test30 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxpermdi 34, 0, 1, 2 } define <2 x double> @test31(<2 x double>* %p1, <2 x double>* %p2) { @@ -175,6 +234,11 @@ ; CHECK: xxswapd 0, 0 ; CHECK: xxswapd 1, 1 ; CHECK: xxmrghd 34, 0, 1 + +; CHECK-P9-LABEL: @test31 +; CHECK-P9: lxvx 0, 0, 3 +; CHECK-P9: lxvx 1, 0, 4 +; CHECK-P9: xxmrghd 34, 0, 1 } define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) { @@ -185,6 +249,10 @@ ; CHECK-LABEL: @test32 ; CHECK: lxvd2x 34, 0, 4 + +; CHECK-P9-LABEL: @test32 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) { @@ -196,4 +264,8 @@ ; CHECK-LABEL: @test33 ; CHECK: lxvd2x 0, 0, 4 ; CHECK: xxspltd 34, 0, 1 + +; 
CHECK-P9-LABEL: @test33 +; CHECK-P9: lxvx 0, 0, 4 +; CHECK-P9: xxspltd 34, 0, 0 }