diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9397,10 +9397,10 @@
     }
   }
 
-  // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
-  // lowered to VSX instructions under certain conditions.
+  // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
+  // 32-bits can be lowered to VSX instructions under certain conditions.
   // Without VSX, there is no pattern more efficient than expanding the node.
-  if (Subtarget.hasVSX() &&
+  if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
       haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                       Subtarget.hasP8Vector()))
     return Op;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -145,6 +145,7 @@
 def HasVSX : Predicate<"Subtarget->hasVSX()">;
 def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
 def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
+def IsPPC64 : Predicate<"Subtarget->isPPC64()">;
 def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
 def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
 def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
@@ -2414,24 +2415,24 @@
 // [HasVSX, HasOnlySwappingMemOps]
 // [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
 // [HasVSX, HasP8Vector]
-// [HasVSX, HasP8Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64]
 // [HasVSX, HasP8Vector, IsLittleEndian]
-// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian]
+// [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64]
 // [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian]
 // [HasVSX, HasDirectMove]
 // [HasVSX, HasDirectMove, IsBigEndian]
 // [HasVSX, HasDirectMove, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian]
+// [HasVSX, HasDirectMove, NoP9Altivec, IsBigEndian, IsPPC64]
+// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64]
 // [HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian]
-// [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian]
 // [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian]
 // [HasVSX, HasP9Vector]
-// [HasVSX, HasP9Vector, IsBigEndian]
+// [HasVSX, HasP9Vector, IsBigEndian, IsPPC64]
 // [HasVSX, HasP9Vector, IsLittleEndian]
 // [HasVSX, HasP9Altivec]
-// [HasVSX, HasP9Altivec, IsBigEndian]
+// [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64]
 // [HasVSX, HasP9Altivec, IsLittleEndian]
-// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian]
+// [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
 
 let AddedComplexity = 400 in {
@@ -3015,8 +3016,8 @@
   def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
 } // HasVSX, HasOnlySwappingMemOps
 
-// Big endian VSX subtarget that only has loads and stores that always load
-// in big endian order. Really big endian pre-Power9 subtargets.
+// Big endian VSX subtarget that only has loads and stores that always
+// load in big endian order. Really big endian pre-Power9 subtargets.
 let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
   def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
   def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
@@ -3109,7 +3110,7 @@
 } // HasVSX, HasP8Vector
 
 // Big endian Power8 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, IsBigEndian, IsPPC64] in {
   def : Pat;
   def : Pat;
 }
-} // HasVSX, HasP8Vector, IsBigEndian
+} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
 
 // Little endian Power8 VSX subtarget.
 let Predicates = [HasVSX, HasP8Vector, IsLittleEndian] in {
@@ -3286,7 +3287,7 @@
 } // HasVSX, HasP8Vector, IsLittleEndian
 
 // Big endian pre-Power9 VSX subtarget.
-let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian] in {
+let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64] in {
   def : Pat<(store (i64 (extractelt v2i64:$A, 0)), xoaddr:$src),
             (XFSTOREf64 (EXTRACT_SUBREG $A, sub_64), xoaddr:$src)>;
   def : Pat<(store (f64 (extractelt v2f64:$A, 0)), xoaddr:$src),
@@ -3297,7 +3298,7 @@
   def : Pat<(store (f64 (extractelt v2f64:$A, 1)), xoaddr:$src),
             (XFSTOREf64 (EXTRACT_SUBREG (XXPERMDI $A, $A, 2), sub_64),
                         xoaddr:$src)>;
-} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian
+} // HasVSX, HasP8Vector, NoP9Vector, IsBigEndian, IsPPC64
 
 // Little endian pre-Power9 VSX subtarget.
 let Predicates = [HasVSX, HasP8Vector, NoP9Vector, IsLittleEndian] in {
@@ -3554,8 +3555,8 @@
             (i32 VectorExtractions.LE_VARIABLE_WORD)>;
 } // HasVSX, HasDirectMove, NoP9Altivec, IsLittleEndian
 
-// Big endian pre-Power9 VSX subtarget that has direct moves.
-let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian] in {
+// Big endian pre-Power9 64Bit VSX subtarget that has direct moves.
+let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64] in {
   // Big endian integer vectors using direct moves.
   def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
             (v2i64 (XXPERMDI
@@ -3569,7 +3570,7 @@
                 (MTVSRD (RLDIMI AnyExts.D, AnyExts.C, 32, 0)), VSRC), 0)>;
   def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
             (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
-} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian
+} // HasVSX, HasDirectMove, NoP9Vector, IsBigEndian, IsPPC64
 
 // Little endian pre-Power9 VSX subtarget that has direct moves.
 let Predicates = [HasVSX, HasDirectMove, NoP9Vector, IsLittleEndian] in {
@@ -3922,8 +3923,8 @@
             (v4i32 (LXVWSX xoaddr:$A))>;
 } // HasVSX, HasP9Vector
 
-// Big endian Power9 subtarget.
-let Predicates = [HasVSX, HasP9Vector, IsBigEndian] in {
+// Big endian 64Bit Power9 subtarget.
+let Predicates = [HasVSX, HasP9Vector, IsBigEndian, IsPPC64] in {
   def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 0)))))),
             (f32 (XSCVUXDSP (XXEXTRACTUW $A, 0)))>;
   def : Pat<(f32 (PPCfcfidus (f64 (PPCmtvsrz (i32 (extractelt v4i32:$A, 1)))))),
@@ -4096,7 +4097,7 @@
   def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
             (f128 (XSCVUDQP
                       (XXEXTRACTUW (SUBREG_TO_REG (i64 1), $src, sub_64), 4)))>;
-} // HasVSX, HasP9Vector, IsBigEndian
+} // HasVSX, HasP9Vector, IsBigEndian, IsPPC64
 
 // Little endian Power9 subtarget.
 let Predicates = [HasVSX, HasP9Vector, IsLittleEndian] in {
@@ -4321,8 +4322,8 @@
             (v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
 } // HasVSX, HasP9Altivec
 
-// Big endian Power9 VSX subtargets with P9 Altivec support.
-let Predicates = [HasVSX, HasP9Altivec, IsBigEndian] in {
+// Big endian Power9 64Bit VSX subtargets with P9 Altivec support.
+let Predicates = [HasVSX, HasP9Altivec, IsBigEndian, IsPPC64] in {
   def : Pat<(i64 (anyext (i32 (vector_extract v16i8:$S, i64:$Idx)))),
             (VEXTUBLX $Idx, $S)>;
@@ -4455,7 +4456,7 @@
             (v4i32 (VEXTSB2W $A))>;
   def : Pat<(v2i64 (build_vector ByteToDWord.BE_A0, ByteToDWord.BE_A1)),
             (v2i64 (VEXTSB2D $A))>;
-} // HasVSX, HasP9Altivec, IsBigEndian
+} // HasVSX, HasP9Altivec, IsBigEndian, IsPPC64
 
 // Little endian Power9 VSX subtargets with P9 Altivec support.
 let Predicates = [HasVSX, HasP9Altivec, IsLittleEndian] in {
@@ -4592,8 +4593,9 @@
             (v2i64 (VEXTSB2D $A))>;
 } // HasVSX, HasP9Altivec, IsLittleEndian
 
-// Big endian VSX subtarget that supports additional direct moves from ISA3.0.
-let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian] in {
+// Big endian 64Bit VSX subtarget that supports additional direct moves from
+// ISA3.0.
+let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64] in {
   def : Pat<(i64 (extractelt v2i64:$A, 1)),
             (i64 (MFVSRLD $A))>;
   // Better way to build integer vectors if we have MTVSRDD. Big endian.
@@ -4606,7 +4608,7 @@
   def : Pat<(f128 (PPCbuild_fp128 i64:$rB, i64:$rA)),
             (f128 (COPY_TO_REGCLASS (MTVSRDD $rB, $rA), VRRC))>;
-} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian
+} // HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64
 
 // Little endian VSX subtarget that supports direct moves from ISA3.0.
 let Predicates = [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian] in {
diff --git a/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s --check-prefix=32BIT
+
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64 -mcpu=pwr8 < %s | \
+; RUN:   FileCheck %s --check-prefix=64BIT
+
+define dso_local fastcc void @BuildVectorICE() unnamed_addr {
+; 32BIT-LABEL: BuildVectorICE:
+; 32BIT:       # %bb.0: # %entry
+; 32BIT-NEXT:    stwu 1, -64(1)
+; 32BIT-NEXT:    .cfi_def_cfa_offset 64
+; 32BIT-NEXT:    lxvw4x 34, 0, 3
+; 32BIT-NEXT:    li 3, 0
+; 32BIT-NEXT:    addi 4, 1, 16
+; 32BIT-NEXT:    addi 5, 1, 32
+; 32BIT-NEXT:    addi 6, 1, 48
+; 32BIT-NEXT:    li 7, 0
+; 32BIT-NEXT:    .p2align 4
+; 32BIT-NEXT:  .LBB0_1: # %while.body
+; 32BIT-NEXT:    # =>This Inner Loop Header: Depth=1
+; 32BIT-NEXT:    stw 7, 16(1)
+; 32BIT-NEXT:    stw 3, 32(1)
+; 32BIT-NEXT:    lxvw4x 0, 0, 4
+; 32BIT-NEXT:    lxvw4x 1, 0, 5
+; 32BIT-NEXT:    xxsldwi 0, 1, 0, 1
+; 32BIT-NEXT:    xxspltw 1, 1, 0
+; 32BIT-NEXT:    xxsldwi 35, 0, 1, 3
+; 32BIT-NEXT:    vadduwm 3, 2, 3
+; 32BIT-NEXT:    xxspltw 36, 35, 1
+; 32BIT-NEXT:    vadduwm 3, 3, 4
+; 32BIT-NEXT:    stxvw4x 35, 0, 6
+; 32BIT-NEXT:    lwz 7, 48(1)
+; 32BIT-NEXT:    b .LBB0_1
+;
+; 64BIT-LABEL: BuildVectorICE:
+; 64BIT:       # %bb.0: # %entry
+; 64BIT-NEXT:    li 3, 0
+; 64BIT-NEXT:    lxvw4x 34, 0, 3
+; 64BIT-NEXT:    rldimi 3, 3, 32, 0
+; 64BIT-NEXT:    mtfprd 0, 3
+; 64BIT-NEXT:    li 3, 0
+; 64BIT-NEXT:    .p2align 4
+; 64BIT-NEXT:  .LBB0_1: # %while.body
+; 64BIT-NEXT:    # =>This Inner Loop Header: Depth=1
+; 64BIT-NEXT:    li 4, 0
+; 64BIT-NEXT:    rldimi 4, 3, 32, 0
+; 64BIT-NEXT:    mtfprd 1, 4
+; 64BIT-NEXT:    xxmrghd 35, 1, 0
+; 64BIT-NEXT:    vadduwm 3, 2, 3
+; 64BIT-NEXT:    xxspltw 36, 35, 1
+; 64BIT-NEXT:    vadduwm 3, 3, 4
+; 64BIT-NEXT:    xxsldwi 1, 35, 35, 3
+; 64BIT-NEXT:    mffprwz 3, 1
+; 64BIT-NEXT:    b .LBB0_1
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %newelement = phi i32 [ 0, %entry ], [ %5, %while.body ]
  %0 = insertelement <4 x i32> <i32 undef, i32 0, i32 0, i32 0>, i32 %newelement, i32 0
  %1 = load <4 x i32>, <4 x i32>* undef, align 1
  %2 = add <4 x i32> %1, %0
  %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %4 = add <4 x i32> %2, %3
  %5 = extractelement <4 x i32> %4, i32 0
  br label %while.body
}
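
For reference, the pre-patch failure can be checked with the 32-bit RUN line
of the new test in isolation (a sketch; it assumes an llc built without this
patch, and the exact diagnostic depends on the build):

  llc -verify-machineinstrs -mtriple=powerpc -mcpu=pwr8 \
      llvm/test/CodeGen/PowerPC/ppc-32bit-build-vector.ll

Without the IsPPC64/Subtarget.isPPC64() guards, the big endian 32-bit
subtarget could reach VSX build_vector patterns that expand to 64-bit-only
instructions such as MTVSRD and RLDIMI, so selecting the v4i32 build_vector
in the loop crashed llc (hence the test name BuildVectorICE). With the
guards, the 32BIT checks above show the vector being assembled through the
stack (stw followed by lxvw4x) instead, while the 64BIT checks keep the
direct-move sequence.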