Index: /home/seurer/llvm/llvm-test/include/llvm/CodeGen/FastISel.h =================================================================== --- /home/seurer/llvm/llvm-test/include/llvm/CodeGen/FastISel.h +++ /home/seurer/llvm/llvm-test/include/llvm/CodeGen/FastISel.h @@ -480,6 +480,12 @@ return 0; } + /// \brief Verify that a type is valid for FastISel. Should be overridden + /// by targets with more restrictive type legality in FastISel. + virtual bool isTypeLegal(EVT Evt) const { + return TLI.isTypeLegal(Evt); + } + /// \brief Check if \c Add is an add that can be safely folded into \c GEP. /// /// \c Add can be folded into \c GEP if: Index: /home/seurer/llvm/llvm-test/lib/CodeGen/SelectionDAG/FastISel.cpp =================================================================== --- /home/seurer/llvm/llvm-test/lib/CodeGen/SelectionDAG/FastISel.cpp +++ /home/seurer/llvm/llvm-test/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -173,7 +173,7 @@ // in ValueMap because Arguments are given virtual registers regardless // of whether FastISel can handle them. MVT VT = RealVT.getSimpleVT(); - if (!TLI.isTypeLegal(VT)) { + if (!isTypeLegal(RealVT)) { // Handle integer promotions, though, because they're common and easy. if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); @@ -387,7 +387,7 @@ // selector contains all of the 64-bit instructions from x86-64, // under the assumption that i64 won't be used if the target doesn't // support it. - if (!TLI.isTypeLegal(VT)) { + if (!isTypeLegal(VT)) { // MVT::i1 is special. Allow AND, OR, or XOR because they // don't require additional zeroing, which makes them easy. if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || @@ -1216,11 +1216,11 @@ return false; // Check if the destination type is legal. - if (!TLI.isTypeLegal(DstVT)) + if (!isTypeLegal(DstVT)) return false; // Check if the source operand is legal.
- if (!TLI.isTypeLegal(SrcVT)) + if (!isTypeLegal(SrcVT)) return false; unsigned InputReg = getRegForValue(I->getOperand(0)); @@ -1253,7 +1253,7 @@ EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstEVT = TLI.getValueType(I->getType()); if (SrcEVT == MVT::Other || DstEVT == MVT::Other || - !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) + !isTypeLegal(SrcEVT) || !isTypeLegal(DstEVT)) // Unhandled type. Halt "fast" selection and bail. return false; @@ -1388,7 +1388,7 @@ if (VT.getSizeInBits() > 64) return false; EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); - if (!TLI.isTypeLegal(IntVT)) + if (!isTypeLegal(IntVT)) return false; unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), @@ -1422,7 +1422,7 @@ if (!RealVT.isSimple()) return false; MVT VT = RealVT.getSimpleVT(); - if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + if (!isTypeLegal(RealVT) && VT != MVT::i1) return false; const Value *Op0 = EVI->getOperand(0); @@ -1993,7 +1993,7 @@ // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); - if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + if (VT == MVT::Other || !isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. 
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); Index: /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPC.td =================================================================== --- /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPC.td +++ /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPC.td @@ -296,7 +296,7 @@ FeatureFPRND, Feature64Bit, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr7", P7Model, - [DirectivePwr7, FeatureAltivec, + [DirectivePwr7, FeatureAltivec, FeatureVSX, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, @@ -305,7 +305,7 @@ Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */, - [DirectivePwr8, FeatureAltivec, + [DirectivePwr8, FeatureAltivec, FeatureVSX, FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, Index: /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPCFastISel.cpp =================================================================== --- /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPCFastISel.cpp +++ /home/seurer/llvm/llvm-test/lib/Target/PowerPC/PPCFastISel.cpp @@ -118,6 +118,7 @@ const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + bool isTypeLegal(EVT VT) const override; // Instruction selection routines. private: @@ -137,7 +138,8 @@ // Utility routines. 
private: - bool isTypeLegal(Type *Ty, MVT &VT); + bool isTypeLegalCommon(EVT Evt, MVT &VT) const; + bool isTypeLegal(Type *Ty, MVT &VT) const; bool isLoadTypeLegal(Type *Ty, MVT &VT); bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt, unsigned DestReg); @@ -248,22 +250,41 @@ } } -// Determine whether the type Ty is simple enough to be handled by -// fast-isel, and return its equivalent machine type in VT. -// FIXME: Copied directly from ARM -- factor into base class? -bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) { - EVT Evt = TLI.getValueType(Ty, true); - +// Utility function for the following two functions that determines +// whether the extended value type Evt is simple enough to be handled +// by fast-isel. The equivalent machine type is returned in VT. +bool PPCFastISel::isTypeLegalCommon(EVT Evt, MVT &VT) const { // Only handle simple types. if (Evt == MVT::Other || !Evt.isSimple()) return false; + // Note: overwrites VT's (second parameter's) value VT = Evt.getSimpleVT(); + // FastISel can't handle VSX registers (yet). + // FIXME: remove when VSX support is added. + if (PPCSubTarget->hasVSX() && (VT.isVector() || VT == MVT::f64)) { + return false; + } + // Handle all legal types, i.e. a register that will directly hold this // value. return TLI.isTypeLegal(VT); } // Determine whether the type Ty is simple enough to be handled by +// fast-isel, and return its equivalent machine type in VT. +bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) const { + EVT Evt = TLI.getValueType(Ty, true); + return isTypeLegalCommon(Evt, VT); +} + +// Determine whether the extended value type Evt is simple enough to be handled +// by fast-isel. +bool PPCFastISel::isTypeLegal(EVT Evt) const { + MVT VT; + return isTypeLegalCommon(Evt, VT); +} + +// Determine whether the type Ty is simple enough to be handled by // fast-isel as a load target, and return its equivalent machine type in VT. 
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { if (isTypeLegal(Ty, VT)) return true; @@ -1568,7 +1589,6 @@ } else { unsigned Reg = getRegForValue(RV); - if (Reg == 0) return false; Index: /home/seurer/llvm/llvm-test/test/CodeGen/Generic/select-cc.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/Generic/select-cc.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/Generic/select-cc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s +; RUN: llc -mattr=-vsx < %s ; PR2504 ; XFAIL: hexagon define <2 x double> @vector_select(<2 x double> %x, <2 x double> %y) nounwind { Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2007-09-08-unaligned.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2007-09-08-unaligned.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2007-09-08-unaligned.ll @@ -1,7 +1,11 @@ -; RUN: llc < %s | grep stfd | count 3 -; RUN: llc < %s | grep stfs | count 1 -; RUN: llc < %s | grep lfd | count 2 -; RUN: llc < %s | grep lfs | count 2 +; RUN: llc -mattr=-vsx < %s | grep stfd | count 3 +; RUN: llc -mattr=-vsx < %s | grep stfs | count 1 +; RUN: llc -mattr=-vsx < %s | grep lfd | count 2 +; RUN: llc -mattr=-vsx < %s | grep lfs | count 2 +; RUN: llc -mattr=+vsx < %s | grep stxsdx | count 3 +; RUN: llc -mattr=+vsx < %s | grep stfs | count 1 +; RUN: llc -mattr=+vsx < %s | grep lxsdx | count 2 +; RUN: llc -mattr=+vsx < %s | grep lfs | count 2 ; ModuleID = 'foo.c' target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc-apple-darwin8" Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2012-10-12-bitcast.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2012-10-12-bitcast.ll +++ 
/home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/2012-10-12-bitcast.ll @@ -1,4 +1,5 @@ -; RUN: llc -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mattr=-vsx -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mattr=+vsx -mattr=+altivec < %s | FileCheck -check-prefix=VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -18,3 +19,7 @@ ; CHECK: lwz 3, -16(1) ; CHECK: blr +; VSX: addi [[REGISTER:[0-9]+]], 1, -16 +; VSX: stxvd2x 34, 0, [[REGISTER]] +; VSX: lwz 3, -16(1) +; VSX: blr Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/buildvec_canonicalize.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/buildvec_canonicalize.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/buildvec_canonicalize.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mattr=+vsx -march=ppc32 -mattr=+altivec --enable-unsafe-fp-math | FileCheck -check-prefix=VSX %s define void @VXOR(<4 x float>* %P1, <4 x i32>* %P2, <4 x float>* %P3) { %tmp = load <4 x float>* %P3 ; <<4 x float>> [#uses=1] @@ -14,6 +15,9 @@ ; CHECK: @VXOR ; CHECK: vsplti ; CHECK: vxor +; VSX: @VXOR +; VSX: vxor +; VSX: xvmulsp define void @VSPLTI(<4 x i32>* %P2, <8 x i16>* %P3) { store <4 x i32> bitcast (<16 x i8> < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 > to <4 x i32>), <4 x i32>* %P2 @@ -22,3 +26,5 @@ } ; CHECK: @VSPLTI ; CHECK: vsplti +; VSX: @VSPLTI +; VSX: vsplti Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/copysignl.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/copysignl.ll +++ 
/home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/copysignl.ll @@ -1,4 +1,5 @@ -; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=-vsx < %s | FileCheck %s -check-prefix=NOVSX +; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu -mattr=+vsx < %s | FileCheck %s -check-prefix=VSX target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -8,9 +9,12 @@ %conv = fptrunc ppc_fp128 %call to double ret double %conv -; CHECK-LABEL: @foo_d_ll -; CHECK: fcpsgn 1, 3, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_d_ll +; NOVSX: fcpsgn 1, 3, 1 +; NOVSX: blr +; VSX-LABEL: @foo_d_ll +; VSX: xscpsgndp 1, 3, 1 +; VSX: blr } declare ppc_fp128 @copysignl(ppc_fp128, ppc_fp128) #0 @@ -21,9 +25,12 @@ %call = tail call double @copysign(double %a, double %conv) #0 ret double %call -; CHECK-LABEL: @foo_dl -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_dl +; NOVSX: fcpsgn 1, 2, 1 +; NOVSX: blr +; VSX-LABEL: @foo_dl +; VSX: xscpsgndp 1, 2, 1 +; VSX: blr } declare double @copysign(double, double) #0 @@ -34,9 +41,12 @@ %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %b) #0 ret ppc_fp128 %call -; CHECK-LABEL: @foo_ll -; CHECK: bl copysignl -; CHECK: blr +; NOVSX-LABEL: @foo_ll +; NOVSX: bl copysignl +; NOVSX: blr +; VSX-LABEL: @foo_ll +; VSX: bl copysignl +; VSX: blr } define ppc_fp128 @foo_ld(double %a, double %b) #0 { @@ -46,9 +56,12 @@ %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 ret ppc_fp128 %call -; CHECK-LABEL: @foo_ld -; CHECK: bl copysignl -; CHECK: blr +; NOVSX-LABEL: @foo_ld +; NOVSX: bl copysignl +; NOVSX: blr +; VSX-LABEL: @foo_ld +; VSX: bl copysignl +; VSX: blr } define ppc_fp128 @foo_lf(double %a, float %b) #0 { @@ -58,9 +71,12 @@ %call = tail call ppc_fp128 @copysignl(ppc_fp128 %conv, ppc_fp128 %conv1) #0 
ret ppc_fp128 %call -; CHECK-LABEL: @foo_lf -; CHECK: bl copysignl -; CHECK: blr +; NOVSX-LABEL: @foo_lf +; NOVSX: bl copysignl +; NOVSX: blr +; VSX-LABEL: @foo_lf +; VSX: bl copysignl +; VSX: blr } attributes #0 = { nounwind readnone } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fabs.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fabs.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fabs.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1" +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin | grep "fabs f1, f1" +; RUN: llc < %s -mattr=+vsx -march=ppc32 -mtriple=powerpc-apple-darwin | grep "xsabsdp f1, f1" define double @fabs(double %f) { entry: Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-call.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-call.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-call.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -mattr=-vsx -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 define i32 @t1(i8 signext %a) nounwind { %1 = sext i8 %a to i32 Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-cmp-imm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort 
-mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64 define void @t1a(float %a) uwtable ssp { entry: Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-conversion.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-conversion.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-conversion.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 -; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 | FileCheck %s --check-prefix=PPC970 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=970 -mattr=-vsx | FileCheck %s --check-prefix=PPC970 ;; Tests for 970 don't use -fast-isel-abort because we intentionally punt ;; to SelectionDAG in some cases. 
Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-load-store.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-load-store.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-load-store.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel -fast-isel-abort -mattr=-vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel -mattr=+vsx -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64VSX ; This test verifies that load/store instructions are properly generated, ; and that they pass MI verification. @@ -22,55 +23,73 @@ define i8 @t1() nounwind uwtable ssp { ; ELF64: t1 +; ELF64VSX: t1 %1 = load i8* @a, align 1 ; ELF64: lbz +; ELF64VSX: lbz %2 = add nsw i8 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i8 %2 } define i16 @t2() nounwind uwtable ssp { ; ELF64: t2 +; ELF64VSX: t2 %1 = load i16* @b, align 2 ; ELF64: lhz +; ELF64VSX: lhz %2 = add nsw i16 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i16 %2 } define i32 @t3() nounwind uwtable ssp { ; ELF64: t3 +; ELF64VSX: t3 %1 = load i32* @c, align 4 ; ELF64: lwz +; ELF64VSX: lwz %2 = add nsw i32 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i32 %2 } define i64 @t4() nounwind uwtable ssp { ; ELF64: t4 +; ELF64VSX: t4 %1 = load i64* @d, align 4 ; ELF64: ld +; ELF64VSX: ld %2 = add nsw i64 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i64 %2 } define float @t5() nounwind uwtable ssp { ; ELF64: t5 +; ELF64VSX: t5 %1 = load float* @e, align 4 ; ELF64: lfs +; ELF64VSX: lfs %2 = fadd float %1, 1.0 ; ELF64: fadds +; ELF64VSX: fadds ret float %2 } define double @t6() nounwind uwtable ssp { ; ELF64: t6 +; ELF64VSX: t6 %1 = load double* @f, align 8 ; ELF64: lfd +; 
ELF64VSX: lxsdx %2 = fadd double %1, 1.0 ; ELF64: fadd +; ELF64VSX: xsadddp ret double %2 } @@ -78,92 +97,127 @@ define void @t7(i8 %v) nounwind uwtable ssp { ; ELF64: t7 +; ELF64VSX: t7 %1 = add nsw i8 %v, 1 store i8 %1, i8* @a, align 1 ; ELF64: addis ; ELF64: addi ; ELF64: addi ; ELF64: stb +; ELF64VSX: addis +; ELF64VSX: addi +; ELF64VSX: addi +; ELF64VSX: stb ret void } define void @t8(i16 %v) nounwind uwtable ssp { ; ELF64: t8 +; ELF64VSX: t8 %1 = add nsw i16 %v, 1 store i16 %1, i16* @b, align 2 ; ELF64: addis ; ELF64: addi ; ELF64: addi ; ELF64: sth +; ELF64VSX: addis +; ELF64VSX: addi +; ELF64VSX: addi +; ELF64VSX: sth ret void } define void @t9(i32 %v) nounwind uwtable ssp { ; ELF64: t9 +; ELF64VSX: t9 %1 = add nsw i32 %v, 1 store i32 %1, i32* @c, align 4 ; ELF64: addis ; ELF64: addi ; ELF64: addi ; ELF64: stw +; ELF64VSX: addis +; ELF64VSX: addi +; ELF64VSX: addi +; ELF64VSX: stw ret void } define void @t10(i64 %v) nounwind uwtable ssp { ; ELF64: t10 +; ELF64VSX: t10 %1 = add nsw i64 %v, 1 store i64 %1, i64* @d, align 4 ; ELF64: addis ; ELF64: addi ; ELF64: addi ; ELF64: std +; ELF64VSX: addis +; ELF64VSX: addi +; ELF64VSX: addi +; ELF64VSX: std ret void } define void @t11(float %v) nounwind uwtable ssp { ; ELF64: t11 +; ELF64VSX: t11 %1 = fadd float %v, 1.0 store float %1, float* @e, align 4 ; ELF64: fadds ; ELF64: stfs +; ELF64VSX: fadds +; ELF64VSX: stfs ret void } define void @t12(double %v) nounwind uwtable ssp { ; ELF64: t12 +; ELF64VSX: t12 %1 = fadd double %v, 1.0 store double %1, double* @f, align 8 ; ELF64: fadd ; ELF64: stfd +; ELF64VSX: xsadddp +; ELF64VSX: stxsdx ret void } ;; lwa requires an offset divisible by 4, so we need lwax here. 
define i64 @t13() nounwind uwtable ssp { ; ELF64: t13 +; ELF64VSX: t13 %1 = load i32* getelementptr inbounds (%struct.s* @g, i32 0, i32 1), align 1 %2 = sext i32 %1 to i64 ; ELF64: li ; ELF64: lwax +; ELF64VSX: li +; ELF64VSX: lwax %3 = add nsw i64 %2, 1 ; ELF64: addi +; ELF64VSX: addi ret i64 %3 } ;; ld requires an offset divisible by 4, so we need ldx here. define i64 @t14() nounwind uwtable ssp { ; ELF64: t14 +; ELF64VSX: t14 %1 = load i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 ; ELF64: li ; ELF64: ldx +; ELF64VSX: li +; ELF64VSX: ldx %2 = add nsw i64 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i64 %2 } ;; std requires an offset divisible by 4, so we need stdx here. define void @t15(i64 %v) nounwind uwtable ssp { ; ELF64: t15 +; ELF64VSX: t15 %1 = add nsw i64 %v, 1 store i64 %1, i64* getelementptr inbounds (%struct.t* @h, i32 0, i32 1), align 1 ; ELF64: addis @@ -171,24 +225,35 @@ ; ELF64: addi ; ELF64: li ; ELF64: stdx +; ELF64VSX: addis +; ELF64VSX: addi +; ELF64VSX: addi +; ELF64VSX: li +; ELF64VSX: stdx ret void } ;; ld requires an offset that fits in 16 bits, so we need ldx here. define i64 @t16() nounwind uwtable ssp { ; ELF64: t16 +; ELF64VSX: t16 %1 = load i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 ; ELF64: lis ; ELF64: ori ; ELF64: ldx +; ELF64VSX: lis +; ELF64VSX: ori +; ELF64VSX: ldx %2 = add nsw i64 %1, 1 ; ELF64: addi +; ELF64VSX: addi ret i64 %2 } ;; std requires an offset that fits in 16 bits, so we need stdx here. 
define void @t17(i64 %v) nounwind uwtable ssp { ; ELF64: t17 +; ELF64VSX: t17 %1 = add nsw i64 %v, 1 store i64 %1, i64* getelementptr inbounds ([8192 x i64]* @i, i32 0, i64 5000), align 8 ; ELF64: addis @@ -197,6 +262,12 @@ ; ELF64: lis ; ELF64: ori ; ELF64: stdx +; ELF64VSX: addis +; ELF64VSX: ld +; ELF64VSX: addi +; ELF64VSX: lis +; ELF64VSX: ori +; ELF64VSX: stdx ret void } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-ret.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-ret.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fast-isel-ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s --check-prefix=ELF64 +; RUN: llc < %s -O0 -verify-machineinstrs -fast-isel-abort -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s --check-prefix=ELF64 define zeroext i1 @rettrue() nounwind uwtable ssp { entry: Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fcpsgn.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fcpsgn.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fcpsgn.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck -check-prefix=NOVSX %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -7,9 +8,12 @@ %call = tail call double @copysign(double %a, double %b) #0 ret double %call -; CHECK-LABEL: @foo_dd -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_dd +; NOVSX: fcpsgn 1, 2, 1 
+; NOVSX: blr +; VSX-LABEL: @foo_dd +; VSX: xscpsgndp 1, 2, 1 +; VSX: blr } declare double @copysign(double, double) #0 @@ -19,9 +23,12 @@ %call = tail call float @copysignf(float %a, float %b) #0 ret float %call -; CHECK-LABEL: @foo_ss -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_ss +; NOVSX: fcpsgn 1, 2, 1 +; NOVSX: blr +; VSX-LABEL: @foo_ss +; VSX: fcpsgn 1, 2, 1 +; VSX: blr } declare float @copysignf(float, float) #0 @@ -32,9 +39,12 @@ %call = tail call float @copysignf(float %a, float %conv) #0 ret float %call -; CHECK-LABEL: @foo_sd -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_sd +; NOVSX: fcpsgn 1, 2, 1 +; NOVSX: blr +; VSX-LABEL: @foo_sd +; VSX: fcpsgn 1, 2, 1 +; VSX: blr } define double @foo_ds(double %a, float %b) #0 { @@ -43,9 +53,12 @@ %call = tail call double @copysign(double %a, double %conv) #0 ret double %call -; CHECK-LABEL: @foo_ds -; CHECK: fcpsgn 1, 2, 1 -; CHECK: blr +; NOVSX-LABEL: @foo_ds +; NOVSX: fcpsgn 1, 2, 1 +; NOVSX: blr +; VSX-LABEL: @foo_ds +; VSX: fcpsgn 1, 2, 1 +; VSX: blr } attributes #0 = { nounwind readnone } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fma.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fma.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -fp-contract=fast | FileCheck %s +; RUN: llc < %s -mattr=-vsx -march=ppc32 -fp-contract=fast | FileCheck %s declare double @dummy1(double) #0 declare double @dummy2(double, double) #0 Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fnabs.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fnabs.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fnabs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | grep fnabs +; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fnabs declare double @fabs(double) Index: 
/home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp-branch.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp-branch.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp-branch.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | grep fcmp | count 1 +; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fcmp | count 1 declare i1 @llvm.isunordered.f64(double, double) Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp_to_uint.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp_to_uint.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fp_to_uint.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | grep fctiwz | count 1 +; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fctiwz | count 1 define i16 @foo(float %a) { entry: Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsel.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsel.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsel.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math | FileCheck -check-prefix=CHECK-FM %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=-vsx | FileCheck -check-prefix=CHECK-FM %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-no-infs-fp-math -enable-no-nans-fp-math -mattr=+vsx | FileCheck -check-prefix=CHECK-FM-VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -16,6 
+17,10 @@ ; CHECK-FM: @zerocmp1 ; CHECK-FM: fsel 1, 1, 2, 3 ; CHECK-FM: blr + +; CHECK-FM-VSX: @zerocmp1 +; CHECK-FM-VSX: fsel 1, 1, 2, 3 +; CHECK-FM-VSX: blr } define double @zerocmp2(double %a, double %y, double %z) #0 { @@ -32,6 +37,11 @@ ; CHECK-FM: fneg [[REG:[0-9]+]], 1 ; CHECK-FM: fsel 1, [[REG]], 3, 2 ; CHECK-FM: blr + +; CHECK-FM-VSX: @zerocmp2 +; CHECK-FM-VSX: xsnegdp [[REG:[0-9]+]], 1 +; CHECK-FM-VSX: fsel 1, [[REG]], 3, 2 +; CHECK-FM-VSX: blr } define double @zerocmp3(double %a, double %y, double %z) #0 { @@ -49,6 +59,12 @@ ; CHECK-FM: fneg [[REG2:[0-9]+]], 1 ; CHECK-FM: fsel 1, [[REG2]], [[REG]], 3 ; CHECK-FM: blr + +; CHECK-FM-VSX: @zerocmp3 +; CHECK-FM-VSX: xsnegdp [[REG2:[0-9]+]], 1 +; CHECK-FM-VSX: fsel [[REG:[0-9]+]], 1, 2, 3 +; CHECK-FM-VSX: fsel 1, [[REG2]], [[REG]], 3 +; CHECK-FM-VSX: blr } define double @min1(double %a, double %b) #0 { @@ -65,6 +81,11 @@ ; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 ; CHECK-FM: fsel 1, [[REG]], 1, 2 ; CHECK-FM: blr + +; CHECK-FM-VSX: @min1 +; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1 +; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2 +; CHECK-FM-VSX: blr } define double @max1(double %a, double %b) #0 { @@ -81,6 +102,11 @@ ; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 ; CHECK-FM: fsel 1, [[REG]], 1, 2 ; CHECK-FM: blr + +; CHECK-FM-VSX: @max1 +; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-FM-VSX: fsel 1, [[REG]], 1, 2 +; CHECK-FM-VSX: blr } define double @cmp1(double %a, double %b, double %y, double %z) #0 { @@ -97,6 +123,11 @@ ; CHECK-FM: fsub [[REG:[0-9]+]], 1, 2 ; CHECK-FM: fsel 1, [[REG]], 3, 4 ; CHECK-FM: blr + +; CHECK-FM-VSX: @cmp1 +; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-FM-VSX: fsel 1, [[REG]], 3, 4 +; CHECK-FM-VSX: blr } define double @cmp2(double %a, double %b, double %y, double %z) #0 { @@ -113,6 +144,11 @@ ; CHECK-FM: fsub [[REG:[0-9]+]], 2, 1 ; CHECK-FM: fsel 1, [[REG]], 4, 3 ; CHECK-FM: blr + +; CHECK-FM-VSX: @cmp2 +; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 2, 1 +; CHECK-FM-VSX: fsel 1, [[REG]], 4, 3 +; 
CHECK-FM-VSX: blr } define double @cmp3(double %a, double %b, double %y, double %z) #0 { @@ -131,6 +167,13 @@ ; CHECK-FM: fneg [[REG3:[0-9]+]], [[REG]] ; CHECK-FM: fsel 1, [[REG3]], [[REG2]], 4 ; CHECK-FM: blr + +; CHECK-FM-VSX: @cmp3 +; CHECK-FM-VSX: xssubdp [[REG:[0-9]+]], 1, 2 +; CHECK-FM-VSX: xsnegdp [[REG3:[0-9]+]], [[REG]] +; CHECK-FM-VSX: fsel [[REG2:[0-9]+]], [[REG]], 3, 4 +; CHECK-FM-VSX: fsel 1, [[REG3]], [[REG2]], 4 +; CHECK-FM-VSX: blr } attributes #0 = { nounwind readnone } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsqrt.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsqrt.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/fsqrt.ll @@ -1,13 +1,13 @@ ; fsqrt should be generated when the fsqrt feature is enabled, but not ; otherwise. -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=+fsqrt | \ ; RUN: grep "fsqrt f1, f1" -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g5 | \ ; RUN: grep "fsqrt f1, f1" -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mattr=-fsqrt | \ ; RUN: not grep "fsqrt f1, f1" -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mtriple=powerpc-apple-darwin8 -mcpu=g4 | \ ; RUN: not grep "fsqrt f1, f1" declare double @llvm.sqrt.f64(double) Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/i64_fp.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/i64_fp.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/i64_fp.ll @@ -1,21 +1,21 @@ ; fcfid and fctid should be generated when the 64bit 
feature is enabled, but not ; otherwise. -; RUN: llc < %s -march=ppc32 -mattr=+64bit | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+64bit | \ ; RUN: grep fcfid -; RUN: llc < %s -march=ppc32 -mattr=+64bit | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=+64bit | \ ; RUN: grep fctidz -; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g5 | \ ; RUN: grep fcfid -; RUN: llc < %s -march=ppc32 -mcpu=g5 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g5 | \ ; RUN: grep fctidz -; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=-64bit | \ ; RUN: not grep fcfid -; RUN: llc < %s -march=ppc32 -mattr=-64bit | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mattr=-64bit | \ ; RUN: not grep fctidz -; RUN: llc < %s -march=ppc32 -mcpu=g4 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g4 | \ ; RUN: not grep fcfid -; RUN: llc < %s -march=ppc32 -mcpu=g4 | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 -mcpu=g4 | \ ; RUN: not grep fctidz define double @X(double %Y) { Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-12.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-12.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-12.ll @@ -1,4 +1,5 @@ -; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium -mattr=-vsx < %s | FileCheck -check-prefix=NOVSX %s +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium -mattr=+vsx < %s | FileCheck -check-prefix=VSX %s ; Test peephole optimization for medium code model (32-bit TOC offsets) ; for loading a value from the constant pool (TOC-relative). 
@@ -11,8 +12,15 @@ ret double 0x3F4FD4920B498CF0 } -; CHECK: [[VAR:[a-z0-9A-Z_.]+]]: -; CHECK: .quad 4562098671269285104 -; CHECK-LABEL: test_double_const: -; CHECK: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha -; CHECK: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) +; NOVSX: [[VAR:[a-z0-9A-Z_.]+]]: +; NOVSX: .quad 4562098671269285104 +; NOVSX-LABEL: test_double_const: +; NOVSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; NOVSX: lfd {{[0-9]+}}, [[VAR]]@toc@l([[REG1]]) + +; VSX: [[VAR:[a-z0-9A-Z_.]+]]: +; VSX: .quad 4562098671269285104 +; VSX-LABEL: test_double_const: +; VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; VSX: addi [[REG1]], {{[0-9]+}}, [[VAR]]@toc@l +; VSX: lxsdx {{[0-9]+}}, 0, [[REG1]] Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-4.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-4.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/mcm-4.ll @@ -1,5 +1,7 @@ -; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false <%s | FileCheck -check-prefix=MEDIUM %s -; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false <%s | FileCheck -check-prefix=LARGE %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=MEDIUM %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=medium -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=MEDIUM-VSX %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=-vsx <%s | FileCheck -check-prefix=LARGE %s +; RUN: llc -mcpu=pwr7 -O0 -code-model=large -fast-isel=false -mattr=+vsx <%s | FileCheck -check-prefix=LARGE-VSX %s ; Test correct code generation for medium and large code model ; for loading a value from the constant pool (TOC-relative). 
@@ -19,9 +21,23 @@ ; MEDIUM: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l ; MEDIUM: lfd {{[0-9]+}}, 0([[REG2]]) +; MEDIUM-VSX: [[VAR:[a-z0-9A-Z_.]+]]: +; MEDIUM-VSX: .quad 4562098671269285104 +; MEDIUM-VSX-LABEL: test_double_const: +; MEDIUM-VSX: addis [[REG1:[0-9]+]], 2, [[VAR]]@toc@ha +; MEDIUM-VSX: addi [[REG2:[0-9]+]], [[REG1]], [[VAR]]@toc@l +; MEDIUM-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]] + ; LARGE: [[VAR:[a-z0-9A-Z_.]+]]: ; LARGE: .quad 4562098671269285104 ; LARGE-LABEL: test_double_const: ; LARGE: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha ; LARGE: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]]) ; LARGE: lfd {{[0-9]+}}, 0([[REG2]]) + +; LARGE-VSX: [[VAR:[a-z0-9A-Z_.]+]]: +; LARGE-VSX: .quad 4562098671269285104 +; LARGE-VSX-LABEL: test_double_const: +; LARGE-VSX: addis [[REG1:[0-9]+]], 2, [[VAR2:[a-z0-9A-Z_.]+]]@toc@ha +; LARGE-VSX: ld [[REG2:[0-9]+]], [[VAR2]]@toc@l([[REG1]]) +; LARGE-VSX: lxsdx {{[0-9]+}}, 0, [[REG2]] Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/ppc64-align-long-double.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/ppc64-align-long-double.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/ppc64-align-long-double.ll @@ -1,4 +1,5 @@ -; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false -mattr=-vsx < %s | FileCheck %s +; RUN: llc -mcpu=pwr7 -O0 -fast-isel=false -mattr=+vsx < %s | FileCheck -check-prefix=VSX %s ; Verify internal alignment of long double in a struct. 
The double ; argument comes in in GPR3; GPR4 is skipped; GPRs 5 and 6 contain @@ -24,3 +25,12 @@ ; CHECK: lfd 1, 64(1) ; CHECK: lfd 2, 72(1) +; VSX: std 6, 72(1) +; VSX: std 5, 64(1) +; VSX: std 4, 56(1) +; VSX: std 3, 48(1) +; VSX: li 3, 16 +; VSX: addi 4, 1, 48 +; VSX: lxsdx 1, 4, 3 +; VSX: li 3, 24 +; VSX: lxsdx 2, 4, 3 Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/recipest.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/recipest.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/recipest.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck -check-prefix=CHECK-SAFE %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=+vsx | FileCheck -check-prefix=CHECK-VSX %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=CHECK-SAFE-VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -24,10 +26,27 @@ ; CHECK: fmul ; CHECK: blr +; CHECK-VSX: @foo +; CHECK-VSX-DAG: xsrsqrtedp +; CHECK-VSX-DAG: xsnmsubadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmaddmdp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmuldp +; CHECK-VSX: blr + ; CHECK-SAFE: @foo ; CHECK-SAFE: fsqrt ; CHECK-SAFE: fdiv ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @foo +; CHECK-SAFE-VSX: xssqrtdp +; CHECK-SAFE-VSX: xsdivdp +; CHECK-SAFE-VSX: blr } define double @foof(double %a,
float %b) nounwind { @@ -45,10 +64,24 @@ ; CHECK-NEXT: fmul ; CHECK-NEXT: blr +; CHECK-VSX: @foof +; CHECK-VSX-DAG: frsqrtes +; CHECK-VSX-DAG: fnmsubs +; CHECK-VSX: fmuls +; CHECK-VSX: fmadds +; CHECK-VSX: fmuls +; CHECK-VSX: xsmuldp +; CHECK-VSX: blr + ; CHECK-SAFE: @foof ; CHECK-SAFE: fsqrts ; CHECK-SAFE: fdiv ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @foof +; CHECK-SAFE-VSX: fsqrts +; CHECK-SAFE-VSX: xsdivdp +; CHECK-SAFE-VSX: blr } define float @food(float %a, double %b) nounwind { @@ -57,7 +90,7 @@ %r = fdiv float %a, %y ret float %r -; CHECK: @foo +; CHECK: @food ; CHECK-DAG: frsqrte ; CHECK-DAG: fnmsub ; CHECK: fmul @@ -70,10 +103,28 @@ ; CHECK-NEXT: fmuls ; CHECK-NEXT: blr -; CHECK-SAFE: @foo +; CHECK-VSX: @food +; CHECK-VSX-DAG: xsrsqrtedp +; CHECK-VSX-DAG: xsnmsubadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmaddmdp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: frsp +; CHECK-VSX: fmuls +; CHECK-VSX: blr + +; CHECK-SAFE: @food ; CHECK-SAFE: fsqrt ; CHECK-SAFE: fdivs ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @food +; CHECK-SAFE-VSX: xssqrtdp +; CHECK-SAFE-VSX: fdivs +; CHECK-SAFE-VSX: blr } define float @goo(float %a, float %b) nounwind { @@ -90,10 +141,24 @@ ; CHECK: fmuls ; CHECK: blr +; CHECK-VSX: @goo +; CHECK-VSX-DAG: frsqrtes +; CHECK-VSX-DAG: fnmsubs +; CHECK-VSX: fmuls +; CHECK-VSX: fmadds +; CHECK-VSX: fmuls +; CHECK-VSX: fmuls +; CHECK-VSX: blr + ; CHECK-SAFE: @goo ; CHECK-SAFE: fsqrts ; CHECK-SAFE: fdivs ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @goo +; CHECK-SAFE-VSX: fsqrts +; CHECK-SAFE-VSX: fdivs +; CHECK-SAFE-VSX: blr } define <4 x float> @hoo(<4 x float> %a, <4 x float> %b) nounwind { @@ -104,9 +169,17 @@ ; CHECK: @hoo ; CHECK: vrsqrtefp +; CHECK-VSX: @hoo +; CHECK-VSX: xvrsqrtesp + ; CHECK-SAFE: @hoo ; CHECK-SAFE-NOT: vrsqrtefp ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @hoo +; CHECK-SAFE-VSX: xvsqrtsp +; CHECK-SAFE-VSX: xvdivsp +; CHECK-SAFE-VSX: blr } define double @foo2(double %a, double %b)
nounwind { @@ -122,9 +195,22 @@ ; CHECK: fmul ; CHECK: blr +; CHECK-VSX: @foo2 +; CHECK-VSX-DAG: xsredp +; CHECK-VSX-DAG: xsnmsubadp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsnmsubadp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: blr + ; CHECK-SAFE: @foo2 ; CHECK-SAFE: fdiv ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @foo2 +; CHECK-SAFE-VSX: xsdivdp +; CHECK-SAFE-VSX: blr } define float @goo2(float %a, float %b) nounwind { @@ -138,9 +224,20 @@ ; CHECK: fmuls ; CHECK: blr +; CHECK-VSX: @goo2 +; CHECK-VSX-DAG: fres +; CHECK-VSX-DAG: fnmsubs +; CHECK-VSX: fmadds +; CHECK-VSX: fmuls +; CHECK-VSX: blr + ; CHECK-SAFE: @goo2 ; CHECK-SAFE: fdivs ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @goo2 +; CHECK-SAFE-VSX: fdivs +; CHECK-SAFE-VSX: blr } define <4 x float> @hoo2(<4 x float> %a, <4 x float> %b) nounwind { @@ -150,15 +247,22 @@ ; CHECK: @hoo2 ; CHECK: vrefp +; CHECK-VSX: @hoo2 +; CHECK-VSX: xvresp + ; CHECK-SAFE: @hoo2 ; CHECK-SAFE-NOT: vrefp ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @hoo2 +; CHECK-SAFE-VSX: xvdivsp +; CHECK-SAFE-VSX: blr } define double @foo3(double %a) nounwind { %r = call double @llvm.sqrt.f64(double %a) ret double %r - + ; CHECK: @foo3 ; CHECK: fcmpu ; CHECK-DAG: frsqrte @@ -176,9 +280,31 @@ ; CHECK: fmadd ; CHECK: blr +; CHECK-VSX: @foo3 +; CHECK-VSX: xscmpudp +; CHECK-VSX-DAG: xsrsqrtedp +; CHECK-VSX-DAG: xsnmsubadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: fmr +; CHECK-VSX: xsmaddmdp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsmuldp +; CHECK-VSX: xsredp +; CHECK-VSX: xsnmsubadp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: xsnmsubadp +; CHECK-VSX: xsmaddadp +; CHECK-VSX: blr + ; CHECK-SAFE: @foo3 ; CHECK-SAFE: fsqrt ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @foo3 +; CHECK-SAFE-VSX: xssqrtdp +; CHECK-SAFE-VSX: blr } define float @goo3(float %a) nounwind { @@ -197,9 +323,25 @@ ; CHECK: fmadds ; CHECK: blr +; CHECK-VSX: @goo3 +; CHECK-VSX: fcmpu +; CHECK-VSX-DAG: frsqrtes +; CHECK-VSX-DAG: fnmsubs +; CHECK-VSX: fmuls +;
CHECK-VSX: fmadds +; CHECK-VSX: fmuls +; CHECK-VSX: fres +; CHECK-VSX: fnmsubs +; CHECK-VSX: fmadds +; CHECK-VSX: blr + ; CHECK-SAFE: @goo3 ; CHECK-SAFE: fsqrts ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @goo3 +; CHECK-SAFE-VSX: fsqrts +; CHECK-SAFE-VSX: blr } define <4 x float> @hoo3(<4 x float> %a) nounwind { @@ -211,8 +353,17 @@ ; CHECK-DAG: vrefp ; CHECK-DAG: vcmpeqfp +; CHECK-VSX: @hoo3 +; CHECK-VSX: xvrsqrtesp +; CHECK-VSX-DAG: xvresp +; CHECK-VSX-DAG: xvcmpeqsp + ; CHECK-SAFE: @hoo3 ; CHECK-SAFE-NOT: vrsqrtefp ; CHECK-SAFE: blr + +; CHECK-SAFE-VSX: @hoo3 +; CHECK-SAFE-VSX: xvsqrtsp +; CHECK-SAFE-VSX: blr } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/rounding-ops.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/rounding-ops.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/rounding-ops.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx < %s | FileCheck -check-prefix=NOVSX %s +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx < %s | FileCheck -check-prefix=VSX %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -6,8 +7,10 @@ %call = tail call float @floorf(float %x) nounwind readnone ret float %call -; CHECK-LABEL: test1: -; CHECK: frim 1, 1 +; NOVSX-LABEL: test1: +; NOVSX: frim 1, 1 +; VSX-LABEL: test1: +; VSX: frim 1, 1 } declare float @floorf(float) nounwind readnone @@ -16,8 +19,10 @@ %call = tail call double @floor(double %x) nounwind readnone ret double %call -; CHECK-LABEL: test2: -; CHECK: frim 1, 1 +; NOVSX-LABEL: test2: +; NOVSX: frim 1, 1 +; VSX-LABEL: test2: +; VSX: xsrdpim 1, 1 } declare double @floor(double) nounwind readnone @@ -26,8 +31,10 @@ %call = tail call float @roundf(float %x)
nounwind readnone ret float %call -; CHECK-LABEL: test3: -; CHECK: frin 1, 1 +; NOVSX-LABEL: test3: +; NOVSX: frin 1, 1 +; VSX-LABEL: test3: +; VSX: frin 1, 1 } declare float @roundf(float) nounwind readnone @@ -36,8 +43,10 @@ %call = tail call double @round(double %x) nounwind readnone ret double %call -; CHECK-LABEL: test4: -; CHECK: frin 1, 1 +; NOVSX-LABEL: test4: +; NOVSX: frin 1, 1 +; VSX-LABEL: test4: +; VSX: xsrdpi 1, 1 } declare double @round(double) nounwind readnone @@ -46,8 +55,10 @@ %call = tail call float @ceilf(float %x) nounwind readnone ret float %call -; CHECK-LABEL: test5: -; CHECK: frip 1, 1 +; NOVSX-LABEL: test5: +; NOVSX: frip 1, 1 +; VSX-LABEL: test5: +; VSX: frip 1, 1 } declare float @ceilf(float) nounwind readnone @@ -56,8 +67,10 @@ %call = tail call double @ceil(double %x) nounwind readnone ret double %call -; CHECK-LABEL: test6: -; CHECK: frip 1, 1 +; NOVSX-LABEL: test6: +; NOVSX: frip 1, 1 +; VSX-LABEL: test6: +; VSX: xsrdpip 1, 1 } declare double @ceil(double) nounwind readnone @@ -66,8 +79,10 @@ %call = tail call float @truncf(float %x) nounwind readnone ret float %call -; CHECK-LABEL: test9: -; CHECK: friz 1, 1 +; NOVSX-LABEL: test9: +; NOVSX: friz 1, 1 +; VSX-LABEL: test9: +; VSX: friz 1, 1 } declare float @truncf(float) nounwind readnone @@ -76,8 +91,10 @@ %call = tail call double @trunc(double %x) nounwind readnone ret double %call -; CHECK-LABEL: test10: -; CHECK: friz 1, 1 +; NOVSX-LABEL: test10: +; NOVSX: friz 1, 1 +; VSX-LABEL: test10: +; VSX: xsrdpiz 1, 1 } declare double @trunc(double) nounwind readnone Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/select-cc.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/select-cc.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/select-cc.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 +; RUN: llc < %s -mattr=-vsx -march=ppc32 ; PR3011 define <2 x double> @vector_select(<2 x double> %x, <2 x double> 
%y) nounwind { Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unaligned.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unaligned.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unaligned.ll @@ -1,4 +1,6 @@ -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck %s +target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -check-prefix=VSX %s target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128-n32" define void @foo1(i16* %p, i16* %r) nounwind { @@ -10,6 +12,10 @@ ; CHECK: @foo1 ; CHECK: lhz ; CHECK: sth + +; VSX: @foo1 +; VSX: lhz +; VSX: sth } define void @foo2(i32* %p, i32* %r) nounwind { @@ -21,6 +27,10 @@ ; CHECK: @foo2 ; CHECK: lwz ; CHECK: stw + +; VSX: @foo2 +; VSX: lwz +; VSX: stw } define void @foo3(i64* %p, i64* %r) nounwind { @@ -32,6 +42,10 @@ ; CHECK: @foo3 ; CHECK: ld ; CHECK: std + +; VSX: @foo3 +; VSX: ld +; VSX: std } define void @foo4(float* %p, float* %r) nounwind { @@ -43,6 +57,10 @@ ; CHECK: @foo4 ; CHECK: lfs ; CHECK: stfs + +; VSX: @foo4 +; VSX: lfs +; VSX: stfs } define void @foo5(double* %p, double* %r) nounwind { @@ -54,6 +72,10 @@ ; CHECK: @foo5 ; CHECK: lfd ; CHECK: stfd + +; VSX: @foo5 +; VSX: lxsdx +; VSX: stxsdx } define void @foo6(<4 x float>* %p, <4 x float>* %r) nounwind { @@ -69,5 +91,11 @@ ; CHECK-DAG: ld ; CHECK-DAG: stdx ; CHECK: stdx + +; VSX: @foo6 +; VSX-DAG: ld +; VSX-DAG: ld +; VSX-DAG: stdx +; VSX: stdx } Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unsafe-math.ll =================================================================== --- 
/home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unsafe-math.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/unsafe-math.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=ppc32 | grep fmul | count 2 -; RUN: llc < %s -march=ppc32 -enable-unsafe-fp-math | \ +; RUN: llc < %s -mattr=-vsx -march=ppc32 | grep fmul | count 2 +; RUN: llc < %s -mattr=-vsx -march=ppc32 -enable-unsafe-fp-math | \ ; RUN: grep fmul | count 1 define double @foo(double %X) nounwind { Index: /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/vec_mul.ll =================================================================== --- /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/vec_mul.ll +++ /home/seurer/llvm/llvm-test/test/CodeGen/PowerPC/vec_mul.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s -; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec | FileCheck %s -check-prefix=CHECK-LE +; RUN: llc < %s -mtriple=powerpc-unknown-linux-gnu -march=ppc32 -mattr=+altivec -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -march=ppc64 -mattr=+altivec -mattr=-vsx | FileCheck %s -check-prefix=CHECK-LE define <4 x i32> @test_v4i32(<4 x i32>* %X, <4 x i32>* %Y) { %tmp = load <4 x i32>* %X ; <<4 x i32>> [#uses=1]