Index: clang/docs/ClangCommandLineReference.rst =================================================================== --- clang/docs/ClangCommandLineReference.rst +++ clang/docs/ClangCommandLineReference.rst @@ -3145,6 +3145,8 @@ .. option:: -mdirect-move, -mno-direct-move +.. option:: -mefpu2 + .. option:: -mfloat128, -mno-float128 .. option:: -mfprnd, -mno-fprnd Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2600,6 +2600,7 @@ def mno_pcrel: Flag<["-"], "mno-pcrel">, Group; def mspe : Flag<["-"], "mspe">, Group; def mno_spe : Flag<["-"], "mno-spe">, Group; +def mefpu2 : Flag<["-"], "mefpu2">, Group; def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group, Flags<[CC1Option]>, HelpText<"Enable the extended Altivec ABI on AIX (AIX only). Uses volatile and nonvolatile vector registers">; def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group, Flags<[CC1Option]>, Index: clang/lib/Basic/Targets/PPC.cpp =================================================================== --- clang/lib/Basic/Targets/PPC.cpp +++ clang/lib/Basic/Targets/PPC.cpp @@ -56,7 +56,7 @@ HasP10Vector = true; } else if (Feature == "+pcrelative-memops") { HasPCRelativeMemops = true; - } else if (Feature == "+spe") { + } else if (Feature == "+spe" || Feature == "+efpu2") { HasSPE = true; LongDoubleWidth = LongDoubleAlign = 64; LongDoubleFormat = &llvm::APFloat::IEEEdouble(); @@ -401,6 +401,8 @@ void PPCTargetInfo::setFeatureEnabled(llvm::StringMap &Features, StringRef Name, bool Enabled) const { if (Enabled) { + if (Name == "efpu2") + Features["spe"] = true; // If we're enabling any of the vsx based features then enable vsx and // altivec. We'll diagnose any problems later. bool FeatureHasVSX = llvm::StringSwitch(Name) @@ -424,6 +426,8 @@ else Features[Name] = true; } else { + if (Name == "spe") + Features["efpu2"] = false; // If we're disabling altivec or vsx go ahead and disable all of the vsx // features. if ((Name == "altivec") || (Name == "vsx")) Index: clang/test/Driver/ppc-features.cpp =================================================================== --- clang/test/Driver/ppc-features.cpp +++ clang/test/Driver/ppc-features.cpp @@ -147,6 +147,9 @@ // CHECK-SPE: "-target-feature" "+spe" // CHECK-NOSPE: "-target-feature" "-spe" +// RUN: %clang -target powerpc %s -mefpu2 -c -### 2>&1 | FileCheck -check-prefix=CHECK-EFPU2 %s +// CHECK-EFPU2: "-target-feature" "+efpu2" + // Assembler features // RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -o %t.o -no-integrated-as 2>&1 | FileCheck -check-prefix=CHECK_BE_AS_ARGS %s // CHECK_BE_AS_ARGS: "-mppc64" Index: llvm/lib/Target/PowerPC/PPC.td =================================================================== --- llvm/lib/Target/PowerPC/PPC.td +++ llvm/lib/Target/PowerPC/PPC.td @@ -72,6 +72,9 @@ def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", "Enable SPE instructions", [FeatureHardFloat]>; +def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true", + "Enable Embedded Floating-Point APU 2 instructions", + [FeatureSPE]>; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -151,7 +151,9 @@ if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::GPRCRegClass); - addRegisterClass(MVT::f64, &PPC::SPERCRegClass); + // EFPU2 APU only supports f32 + if (!Subtarget.hasEFPU2()) + addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); Index: llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -100,6 +100,7 @@ bool HasAltivec; bool HasFPU; bool HasSPE; + bool HasEFPU2; bool HasVSX; bool NeedsTwoConstNR; bool HasP8Vector; @@ -260,6 +261,7 @@ bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } + bool hasEFPU2() const { return HasEFPU2; } bool hasFPU() const { return HasFPU; } bool hasVSX() const { return HasVSX; } bool needsTwoConstNR() const { return NeedsTwoConstNR; } Index: llvm/lib/Target/PowerPC/PPCSubtarget.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -77,6 +77,7 @@ HasHardFloat = false; HasAltivec = false; HasSPE = false; + HasEFPU2 = false; HasFPU = false; HasVSX = false; NeedsTwoConstNR = false; Index: llvm/test/CodeGen/PowerPC/spe.ll =================================================================== --- llvm/test/CodeGen/PowerPC/spe.ll +++ llvm/test/CodeGen/PowerPC/spe.ll @@ -1,6 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ -; RUN: -mattr=+spe | FileCheck %s +; RUN: split-file %s %t +; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/single.ll +; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/double.ll -check-prefix=SPE +; RUN: llc -verify-machineinstrs < %t/hwdouble.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+spe | FileCheck %t/hwdouble.ll -check-prefix=SPE +; RUN: llc -verify-machineinstrs < %t/single.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+efpu2 | FileCheck %t/single.ll +; RUN: llc -verify-machineinstrs < %t/double.ll -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+efpu2 | FileCheck %t/double.ll -check-prefix=EFPU2 + +;--- single.ll +; single tests (identical for -mattr=+spe and -mattr=+efpu2) declare float @llvm.fabs.float(float) define float @test_float_abs(float %a) #0 { @@ -75,30 +87,19 @@ ret float %v } -define float @test_dtos(double %a) { -; CHECK-LABEL: test_dtos: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efscfd 3, 3 -; CHECK-NEXT: blr - entry: - %v = fptrunc double %a to float - ret float %v -} - define i32 @test_fcmpgt(float %a, float %b) { ; CHECK-LABEL: test_fcmpgt: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB8_2 +; CHECK-NEXT: ble 0, .LBB7_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB8_3 -; CHECK-NEXT: .LBB8_2: # %fa +; CHECK-NEXT: b .LBB7_3 +; CHECK-NEXT: .LBB7_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB8_3: # %ret +; CHECK-NEXT: .LBB7_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -124,19 +125,19 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: bc 4, 1, .LBB8_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: bc 4, 1, .LBB8_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB9_4 +; CHECK-NEXT: bc 12, 1, .LBB8_4 ; CHECK-NEXT: # %bb.3: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB9_5 -; CHECK-NEXT: .LBB9_4: # %tr +; CHECK-NEXT: b .LBB8_5 +; CHECK-NEXT: .LBB8_4: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB9_5: # %ret +; CHECK-NEXT: .LBB8_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -162,19 +163,19 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: bc 4, 1, .LBB9_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: bc 4, 1, .LBB9_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB10_4 +; CHECK-NEXT: bc 12, 1, .LBB9_4 ; CHECK-NEXT: # %bb.3: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB10_5 -; CHECK-NEXT: .LBB10_4: # %fa +; CHECK-NEXT: b .LBB9_5 +; CHECK-NEXT: .LBB9_4: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB10_5: # %ret +; CHECK-NEXT: .LBB9_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -200,13 +201,13 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpgt 0, 3, 4 -; CHECK-NEXT: bgt 0, .LBB11_2 +; CHECK-NEXT: bgt 0, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB11_3 -; CHECK-NEXT: .LBB11_2: # %fa +; CHECK-NEXT: b .LBB10_3 +; CHECK-NEXT: .LBB10_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB11_3: # %ret +; CHECK-NEXT: .LBB10_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -233,13 +234,13 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB12_2 +; CHECK-NEXT: ble 0, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB12_3 -; CHECK-NEXT: .LBB12_2: # %fa +; CHECK-NEXT: b .LBB11_3 +; CHECK-NEXT: .LBB11_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB12_3: # %ret +; CHECK-NEXT: .LBB11_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -267,11 +268,11 @@ ; CHECK-NEXT: efscmpeq 1, 4, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB13_2 +; CHECK-NEXT: bc 12, 20, .LBB12_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB13_2: # %entry +; CHECK-NEXT: .LBB12_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -286,11 +287,11 @@ ; CHECK-NEXT: efscmpeq 1, 3, 3 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB14_2 +; CHECK-NEXT: bc 12, 20, .LBB13_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB14_2: # %entry +; CHECK-NEXT: .LBB13_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -307,11 +308,11 @@ ; CHECK-NEXT: efscmpeq 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnor 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB15_2 +; CHECK-NEXT: bc 12, 20, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB15_2: # %entry +; CHECK-NEXT: .LBB14_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -328,11 +329,11 @@ ; CHECK-NEXT: efscmpeq 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crorc 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB16_2 +; CHECK-NEXT: bc 12, 20, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB16_2: # %entry +; CHECK-NEXT: .LBB15_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -346,13 +347,13 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 4 -; CHECK-NEXT: bgt 0, .LBB17_2 +; CHECK-NEXT: bgt 0, .LBB16_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB17_3 -; CHECK-NEXT: .LBB17_2: # %fa +; CHECK-NEXT: b .LBB16_3 +; CHECK-NEXT: .LBB16_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB17_3: # %ret +; CHECK-NEXT: .LBB16_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -378,13 +379,13 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: ble 0, .LBB18_2 +; CHECK-NEXT: ble 0, .LBB17_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB18_3 -; CHECK-NEXT: .LBB18_2: # %fa +; CHECK-NEXT: b .LBB17_3 +; CHECK-NEXT: .LBB17_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB18_3: # %ret +; CHECK-NEXT: .LBB17_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -413,11 +414,11 @@ ; CHECK-NEXT: efscmplt 0, 3, 4 ; CHECK-NEXT: li 5, 1 ; CHECK-NEXT: crnor 20, 1, 20 -; CHECK-NEXT: bc 12, 20, .LBB19_2 +; CHECK-NEXT: bc 12, 20, .LBB18_2 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: ori 3, 5, 0 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB19_2: # %entry +; CHECK-NEXT: .LBB18_2: # %entry ; CHECK-NEXT: li 3, 0 ; CHECK-NEXT: blr entry: @@ -431,19 +432,19 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: bc 4, 1, .LBB19_4 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: efscmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: bc 4, 1, .LBB19_4 ; CHECK-NEXT: # %bb.2: # %entry ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB20_4 +; CHECK-NEXT: bc 12, 1, .LBB19_4 ; CHECK-NEXT: # %bb.3: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB20_5 -; CHECK-NEXT: .LBB20_4: # %fa +; CHECK-NEXT: b .LBB19_5 +; CHECK-NEXT: .LBB19_4: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB20_5: # %ret +; CHECK-NEXT: .LBB19_5: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -469,13 +470,13 @@ ; CHECK-NEXT: stwu 1, -16(1) ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: efscmplt 0, 3, 4 -; CHECK-NEXT: bgt 0, .LBB21_2 +; CHECK-NEXT: bgt 0, .LBB20_2 ; CHECK-NEXT: # %bb.1: # %tr ; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB21_3 -; CHECK-NEXT: .LBB21_2: # %fa +; CHECK-NEXT: b .LBB20_3 +; CHECK-NEXT: .LBB20_2: # %fa ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB21_3: # %ret +; CHECK-NEXT: .LBB20_3: # %ret ; CHECK-NEXT: stw 3, 12(1) ; CHECK-NEXT: lwz 3, 12(1) ; CHECK-NEXT: addi 1, 1, 16 @@ -554,15 +555,48 @@ ; Check that it's not loading a double } +;--- double.ll ; Double tests +; results depend on -mattr=+spe or -mattr=+efpu2 + +define float @test_dtos(double %a) { +; SPE-LABEL: test_dtos: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efscfd 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtos: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -16(1) +; EFPU2-NEXT: .cfi_def_cfa_offset 16 +; EFPU2-NEXT: .cfi_offset lr, 4 +; EFPU2-NEXT: bl __truncdfsf2 +; EFPU2-NEXT: lwz 0, 20(1) +; EFPU2-NEXT: addi 1, 1, 16 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr + entry: + %v = fptrunc double %a to float + ret float %v +} define void @test_double_abs(double * %aa) #0 { -; CHECK-LABEL: test_double_abs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evldd 4, 0(3) -; CHECK-NEXT: efdabs 4, 4 -; CHECK-NEXT: evstdd 4, 0(3) -; CHECK-NEXT: blr +; SPE-LABEL: test_double_abs: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evldd 4, 0(3) +; SPE-NEXT: efdabs 4, 4 +; SPE-NEXT: evstdd 4, 0(3) +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_double_abs: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: lwz 4, 0(3) +; EFPU2-NEXT: clrlwi 4, 4, 1 +; EFPU2-NEXT: stw 4, 0(3) +; EFPU2-NEXT: blr entry: %0 = load double, double * %aa %1 = tail call double @llvm.fabs.f64(double %0) #2 @@ -574,12 +608,19 @@ declare double @llvm.fabs.f64(double) #1 define void @test_dnabs(double * %aa) #0 { -; CHECK-LABEL: test_dnabs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evldd 4, 0(3) -; CHECK-NEXT: efdnabs 4, 4 -; CHECK-NEXT: evstdd 4, 0(3) -; CHECK-NEXT: blr +; SPE-LABEL: test_dnabs: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evldd 4, 0(3) +; SPE-NEXT: efdnabs 4, 4 +; SPE-NEXT: evstdd 4, 0(3) +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dnabs: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: lwz 4, 0(3) +; EFPU2-NEXT: oris 4, 4, 32768 +; EFPU2-NEXT: stw 4, 0(3) +; EFPU2-NEXT: blr entry: %0 = load double, double * %aa %1 = tail call double @llvm.fabs.f64(double %0) #2 @@ -589,88 +630,107 @@ } define double @test_ddiv(double %a, double %b) { -; CHECK-LABEL: test_ddiv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efddiv 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_ddiv: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efddiv 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_ddiv: +; EFPU2: bl __divdf3 entry: %v = fdiv double %a, %b ret double %v - } define double @test_dmul(double %a, double %b) { -; CHECK-LABEL: test_dmul: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdmul 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dmul: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdmul 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dmul: +; EFPU2: bl __muldf3 entry: %v = fmul double %a, %b ret double %v } define double @test_dadd(double %a, double %b) { -; CHECK-LABEL: test_dadd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdadd 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dadd: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdadd 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dadd: +; EFPU2: bl __adddf3 entry: %v = fadd double %a, %b ret double %v } define double @test_dsub(double %a, double %b) { -; CHECK-LABEL: test_dsub: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdsub 4, 3, 5 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dsub: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdsub 4, 3, 5 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dsub: +; EFPU2: bl __subdf3 entry: %v = fsub double %a, %b ret double %v } define double @test_dneg(double %a) { -; CHECK-LABEL: test_dneg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdneg 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dneg: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdneg 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dneg: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: xoris 3, 3, 32768 +; EFPU2-NEXT: blr entry: %v = fsub double -0.0, %a ret double %v } define double @test_stod(float %a) { -; CHECK-LABEL: test_stod: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfs 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_stod: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfs 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_stod: +; EFPU2: bl __extendsfdf2 entry: %v = fpext float %a to double ret double %v @@ -678,66 +738,75 @@ ; (un)ordered tests are expanded to une and oeq so verify define i1 @test_dcmpuno(double %a, double %b) { -; CHECK-LABEL: test_dcmpuno: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: efdcmpeq 1, 5, 5 -; CHECK-NEXT: crand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB35_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB35_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpuno: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: efdcmpeq 1, 5, 5 +; SPE-NEXT: crand 20, 5, 1 +; SPE-NEXT: bc 12, 20, .LBB9_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB9_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpuno: +; EFPU2: bl __unorddf2 entry: %r = fcmp uno double %a, %b ret i1 %r } define i1 @test_dcmpord(double %a, double %b) { -; CHECK-LABEL: test_dcmpord: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: crnand 20, 5, 1 -; CHECK-NEXT: bc 12, 20, .LBB36_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB36_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpord: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: crnand 20, 5, 1 +; SPE-NEXT: bc 12, 20, .LBB10_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB10_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpord: +; EFPU2: bl __unorddf2 entry: %r = fcmp ord double %a, %b ret i1 %r } define i32 @test_dcmpgt(double %a, double %b) { -; CHECK-LABEL: test_dcmpgt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB37_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB37_3 -; CHECK-NEXT: .LBB37_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB37_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpgt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: ble 0, .LBB11_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB11_3 +; SPE-NEXT: .LBB11_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB11_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpgt: +; EFPU2: bl __gtdf2 entry: %r = alloca i32, align 4 %c = fcmp ogt double %a, %b @@ -754,30 +823,33 @@ } define i32 @test_dcmpugt(double %a, double %b) { -; CHECK-LABEL: test_dcmpugt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB38_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB38_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmpgt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB38_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB38_5 -; CHECK-NEXT: .LBB38_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB38_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpugt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB12_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB12_4 +; SPE-NEXT: # %bb.2: # %entry +; SPE-NEXT: efdcmpgt 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB12_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB12_5 +; SPE-NEXT: .LBB12_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB12_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpugt: +; EFPU2: bl __ledf2 entry: %r = alloca i32, align 4 %c = fcmp ugt double %a, %b @@ -794,24 +866,27 @@ } define i32 @test_dcmple(double %a, double %b) { -; CHECK-LABEL: test_dcmple: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB39_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB39_3 -; CHECK-NEXT: .LBB39_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB39_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmple: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB13_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB13_3 +; SPE-NEXT: .LBB13_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB13_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmple: +; EFPU2: bl __gtdf2 entry: %r = alloca i32, align 4 %c = fcmp ule double %a, %b @@ -828,24 +903,27 @@ } define i32 @test_dcmpule(double %a, double %b) { -; CHECK-LABEL: test_dcmpule: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpgt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB40_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB40_3 -; CHECK-NEXT: .LBB40_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB40_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpule: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpgt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB14_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB14_3 +; SPE-NEXT: .LBB14_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB14_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpule: +; EFPU2: bl __gtdf2 entry: %r = alloca i32, align 4 %c = fcmp ule double %a, %b @@ -863,24 +941,27 @@ ; The type of comparison found in C's if (x == y) define i32 @test_dcmpeq(double %a, double %b) { -; CHECK-LABEL: test_dcmpeq: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpeq 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB41_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB41_3 -; CHECK-NEXT: .LBB41_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB41_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpeq: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpeq 0, 3, 5 +; SPE-NEXT: ble 0, .LBB15_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB15_3 +; SPE-NEXT: .LBB15_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB15_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpeq: +; EFPU2: bl __nedf2 entry: %r = alloca i32, align 4 %c = fcmp oeq double %a, %b @@ -897,30 +978,34 @@ } define i32 @test_dcmpueq(double %a, double %b) { -; CHECK-LABEL: test_dcmpueq: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB42_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB42_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB42_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB42_5 -; CHECK-NEXT: .LBB42_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB42_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpueq: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB16_4 +; SPE-NEXT: # %bb.2: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB16_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB16_5 +; SPE-NEXT: .LBB16_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB16_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpueq: +; EFPU2: bl __eqdf2 +; EFPU2: bl __unorddf2 entry: %r = alloca i32, align 4 %c = fcmp ueq double %a, %b @@ -937,47 +1022,54 @@ } define i1 @test_dcmpne(double %a, double %b) { -; CHECK-LABEL: test_dcmpne: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: efdcmpeq 5, 3, 4 -; CHECK-NEXT: crand 24, 5, 1 -; CHECK-NEXT: crorc 20, 21, 24 -; CHECK-NEXT: bc 12, 20, .LBB43_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB43_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpne: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: efdcmpeq 5, 3, 4 +; SPE-NEXT: crand 24, 5, 1 +; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: bc 12, 20, .LBB17_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB17_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpne: +; EFPU2: bl __unorddf2 +; EFPU2: bl __eqdf2 entry: %r = fcmp one double %a, %b ret i1 %r } define i32 @test_dcmpune(double %a, double %b) { -; CHECK-LABEL: test_dcmpune: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmpeq 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB44_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB44_3 -; CHECK-NEXT: .LBB44_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB44_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpune: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmpeq 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB18_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB18_3 +; SPE-NEXT: .LBB18_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB18_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpune: +; EFPU2: bl __eqdf2 entry: %r = alloca i32, align 4 %c = fcmp une double %a, %b @@ -994,24 +1086,27 @@ } define i32 @test_dcmplt(double %a, double %b) { -; CHECK-LABEL: test_dcmplt: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmplt 0, 3, 5 -; CHECK-NEXT: ble 0, .LBB45_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB45_3 -; CHECK-NEXT: .LBB45_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB45_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmplt: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: ble 0, .LBB19_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB19_3 +; SPE-NEXT: .LBB19_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB19_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmplt: +; EFPU2: bl __ltdf2 entry: %r = alloca i32, align 4 %c = fcmp olt double %a, %b @@ -1028,30 +1123,33 @@ } define i32 @test_dcmpult(double %a, double %b) { -; CHECK-LABEL: test_dcmpult: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: bc 4, 1, .LBB46_4 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: efdcmpeq 0, 3, 3 -; CHECK-NEXT: bc 4, 1, .LBB46_4 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: efdcmplt 0, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB46_4 -; CHECK-NEXT: # %bb.3: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: b .LBB46_5 -; CHECK-NEXT: .LBB46_4: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: .LBB46_5: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpult: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: bc 4, 1, .LBB20_4 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: efdcmpeq 0, 3, 3 +; SPE-NEXT: bc 4, 1, .LBB20_4 +; SPE-NEXT: # %bb.2: # %entry +; SPE-NEXT: efdcmplt 0, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB20_4 +; SPE-NEXT: # %bb.3: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: b .LBB20_5 +; SPE-NEXT: .LBB20_4: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: .LBB20_5: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpult: +; EFPU2: bl __gedf2 entry: %r = alloca i32, align 4 %c = fcmp ult double %a, %b @@ -1068,47 +1166,53 @@ } define i1 @test_dcmpge(double %a, double %b) { -; CHECK-LABEL: test_dcmpge: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evmergelo 4, 5, 6 -; CHECK-NEXT: li 7, 1 -; CHECK-NEXT: efdcmpeq 0, 4, 4 -; CHECK-NEXT: efdcmpeq 1, 3, 3 -; CHECK-NEXT: efdcmplt 5, 3, 4 -; CHECK-NEXT: crand 24, 5, 1 -; CHECK-NEXT: crorc 20, 21, 24 -; CHECK-NEXT: bc 12, 20, .LBB47_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: ori 3, 7, 0 -; CHECK-NEXT: blr -; CHECK-NEXT: .LBB47_2: # %entry -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpge: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evmergelo 4, 5, 6 +; SPE-NEXT: li 7, 1 +; SPE-NEXT: efdcmpeq 0, 4, 4 +; SPE-NEXT: efdcmpeq 1, 3, 3 +; SPE-NEXT: efdcmplt 5, 3, 4 +; SPE-NEXT: crand 24, 5, 1 +; SPE-NEXT: crorc 20, 21, 24 +; SPE-NEXT: bc 12, 20, .LBB21_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: ori 3, 7, 0 +; SPE-NEXT: blr +; SPE-NEXT: .LBB21_2: # %entry +; SPE-NEXT: li 3, 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpge: +; EFPU2: bl __gedf2 entry: %r = fcmp oge double %a, %b ret i1 %r } define i32 @test_dcmpuge(double %a, double %b) { -; CHECK-LABEL: test_dcmpuge: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdcmplt 0, 3, 5 -; CHECK-NEXT: bgt 0, .LBB48_2 -; CHECK-NEXT: # %bb.1: # %tr -; CHECK-NEXT: li 3, 1 -; CHECK-NEXT: b .LBB48_3 -; CHECK-NEXT: .LBB48_2: # %fa -; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: .LBB48_3: # %ret -; CHECK-NEXT: stw 3, 12(1) -; CHECK-NEXT: lwz 3, 12(1) -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr +; SPE-LABEL: test_dcmpuge: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdcmplt 0, 3, 5 +; SPE-NEXT: bgt 0, .LBB22_2 +; SPE-NEXT: # %bb.1: # %tr +; SPE-NEXT: li 3, 1 +; SPE-NEXT: b .LBB22_3 +; SPE-NEXT: .LBB22_2: # %fa +; SPE-NEXT: li 3, 0 +; SPE-NEXT: .LBB22_3: # %ret +; SPE-NEXT: stw 3, 12(1) +; SPE-NEXT: lwz 3, 12(1) +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dcmpuge: +; EFPU2: bl __ltdf2 entry: %r = alloca i32, align 4 %c = fcmp uge double %a, %b @@ -1125,97 +1229,102 @@ } define double @test_dselect(double %a, double %b, i1 %c) { -; CHECK-LABEL: test_dselect: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi. 7, 7, 1 -; CHECK-NEXT: evmergelo 5, 5, 6 -; CHECK-NEXT: evmergelo 4, 3, 4 -; CHECK-NEXT: bc 12, 1, .LBB49_2 -; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: evor 4, 5, 5 -; CHECK-NEXT: .LBB49_2: # %entry -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dselect: +; SPE: # %bb.0: # %entry +; SPE-NEXT: andi. 7, 7, 1 +; SPE-NEXT: evmergelo 5, 5, 6 +; SPE-NEXT: evmergelo 4, 3, 4 +; SPE-NEXT: bc 12, 1, .LBB23_2 +; SPE-NEXT: # %bb.1: # %entry +; SPE-NEXT: evor 4, 5, 5 +; SPE-NEXT: .LBB23_2: # %entry +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dselect: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: andi. 7, 7, 1 +; EFPU2-NEXT: bclr 12, 1, 0 +; EFPU2-NEXT: # %bb.1: # %entry +; EFPU2-NEXT: ori 3, 5, 0 +; EFPU2-NEXT: ori 4, 6, 0 +; EFPU2-NEXT: blr entry: %r = select i1 %c, double %a, double %b ret double %r } define i32 @test_dtoui(double %a) { -; CHECK-LABEL: test_dtoui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdctuiz 3, 3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dtoui: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdctuiz 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtoui: +; EFPU2: bl __fixunsdfsi entry: %v = fptoui double %a to i32 ret i32 %v } define i32 @test_dtosi(double %a) { -; CHECK-LABEL: test_dtosi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: efdctsiz 3, 3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dtosi: +; SPE: # %bb.0: # %entry +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: efdctsiz 3, 3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dtosi: +; EFPU2: bl __fixdfsi entry: %v = fptosi double %a to i32 ret i32 %v } define double @test_dfromui(i32 %a) { -; CHECK-LABEL: test_dfromui: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfui 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dfromui: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfui 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dfromui: +; EFPU2: bl __floatunsidf entry: %v = uitofp i32 %a to double ret double %v } define double @test_dfromsi(i32 %a) { -; CHECK-LABEL: test_dfromsi: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: efdcfsi 4, 3 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: blr +; SPE-LABEL: test_dfromsi: +; SPE: # %bb.0: # %entry +; SPE-NEXT: efdcfsi 4, 3 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_dfromsi: +; EFPU2: bl __floatsidf entry: %v = sitofp i32 %a to double ret double %v } -define i32 @test_dasmconst(double %x) { -; CHECK-LABEL: test_dasmconst: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: stwu 1, -16(1) -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: evstdd 3, 8(1) -; CHECK-NEXT: #APP -; CHECK-NEXT: efdctsi 3, 3 -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addi 1, 1, 16 -; CHECK-NEXT: blr -entry: - %x.addr = alloca double, align 8 - store double %x, double* %x.addr, align 8 - %0 = load double, double* %x.addr, align 8 - %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) - ret i32 %1 -} - declare double @test_spill_spe_regs(double, double); define dso_local void @test_func2() #0 { -; CHECK-LABEL: test_func2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: blr +; SPE-LABEL: test_func2: +; SPE: # %bb.0: # %entry +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_func2: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: blr entry: ret void } @@ -1223,120 +1332,174 @@ declare void @test_memset(i8* nocapture writeonly, i8, i32, i1) @global_var1 = global i32 0, align 4 define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind { -; CHECK-LABEL: test_spill: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -352(1) -; CHECK-NEXT: li 5, 256 -; CHECK-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill -; CHECK-NEXT: li 5, 264 -; CHECK-NEXT: evstddx 31, 1, 5 # 8-byte Folded Spill -; CHECK-NEXT: li 5, .LCPI56_0@l -; CHECK-NEXT: lis 6, .LCPI56_0@ha -; CHECK-NEXT: evlddx 5, 6, 5 -; CHECK-NEXT: stw 14, 280(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 15, 284(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 16, 288(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 17, 292(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 18, 296(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 19, 300(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 20, 304(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 21, 308(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 22, 312(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 23, 316(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 24, 320(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 25, 324(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 26, 328(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 27, 332(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 28, 336(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 29, 340(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 344(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 31, 348(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 17, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 18, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 19, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 20, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 21, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 22, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 23, 200(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 24, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 25, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 26, 224(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 27, 232(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill -; CHECK-NEXT: evmergelo 3, 3, 4 -; CHECK-NEXT: lwz 4, 360(1) -; CHECK-NEXT: efdadd 3, 3, 3 -; CHECK-NEXT: efdadd 3, 3, 5 -; CHECK-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill -; CHECK-NEXT: stw 4, 20(1) # 4-byte Folded Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: addi 3, 1, 76 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: li 5, 24 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: li 30, 0 -; CHECK-NEXT: bl test_memset -; CHECK-NEXT: lwz 3, 20(1) # 4-byte Folded Reload -; CHECK-NEXT: stw 30, 0(3) -; CHECK-NEXT: bl test_func2 -; CHECK-NEXT: addi 3, 1, 32 -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: li 5, 20 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: bl test_memset -; CHECK-NEXT: evldd 4, 24(1) # 8-byte Folded Reload -; CHECK-NEXT: li 5, 264 -; CHECK-NEXT: evmergehi 3, 4, 4 -; CHECK-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload -; CHECK-NEXT: li 5, 256 -; CHECK-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: # kill: def $r4 killed $r4 killed $s4 -; CHECK-NEXT: evldd 29, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 28, 240(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 27, 232(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 26, 224(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 25, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 24, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 23, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 22, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 21, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 20, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 19, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 18, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 17, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 16, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 15, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: evldd 14, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 31, 348(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 30, 344(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 340(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 28, 336(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 27, 332(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 26, 328(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 25, 324(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 24, 320(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 23, 316(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 22, 312(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 21, 308(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 20, 304(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 19, 300(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 18, 296(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 17, 292(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 16, 288(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 15, 284(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 14, 280(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 356(1) -; CHECK-NEXT: addi 1, 1, 352 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_spill: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -352(1) +; SPE-NEXT: li 5, 256 +; SPE-NEXT: evstddx 30, 1, 5 # 8-byte Folded Spill +; SPE-NEXT: li 5, 264 +; SPE-NEXT: evstddx 31, 1, 5 # 8-byte Folded Spill +; SPE-NEXT: li 5, .LCPI29_0@l +; SPE-NEXT: lis 6, .LCPI29_0@ha +; SPE-NEXT: evlddx 5, 6, 5 +; SPE-NEXT: stw 14, 280(1) # 4-byte Folded Spill +; SPE-NEXT: stw 15, 284(1) # 4-byte Folded Spill +; SPE-NEXT: stw 16, 288(1) # 4-byte Folded Spill +; SPE-NEXT: stw 17, 292(1) # 4-byte Folded Spill +; SPE-NEXT: stw 18, 296(1) # 4-byte Folded Spill +; SPE-NEXT: stw 19, 300(1) # 4-byte Folded Spill +; SPE-NEXT: stw 20, 304(1) # 4-byte Folded Spill +; SPE-NEXT: stw 21, 308(1) # 4-byte Folded Spill +; SPE-NEXT: stw 22, 312(1) # 4-byte Folded Spill +; SPE-NEXT: stw 23, 316(1) # 4-byte Folded Spill +; SPE-NEXT: stw 24, 320(1) # 4-byte Folded Spill +; SPE-NEXT: stw 25, 324(1) # 4-byte Folded Spill +; SPE-NEXT: stw 26, 328(1) # 4-byte Folded Spill +; SPE-NEXT: stw 27, 332(1) # 4-byte Folded Spill +; SPE-NEXT: stw 28, 336(1) # 4-byte Folded Spill +; SPE-NEXT: stw 29, 340(1) # 4-byte Folded Spill +; SPE-NEXT: stw 30, 344(1) # 4-byte Folded Spill +; SPE-NEXT: stw 31, 348(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 14, 128(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 15, 136(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 16, 144(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 17, 152(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 18, 160(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 19, 168(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 20, 176(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 21, 184(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 22, 192(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 23, 200(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 24, 208(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 25, 216(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 26, 224(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 27, 232(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 28, 240(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 29, 248(1) # 8-byte Folded Spill +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: lwz 4, 360(1) +; SPE-NEXT: efdadd 3, 3, 3 +; SPE-NEXT: efdadd 3, 3, 5 +; SPE-NEXT: evstdd 3, 24(1) # 8-byte Folded Spill +; SPE-NEXT: stw 4, 20(1) # 4-byte Folded Spill +; SPE-NEXT: #APP +; SPE-NEXT: #NO_APP +; SPE-NEXT: addi 3, 1, 76 +; SPE-NEXT: li 4, 0 +; SPE-NEXT: li 5, 24 +; SPE-NEXT: li 6, 1 +; SPE-NEXT: li 30, 0 +; SPE-NEXT: bl test_memset +; SPE-NEXT: lwz 3, 20(1) # 4-byte Folded Reload +; SPE-NEXT: stw 30, 0(3) +; SPE-NEXT: bl test_func2 +; SPE-NEXT: addi 3, 1, 32 +; SPE-NEXT: li 4, 0 +; SPE-NEXT: li 5, 20 +; SPE-NEXT: li 6, 1 +; SPE-NEXT: bl test_memset +; SPE-NEXT: evldd 4, 24(1) # 8-byte Folded Reload +; SPE-NEXT: li 5, 264 +; SPE-NEXT: evmergehi 3, 4, 4 +; SPE-NEXT: evlddx 31, 1, 5 # 8-byte Folded Reload +; SPE-NEXT: li 5, 256 +; SPE-NEXT: evlddx 30, 1, 5 # 8-byte Folded Reload +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: # kill: def $r4 killed $r4 killed $s4 +; SPE-NEXT: evldd 29, 248(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 28, 240(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 27, 232(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 26, 224(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 25, 216(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 24, 208(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 23, 200(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 22, 192(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 21, 184(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 20, 176(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 19, 168(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 18, 160(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 17, 152(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 16, 144(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 15, 136(1) # 8-byte Folded Reload +; SPE-NEXT: evldd 14, 128(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 31, 348(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 30, 344(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 340(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 28, 336(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 27, 332(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 26, 328(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 25, 324(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 24, 320(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 23, 316(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 22, 312(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 21, 308(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 20, 304(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 19, 300(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 18, 296(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 17, 292(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 16, 288(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 15, 284(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 14, 280(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 356(1) +; SPE-NEXT: addi 1, 1, 352 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_spill: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -176(1) +; EFPU2-NEXT: mr 5, 3 +; EFPU2-NEXT: mr 6, 4 +; EFPU2-NEXT: stw 27, 156(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 160(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 164(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 168(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 27, 104(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 28, 112(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 120(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 128(1) # 8-byte Folded Spill +; EFPU2-NEXT: lwz 28, 184(1) +; EFPU2-NEXT: bl __adddf3 +; EFPU2-NEXT: lis 5, 16393 +; EFPU2-NEXT: lis 6, -4069 +; EFPU2-NEXT: ori 5, 5, 8697 +; EFPU2-NEXT: ori 6, 6, 34414 +; EFPU2-NEXT: #APP +; EFPU2-NEXT: #NO_APP +; EFPU2-NEXT: bl __adddf3 +; EFPU2-NEXT: mr 30, 3 +; EFPU2-NEXT: mr 29, 4 +; EFPU2-NEXT: addi 3, 1, 52 +; EFPU2-NEXT: li 4, 0 +; EFPU2-NEXT: li 5, 24 +; EFPU2-NEXT: li 6, 1 +; EFPU2-NEXT: li 27, 0 +; EFPU2-NEXT: bl test_memset +; EFPU2-NEXT: stw 27, 0(28) +; EFPU2-NEXT: bl test_func2 +; EFPU2-NEXT: addi 3, 1, 8 +; EFPU2-NEXT: li 4, 0 +; EFPU2-NEXT: li 5, 20 +; EFPU2-NEXT: li 6, 1 +; EFPU2-NEXT: bl test_memset +; EFPU2-NEXT: mr 3, 30 +; EFPU2-NEXT: mr 4, 29 +; EFPU2-NEXT: evldd 30, 128(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 29, 120(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 28, 112(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 27, 104(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 168(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 164(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 160(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 156(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 180(1) +; EFPU2-NEXT: addi 1, 1, 176 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %v1 = alloca [13 x i32], align 4 %v2 = alloca [11 x i32], align 4 @@ -1357,49 +1520,93 @@ } define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { -; CHECK-LABEL: test_fma: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 4 -; CHECK-NEXT: .cfi_offset r29, -12 -; CHECK-NEXT: .cfi_offset r30, -8 -; CHECK-NEXT: .cfi_offset r29, -40 -; CHECK-NEXT: .cfi_offset r30, -32 -; CHECK-NEXT: cmpwi 3, 1 -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill -; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill -; CHECK-NEXT: blt 0, .LBB57_3 -; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: li 29, 0 -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB57_2: # %for.body -; CHECK-NEXT: # -; CHECK-NEXT: efscfsi 3, 29 -; CHECK-NEXT: mr 4, 3 -; CHECK-NEXT: bl fmaf -; CHECK-NEXT: addi 29, 29, 1 -; CHECK-NEXT: cmplw 30, 29 -; CHECK-NEXT: mr 5, 3 -; CHECK-NEXT: bne 0, .LBB57_2 -; CHECK-NEXT: b .LBB57_4 -; CHECK-NEXT: .LBB57_3: -; CHECK-NEXT: # implicit-def: $r5 -; CHECK-NEXT: .LBB57_4: # %for.cond.cleanup -; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: mr 3, 5 -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 52(1) -; CHECK-NEXT: addi 1, 1, 48 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr +; SPE-LABEL: test_fma: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -48(1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -32 +; SPE-NEXT: cmpwi 3, 1 +; SPE-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; SPE-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; SPE-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; SPE-NEXT: blt 0, .LBB30_3 +; SPE-NEXT: # %bb.1: # %for.body.preheader +; SPE-NEXT: mr 30, 3 +; SPE-NEXT: li 29, 0 +; SPE-NEXT: # implicit-def: $r5 +; SPE-NEXT: .LBB30_2: # %for.body +; SPE-NEXT: # +; SPE-NEXT: efscfsi 3, 29 +; SPE-NEXT: mr 4, 3 +; SPE-NEXT: bl fmaf +; SPE-NEXT: addi 29, 29, 1 +; SPE-NEXT: cmplw 30, 29 +; SPE-NEXT: mr 5, 3 +; SPE-NEXT: bne 0, .LBB30_2 +; SPE-NEXT: b .LBB30_4 +; SPE-NEXT: .LBB30_3: +; SPE-NEXT: # implicit-def: $r5 +; SPE-NEXT: .LBB30_4: # %for.cond.cleanup +; SPE-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; SPE-NEXT: mr 3, 5 +; SPE-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; SPE-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 52(1) +; SPE-NEXT: addi 1, 1, 48 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; +; EFPU2-LABEL: test_fma: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -48(1) +; EFPU2-NEXT: .cfi_def_cfa_offset 48 +; EFPU2-NEXT: .cfi_offset lr, 4 +; EFPU2-NEXT: .cfi_offset r29, -12 +; EFPU2-NEXT: .cfi_offset r30, -8 +; EFPU2-NEXT: .cfi_offset r29, -40 +; EFPU2-NEXT: .cfi_offset r30, -32 +; EFPU2-NEXT: cmpwi 3, 1 +; EFPU2-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; EFPU2-NEXT: blt 0, .LBB30_3 +; EFPU2-NEXT: # %bb.1: # %for.body.preheader +; EFPU2-NEXT: mr 30, 3 +; EFPU2-NEXT: li 29, 0 +; EFPU2-NEXT: # implicit-def: $r5 +; EFPU2-NEXT: .LBB30_2: # %for.body +; EFPU2-NEXT: # =>This Inner Loop Header: Depth=1 +; EFPU2-NEXT: efscfsi 3, 29 +; EFPU2-NEXT: mr 4, 3 +; EFPU2-NEXT: bl fmaf +; EFPU2-NEXT: addi 29, 29, 1 +; EFPU2-NEXT: cmplw 30, 29 +; EFPU2-NEXT: mr 5, 3 +; EFPU2-NEXT: bne 0, .LBB30_2 +; EFPU2-NEXT: b .LBB30_4 +; EFPU2-NEXT: .LBB30_3: +; EFPU2-NEXT: # implicit-def: $r5 +; EFPU2-NEXT: .LBB30_4: # %for.cond.cleanup +; EFPU2-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; EFPU2-NEXT: mr 3, 5 +; EFPU2-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 52(1) +; EFPU2-NEXT: addi 1, 1, 48 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %cmp8 = icmp sgt i32 %d, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -1426,49 +1633,106 @@ %struct.a = type { float, float } define void @d(%struct.a* %e, %struct.a* %f) { -; CHECK-LABEL: d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mflr 0 -; CHECK-NEXT: stw 0, 4(1) -; CHECK-NEXT: stwu 1, -48(1) -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset lr, 4 -; CHECK-NEXT: .cfi_offset r29, -12 -; CHECK-NEXT: .cfi_offset r30, -8 -; CHECK-NEXT: .cfi_offset r29, -40 -; CHECK-NEXT: .cfi_offset r30, -32 -; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill -; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill -; CHECK-NEXT: efdcfs 29, 4 -; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill -; CHECK-NEXT: mr 4, 29 -; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill -; CHECK-NEXT: efdcfs 30, 3 -; CHECK-NEXT: evmergehi 3, 29, 29 -; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: bctrl -; CHECK-NEXT: evmergehi 3, 30, 30 -; CHECK-NEXT: mr 4, 30 -; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3 -; CHECK-NEXT: bctrl -; CHECK-NEXT: li 3, .LCPI58_0@l -; CHECK-NEXT: lis 4, .LCPI58_0@ha -; CHECK-NEXT: evlddx 3, 4, 3 -; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload -; CHECK-NEXT: efdmul 3, 29, 3 -; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload -; CHECK-NEXT: efscfd 3, 3 -; CHECK-NEXT: stw 3, 0(3) -; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload -; CHECK-NEXT: lwz 0, 52(1) -; CHECK-NEXT: addi 1, 1, 48 -; CHECK-NEXT: mtlr 0 -; CHECK-NEXT: blr +; SPE-LABEL: d: +; SPE: # %bb.0: # %entry +; SPE-NEXT: mflr 0 +; SPE-NEXT: stw 0, 4(1) +; SPE-NEXT: stwu 1, -48(1) +; SPE-NEXT: .cfi_def_cfa_offset 48 +; SPE-NEXT: .cfi_offset lr, 4 +; SPE-NEXT: .cfi_offset r29, -12 +; SPE-NEXT: .cfi_offset r30, -8 +; SPE-NEXT: .cfi_offset r29, -40 +; SPE-NEXT: .cfi_offset r30, -32 +; SPE-NEXT: lwz 4, 0(4) +; SPE-NEXT: lwz 3, 0(3) +; SPE-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; SPE-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; SPE-NEXT: efdcfs 29, 4 +; SPE-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; SPE-NEXT: mr 4, 29 +; SPE-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; SPE-NEXT: efdcfs 30, 3 +; SPE-NEXT: evmergehi 3, 29, 29 +; SPE-NEXT: mtctr 3 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bctrl +; SPE-NEXT: evmergehi 3, 30, 30 +; SPE-NEXT: mr 4, 30 +; SPE-NEXT: mtctr 3 +; SPE-NEXT: # kill: def $r3 killed $r3 killed $s3 +; SPE-NEXT: bctrl +; SPE-NEXT: li 3, .LCPI31_0@l +; SPE-NEXT: lis 4, .LCPI31_0@ha +; SPE-NEXT: evlddx 3, 4, 3 +; SPE-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; SPE-NEXT: efdmul 3, 29, 3 +; SPE-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; SPE-NEXT: efscfd 3, 3 +; SPE-NEXT: stw 3, 0(3) +; SPE-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; SPE-NEXT: lwz 0, 52(1) +; SPE-NEXT: addi 1, 1, 48 +; SPE-NEXT: mtlr 0 +; SPE-NEXT: blr +; EFPU2-LABEL: d: +; EFPU2: # %bb.0: # %entry +; EFPU2-NEXT: mflr 0 +; EFPU2-NEXT: stw 0, 4(1) +; EFPU2-NEXT: stwu 1, -80(1) +; EFPU2-NEXT: .cfi_def_cfa_offset 80 +; EFPU2-NEXT: .cfi_offset lr, 4 +; EFPU2-NEXT: .cfi_offset r27, -20 +; EFPU2-NEXT: .cfi_offset r28, -16 +; EFPU2-NEXT: .cfi_offset r29, -12 +; EFPU2-NEXT: .cfi_offset r30, -8 +; EFPU2-NEXT: .cfi_offset r27, -72 +; EFPU2-NEXT: .cfi_offset r28, -64 +; EFPU2-NEXT: .cfi_offset r29, -56 +; EFPU2-NEXT: .cfi_offset r30, -48 +; EFPU2-NEXT: lwz 3, 0(3) +; EFPU2-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 28, 64(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; EFPU2-NEXT: stw 30, 72(1) # 4-byte Folded Spill +; EFPU2-NEXT: evstdd 27, 8(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill +; EFPU2-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill +; EFPU2-NEXT: mr 30, 4 +; EFPU2-NEXT: bl __extendsfdf2 +; EFPU2-NEXT: mr 29, 3 +; EFPU2-NEXT: lwz 3, 0(30) +; EFPU2-NEXT: mr 28, 4 +; EFPU2-NEXT: bl __extendsfdf2 +; EFPU2-NEXT: mtctr 3 +; EFPU2-NEXT: mr 30, 3 +; EFPU2-NEXT: mr 27, 4 +; EFPU2-NEXT: bctrl +; EFPU2-NEXT: mtctr 3 +; EFPU2-NEXT: mr 3, 29 +; EFPU2-NEXT: mr 4, 28 +; EFPU2-NEXT: bctrl +; EFPU2-NEXT: mr 3, 30 +; EFPU2-NEXT: mr 4, 27 +; EFPU2-NEXT: li 5, 0 +; EFPU2-NEXT: li 6, 0 +; EFPU2-NEXT: bl __muldf3 +; EFPU2-NEXT: bl __truncdfsf2 +; EFPU2-NEXT: stw 3, 0(3) +; EFPU2-NEXT: evldd 30, 32(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 29, 24(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 28, 16(1) # 8-byte Folded Reload +; EFPU2-NEXT: evldd 27, 8(1) # 8-byte Folded Reload +; EFPU2-NEXT: lwz 30, 72(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 29, 68(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 28, 64(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 27, 60(1) # 4-byte Folded Reload +; EFPU2-NEXT: lwz 0, 84(1) +; EFPU2-NEXT: addi 1, 1, 80 +; EFPU2-NEXT: mtlr 0 +; EFPU2-NEXT: blr entry: %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0 %1 = load float, float* undef @@ -1483,3 +1747,25 @@ store float %l, float* undef ret void } + +;--- hwdouble.ll +; split into separate file because the efd* instructions are invalid on efpu2 +define i32 @test_dasmconst(double %x) { +; SPE-LABEL: test_dasmconst: +; SPE: # %bb.0: # %entry +; SPE-NEXT: stwu 1, -16(1) +; SPE-NEXT: .cfi_def_cfa_offset 16 +; SPE-NEXT: evmergelo 3, 3, 4 +; SPE-NEXT: evstdd 3, 8(1) +; SPE-NEXT: #APP +; SPE-NEXT: efdctsi 3, 3 +; SPE-NEXT: #NO_APP +; SPE-NEXT: addi 1, 1, 16 +; SPE-NEXT: blr +entry: + %x.addr = alloca double, align 8 + store double %x, double* %x.addr, align 8 + %0 = load double, double* %x.addr, align 8 + %1 = call i32 asm sideeffect "efdctsi $0, $1", "=d,d"(double %0) + ret i32 %1 +}