Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -2596,6 +2596,7 @@ def mno_pcrel: Flag<["-"], "mno-pcrel">, Group<m_ppc_Features_Group>; def mspe : Flag<["-"], "mspe">, Group<m_ppc_Features_Group>; def mno_spe : Flag<["-"], "mno-spe">, Group<m_ppc_Features_Group>; +def mefpu2 : Flag<["-"], "mefpu2">, Group<m_ppc_Features_Group>; def mabi_EQ_vec_extabi : Flag<["-"], "mabi=vec-extabi">, Group<m_Group>, Flags<[CC1Option]>, HelpText<"Enable the extended Altivec ABI on AIX (AIX only). Uses volatile and nonvolatile vector registers">; def mabi_EQ_vec_default : Flag<["-"], "mabi=vec-default">, Group<m_Group>, Flags<[CC1Option]>, Index: llvm/lib/Target/PowerPC/PPC.td =================================================================== --- llvm/lib/Target/PowerPC/PPC.td +++ llvm/lib/Target/PowerPC/PPC.td @@ -72,6 +72,9 @@ def FeatureSPE : SubtargetFeature<"spe","HasSPE", "true", "Enable SPE instructions", [FeatureHardFloat]>; +def FeatureEFPU2 : SubtargetFeature<"efpu2", "HasEFPU2", "true", + "Enable Embedded Floating-Point APU 2 instructions", + [FeatureSPE]>; def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true", "Enable the MFOCRF instruction">; def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true", Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -703,6 +703,8 @@ bool useSoftFloat() const override; bool hasSPE() const; + + bool hasEFPU2() const; MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override { return MVT::i32; Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -151,7 +151,9 @@ if (!useSoftFloat()) { if (hasSPE()) { addRegisterClass(MVT::f32, &PPC::GPRCRegClass); 
- addRegisterClass(MVT::f64, &PPC::SPERCRegClass); + // EFPU2 APU only supports f32 + if (!hasEFPU2()) + addRegisterClass(MVT::f64, &PPC::SPERCRegClass); } else { addRegisterClass(MVT::f32, &PPC::F4RCRegClass); addRegisterClass(MVT::f64, &PPC::F8RCRegClass); @@ -1429,6 +1431,10 @@ return Subtarget.hasSPE(); } +bool PPCTargetLowering::hasEFPU2() const { + return Subtarget.hasEFPU2(); +} + bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } Index: llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -100,6 +100,7 @@ bool HasAltivec; bool HasFPU; bool HasSPE; + bool HasEFPU2; bool HasVSX; bool NeedsTwoConstNR; bool HasP8Vector; @@ -260,6 +261,7 @@ bool hasFPCVT() const { return HasFPCVT; } bool hasAltivec() const { return HasAltivec; } bool hasSPE() const { return HasSPE; } + bool hasEFPU2() const { return HasEFPU2; } bool hasFPU() const { return HasFPU; } bool hasVSX() const { return HasVSX; } bool needsTwoConstNR() const { return NeedsTwoConstNR; } Index: llvm/test/CodeGen/PowerPC/efpu2.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/efpu2.ll @@ -0,0 +1,1141 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc-unknown-linux-gnu \ +; RUN: -mattr=+efpu2 | FileCheck %s + +; Single tests +; identical to tests in spe.ll + +declare float @llvm.fabs.float(float) +define float @test_float_abs(float %a) #0 { +; CHECK-LABEL: test_float_abs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsabs 3, 3 +; CHECK-NEXT: blr + entry: + %0 = tail call float @llvm.fabs.float(float %a) + ret float %0 +} + +define float @test_fnabs(float %a) #0 { +; CHECK-LABEL: test_fnabs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsnabs 3, 3 +; CHECK-NEXT: blr + entry: + 
%0 = tail call float @llvm.fabs.float(float %a) + %sub = fsub float -0.000000e+00, %0 + ret float %sub +} + +define float @test_fdiv(float %a, float %b) { +; CHECK-LABEL: test_fdiv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsdiv 3, 3, 4 +; CHECK-NEXT: blr +entry: + %v = fdiv float %a, %b + ret float %v + +} + +define float @test_fmul(float %a, float %b) { +; CHECK-LABEL: test_fmul: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsmul 3, 3, 4 +; CHECK-NEXT: blr + entry: + %v = fmul float %a, %b + ret float %v +} + +define float @test_fadd(float %a, float %b) { +; CHECK-LABEL: test_fadd: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsadd 3, 3, 4 +; CHECK-NEXT: blr + entry: + %v = fadd float %a, %b + ret float %v +} + +define float @test_fsub(float %a, float %b) { +; CHECK-LABEL: test_fsub: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efssub 3, 3, 4 +; CHECK-NEXT: blr + entry: + %v = fsub float %a, %b + ret float %v +} + +define float @test_fneg(float %a) { +; CHECK-LABEL: test_fneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efsneg 3, 3 +; CHECK-NEXT: blr + entry: + %v = fsub float -0.0, %a + ret float %v +} + +; TODO (left unspecified by the author): this double-to-float truncation is currently expected to go through the __truncdfsf2 libcall. +define float @test_dtos(double %a) { +; CHECK-LABEL: test_dtos: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: bl __truncdfsf2 +; CHECK-NEXT: lwz 0, 20(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + entry: + %v = fptrunc double %a to float + ret float %v +} + +define i32 @test_fcmpgt(float %a, float %b) { +; CHECK-LABEL: test_fcmpgt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: ble 0, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB8_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; 
CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp ogt float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_fcmpugt(float %a, float %b) { +; CHECK-LABEL: test_fcmpugt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpeq 0, 4, 4 +; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: bc 4, 1, .LBB9_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: bc 12, 1, .LBB9_4 +; CHECK-NEXT: # %bb.3: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: b .LBB9_5 +; CHECK-NEXT: .LBB9_4: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: .LBB9_5: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp ugt float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_fcmple(float %a, float %b) { +; CHECK-LABEL: test_fcmple: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: efscmpeq 0, 4, 4 +; CHECK-NEXT: bc 4, 1, .LBB10_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: bc 12, 1, .LBB10_4 +; CHECK-NEXT: # %bb.3: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_4: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB10_5: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr 
+ entry: + %r = alloca i32, align 4 + %c = fcmp ole float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_fcmpule(float %a, float %b) { +; CHECK-LABEL: test_fcmpule: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpgt 0, 3, 4 +; CHECK-NEXT: bgt 0, .LBB11_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB11_3 +; CHECK-NEXT: .LBB11_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB11_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp ule float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +; The type of comparison found in C's if (x == y) +define i32 @test_fcmpeq(float %a, float %b) { +; CHECK-LABEL: test_fcmpeq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: ble 0, .LBB12_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB12_3 +; CHECK-NEXT: .LBB12_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB12_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp oeq float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +; (un)ordered tests are expanded to une and oeq so verify +define i1 @test_fcmpuno(float %a, float %b) { +; CHECK-LABEL: test_fcmpuno: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: efscmpeq 1, 4, 4 +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: crand 20, 5, 1 +; CHECK-NEXT: bc 12, 20, .LBB13_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB13_2: # %entry +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + entry: + %r = fcmp uno float %a, %b + ret i1 %r +} + +define i1 @test_fcmpord(float %a, float %b) { +; CHECK-LABEL: test_fcmpord: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efscmpeq 0, 4, 4 +; CHECK-NEXT: efscmpeq 1, 3, 3 +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: crnand 20, 5, 1 +; CHECK-NEXT: bc 12, 20, .LBB14_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB14_2: # %entry +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + entry: + %r = fcmp ord float %a, %b + ret i1 %r +} + +define i1 @test_fcmpueq(float %a, float %b) { +; CHECK-LABEL: test_fcmpueq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: efscmpeq 1, 4, 4 +; CHECK-NEXT: crnand 20, 5, 1 +; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: crnor 20, 1, 20 +; CHECK-NEXT: bc 12, 20, .LBB15_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB15_2: # %entry +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + entry: + %r = fcmp ueq float %a, %b + ret i1 %r +} + +define i1 @test_fcmpne(float %a, float %b) { +; CHECK-LABEL: test_fcmpne: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efscmpeq 0, 4, 4 +; CHECK-NEXT: efscmpeq 1, 3, 3 +; CHECK-NEXT: crand 20, 5, 1 +; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: crorc 20, 1, 20 +; CHECK-NEXT: bc 12, 20, .LBB16_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB16_2: # %entry +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + entry: + %r = fcmp one float %a, %b + ret i1 %r +} + +define i32 @test_fcmpune(float %a, float %b) { +; CHECK-LABEL: test_fcmpune: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpeq 0, 3, 4 +; CHECK-NEXT: bgt 0, .LBB17_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB17_3 +; CHECK-NEXT: .LBB17_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB17_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp une float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_fcmplt(float %a, float %b) { +; CHECK-LABEL: test_fcmplt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: ble 0, .LBB18_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB18_3 +; CHECK-NEXT: .LBB18_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB18_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp olt float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i1 @test_fcmpult(float %a, float %b) { +; CHECK-LABEL: test_fcmpult: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: efscmpeq 1, 4, 4 +; CHECK-NEXT: crnand 20, 5, 1 +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: li 5, 1 +; CHECK-NEXT: crnor 20, 1, 20 +; CHECK-NEXT: bc 12, 20, .LBB19_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB19_2: # %entry +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: blr + entry: + %r = fcmp ult float %a, %b + ret i1 %r +} + +define i32 
@test_fcmpge(float %a, float %b) { +; CHECK-LABEL: test_fcmpge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmpeq 0, 3, 3 +; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: efscmpeq 0, 4, 4 +; CHECK-NEXT: bc 4, 1, .LBB20_4 +; CHECK-NEXT: # %bb.2: # %entry +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: bc 12, 1, .LBB20_4 +; CHECK-NEXT: # %bb.3: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB20_5 +; CHECK-NEXT: .LBB20_4: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB20_5: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp oge float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_fcmpuge(float %a, float %b) { +; CHECK-LABEL: test_fcmpuge: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -16(1) +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: efscmplt 0, 3, 4 +; CHECK-NEXT: bgt 0, .LBB21_2 +; CHECK-NEXT: # %bb.1: # %tr +; CHECK-NEXT: li 3, 1 +; CHECK-NEXT: b .LBB21_3 +; CHECK-NEXT: .LBB21_2: # %fa +; CHECK-NEXT: li 3, 0 +; CHECK-NEXT: .LBB21_3: # %ret +; CHECK-NEXT: stw 3, 12(1) +; CHECK-NEXT: lwz 3, 12(1) +; CHECK-NEXT: addi 1, 1, 16 +; CHECK-NEXT: blr + entry: + %r = alloca i32, align 4 + %c = fcmp uge float %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + + +define i32 @test_ftoui(float %a) { +; CHECK-LABEL: test_ftoui: +; CHECK: # %bb.0: +; CHECK-NEXT: efsctuiz 3, 3 +; CHECK-NEXT: blr + %v = fptoui float %a to i32 + ret i32 %v +} + +define i32 @test_ftosi(float %a) { +; CHECK-LABEL: test_ftosi: +; CHECK: # %bb.0: +; CHECK-NEXT: 
efsctsiz 3, 3 +; CHECK-NEXT: blr + %v = fptosi float %a to i32 + ret i32 %v +} + +define float @test_ffromui(i32 %a) { +; CHECK-LABEL: test_ffromui: +; CHECK: # %bb.0: +; CHECK-NEXT: efscfui 3, 3 +; CHECK-NEXT: blr + %v = uitofp i32 %a to float + ret float %v +} + +define float @test_ffromsi(i32 %a) { +; CHECK-LABEL: test_ffromsi: +; CHECK: # %bb.0: +; CHECK-NEXT: efscfsi 3, 3 +; CHECK-NEXT: blr + %v = sitofp i32 %a to float + ret float %v +} + +define i32 @test_fasmconst(float %x) { +; CHECK-LABEL: test_fasmconst: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stwu 1, -32(1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stw 3, 20(1) +; CHECK-NEXT: stw 3, 24(1) +; CHECK-NEXT: lwz 3, 20(1) +; CHECK-NEXT: #APP +; CHECK-NEXT: efsctsi 3, 3 +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: addi 1, 1, 32 +; CHECK-NEXT: blr +entry: + %x.addr = alloca float, align 8 + store float %x, float* %x.addr, align 8 + %0 = load float, float* %x.addr, align 8 + %1 = call i32 asm sideeffect "efsctsi $0, $1", "=f,f"(float %0) + ret i32 %1 +; Check that it's not loading a double +} + +; Double tests +; efpu2 devices do not have a hardware-double implementation, +; check that soft-double implementation is used. 
+ +define void @test_double_abs(double * %aa) #0 { +; CHECK-LABEL: test_double_abs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz 4, 0(3) +; CHECK-NEXT: clrlwi 4, 4, 1 +; CHECK-NEXT: stw 4, 0(3) +; CHECK-NEXT: blr + entry: + %0 = load double, double * %aa + %1 = tail call double @llvm.fabs.f64(double %0) #2 + store double %1, double * %aa + ret void +} + +; Function Attrs: nounwind readnone +declare double @llvm.fabs.f64(double) #1 + +define void @test_dnabs(double * %aa) #0 { +; CHECK-LABEL: test_dnabs: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lwz 4, 0(3) +; CHECK-NEXT: oris 4, 4, 32768 +; CHECK-NEXT: stw 4, 0(3) +; CHECK-NEXT: blr + entry: + %0 = load double, double * %aa + %1 = tail call double @llvm.fabs.f64(double %0) #2 + %sub = fsub double -0.000000e+00, %1 + store double %sub, double * %aa + ret void +} + +define double @test_ddiv(double %a, double %b) { +; CHECK-LABEL: test_ddiv: +; CHECK: bl __divdf3 +entry: + %v = fdiv double %a, %b + ret double %v + +} + +define double @test_dmul(double %a, double %b) { +; CHECK-LABEL: test_dmul: +; CHECK: bl __muldf3 + entry: + %v = fmul double %a, %b + ret double %v +} + +define double @test_dadd(double %a, double %b) { +; CHECK-LABEL: test_dadd: +; CHECK: bl __adddf3 + entry: + %v = fadd double %a, %b + ret double %v +} + +define double @test_dsub(double %a, double %b) { +; CHECK-LABEL: test_dsub: +; CHECK: bl __subdf3 + entry: + %v = fsub double %a, %b + ret double %v +} + +define double @test_dneg(double %a) { +; CHECK-LABEL: test_dneg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xoris 3, 3, 32768 +; CHECK-NEXT: blr + entry: + %v = fsub double -0.0, %a + ret double %v +} + +define double @test_stod(float %a) { +; CHECK-LABEL: test_stod: +; CHECK: bl __extendsfdf2 + entry: + %v = fpext float %a to double + ret double %v +} + +; (un)ordered double tests are expected to call the __unorddf2 soft-float routine, so verify +define i1 @test_dcmpuno(double %a, double %b) { +; CHECK-LABEL: test_dcmpuno: +; CHECK: bl __unorddf2 + entry: + %r = fcmp uno 
double %a, %b + ret i1 %r +} + +define i1 @test_dcmpord(double %a, double %b) { +; CHECK-LABEL: test_dcmpord: +; CHECK: bl __unorddf2 + entry: + %r = fcmp ord double %a, %b + ret i1 %r +} + +define i32 @test_dcmpgt(double %a, double %b) { +; CHECK-LABEL: test_dcmpgt: +; CHECK: bl __gtdf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ogt double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmpugt(double %a, double %b) { +; CHECK-LABEL: test_dcmpugt: +; CHECK: bl __ledf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ugt double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmple(double %a, double %b) { +; CHECK-LABEL: test_dcmple: +; CHECK: bl __ledf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ole double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmpule(double %a, double %b) { +; CHECK-LABEL: test_dcmpule: +; CHECK: bl __gtdf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ule double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +; The type of comparison found in C's if (x == y) +define i32 @test_dcmpeq(double %a, double %b) { +; CHECK-LABEL: test_dcmpeq: +; CHECK: bl __nedf2 + entry: + %r = alloca i32, align 4 + %c = fcmp oeq double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret 
+ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmpueq(double %a, double %b) { +; CHECK-LABEL: test_dcmpueq: +; CHECK: bl __eqdf2 +; CHECK: bl __unorddf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ueq double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i1 @test_dcmpne(double %a, double %b) { +; CHECK-LABEL: test_dcmpne: +; CHECK: bl __unorddf2 +; CHECK: bl __eqdf2 + entry: + %r = fcmp one double %a, %b + ret i1 %r +} + +define i32 @test_dcmpune(double %a, double %b) { +; CHECK-LABEL: test_dcmpune: +; CHECK: bl __eqdf2 + entry: + %r = alloca i32, align 4 + %c = fcmp une double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmplt(double %a, double %b) { +; CHECK-LABEL: test_dcmplt: +; CHECK: bl __ltdf2 + entry: + %r = alloca i32, align 4 + %c = fcmp olt double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i32 @test_dcmpult(double %a, double %b) { +; CHECK-LABEL: test_dcmpult: +; CHECK: bl __gedf2 + entry: + %r = alloca i32, align 4 + %c = fcmp ult double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define i1 @test_dcmpge(double %a, double %b) { +; CHECK-LABEL: test_dcmpge: +; CHECK: bl __gedf2 + entry: + %r = fcmp oge double %a, %b + ret i1 %r +} + +define i32 @test_dcmpuge(double %a, double %b) { +; CHECK-LABEL: test_dcmpuge: +; CHECK: bl __ltdf2 + entry: + %r = alloca i32, 
align 4 + %c = fcmp uge double %a, %b + br i1 %c, label %tr, label %fa +tr: + store i32 1, i32* %r, align 4 + br label %ret +fa: + store i32 0, i32* %r, align 4 + br label %ret +ret: + %0 = load i32, i32* %r, align 4 + ret i32 %0 +} + +define double @test_dselect(double %a, double %b, i1 %c) { +; CHECK-LABEL: test_dselect: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi. 7, 7, 1 +; CHECK-NEXT: bclr 12, 1, 0 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: ori 3, 5, 0 +; CHECK-NEXT: ori 4, 6, 0 +; CHECK-NEXT: blr +entry: + %r = select i1 %c, double %a, double %b + ret double %r +} + +define i32 @test_dtoui(double %a) { +; CHECK-LABEL: test_dtoui: +; CHECK: bl __fixunsdfsi +entry: + %v = fptoui double %a to i32 + ret i32 %v +} + +define i32 @test_dtosi(double %a) { +; CHECK-LABEL: test_dtosi: +; CHECK: bl __fixdfsi +entry: + %v = fptosi double %a to i32 + ret i32 %v +} + +define double @test_dfromui(i32 %a) { +; CHECK-LABEL: test_dfromui: +; CHECK: bl __floatunsidf +entry: + %v = uitofp i32 %a to double + ret double %v +} + +define double @test_dfromsi(i32 %a) { +; CHECK-LABEL: test_dfromsi: +; CHECK: bl __floatsidf +entry: + %v = sitofp i32 %a to double + ret double %v +} + +declare double @test_spill_spe_regs(double, double); +define dso_local void @test_func2() #0 { +; CHECK-LABEL: test_func2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: blr +entry: + ret void +} + +declare void @test_memset(i8* nocapture writeonly, i8, i32, i1) +@global_var1 = global i32 0, align 4 +define double @test_spill(double %a, i32 %a1, i64 %a2, i8 * %a3, i32 *%a4, i32* %a5) nounwind { +; CHECK-LABEL: test_spill: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -176(1) +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: mr 6, 4 +; CHECK-NEXT: stw 27, 156(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 28, 160(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 29, 164(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 30, 168(1) # 4-byte Folded Spill +; CHECK-NEXT: evstdd 
27, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 28, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 29, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 30, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: lwz 28, 184(1) +; CHECK-NEXT: bl __adddf3 +; CHECK-NEXT: lis 5, 16393 +; CHECK-NEXT: lis 6, -4069 +; CHECK-NEXT: ori 5, 5, 8697 +; CHECK-NEXT: ori 6, 6, 34414 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bl __adddf3 +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: mr 29, 4 +; CHECK-NEXT: addi 3, 1, 52 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: li 5, 24 +; CHECK-NEXT: li 6, 1 +; CHECK-NEXT: li 27, 0 +; CHECK-NEXT: bl test_memset +; CHECK-NEXT: stw 27, 0(28) +; CHECK-NEXT: bl test_func2 +; CHECK-NEXT: addi 3, 1, 8 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: li 5, 20 +; CHECK-NEXT: li 6, 1 +; CHECK-NEXT: bl test_memset +; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: mr 4, 29 +; CHECK-NEXT: evldd 30, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 29, 120(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 28, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 27, 104(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 30, 168(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 164(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 28, 160(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 27, 156(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 0, 180(1) +; CHECK-NEXT: addi 1, 1, 176 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %v1 = alloca [13 x i32], align 4 + %v2 = alloca [11 x i32], align 4 + %0 = fadd double %a, %a + call void asm sideeffect "","~{s0},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9},~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19},~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29},~{s30},~{s31}"() nounwind + %1 = fadd double %0, 3.14159 + %2 = bitcast [13 x i32]* %v1 to i8* + call void @test_memset(i8* align 4 %2, i8 0, i32 24, i1 true) + store i32 0, i32* %a5, align 4 + call void @test_func2() + %3 = bitcast [11 x i32]* %v2 to i8* + call void 
@test_memset(i8* align 4 %3, i8 0, i32 20, i1 true) + br label %return + +return: + ret double %1 + +} + +define dso_local float @test_fma(i32 %d) local_unnamed_addr #0 { +; CHECK-LABEL: test_fma: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -48(1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: .cfi_offset r29, -12 +; CHECK-NEXT: .cfi_offset r30, -8 +; CHECK-NEXT: .cfi_offset r29, -40 +; CHECK-NEXT: .cfi_offset r30, -32 +; CHECK-NEXT: cmpwi 3, 1 +; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill +; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill +; CHECK-NEXT: blt 0, .LBB56_3 +; CHECK-NEXT: # %bb.1: # %for.body.preheader +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: li 29, 0 +; CHECK-NEXT: # implicit-def: $r5 +; CHECK-NEXT: .LBB56_2: # %for.body +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: efscfsi 3, 29 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: bl fmaf +; CHECK-NEXT: addi 29, 29, 1 +; CHECK-NEXT: cmplw 30, 29 +; CHECK-NEXT: mr 5, 3 +; CHECK-NEXT: bne 0, .LBB56_2 +; CHECK-NEXT: b .LBB56_4 +; CHECK-NEXT: .LBB56_3: +; CHECK-NEXT: # implicit-def: $r5 +; CHECK-NEXT: .LBB56_4: # %for.cond.cleanup +; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 0, 52(1) +; CHECK-NEXT: addi 1, 1, 48 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %cmp8 = icmp sgt i32 %d, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %e.0.lcssa = phi float [ undef, %entry ], [ %0, %for.body ] + ret float %e.0.lcssa + +for.body: ; preds = %for.body, %entry + %f.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %e.09 = phi float [ %0, 
%for.body ], [ undef, %entry ] + %conv = sitofp i32 %f.010 to float + %0 = tail call float @llvm.fma.f32(float %conv, float %conv, float %e.09) + %inc = add nuw nsw i32 %f.010, 1 + %exitcond = icmp eq i32 %inc, %d + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.fma.f32(float, float, float) #1 + +attributes #1 = { nounwind readnone speculatable willreturn } + +%struct.a = type { float, float } + +define void @d(%struct.a* %e, %struct.a* %f) { +; CHECK-LABEL: d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stw 0, 4(1) +; CHECK-NEXT: stwu 1, -80(1) +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 4 +; CHECK-NEXT: .cfi_offset r27, -20 +; CHECK-NEXT: .cfi_offset r28, -16 +; CHECK-NEXT: .cfi_offset r29, -12 +; CHECK-NEXT: .cfi_offset r30, -8 +; CHECK-NEXT: .cfi_offset r27, -72 +; CHECK-NEXT: .cfi_offset r28, -64 +; CHECK-NEXT: .cfi_offset r29, -56 +; CHECK-NEXT: .cfi_offset r30, -48 +; CHECK-NEXT: lwz 3, 0(3) +; CHECK-NEXT: stw 27, 60(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 28, 64(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 29, 68(1) # 4-byte Folded Spill +; CHECK-NEXT: stw 30, 72(1) # 4-byte Folded Spill +; CHECK-NEXT: evstdd 27, 8(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 28, 16(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 29, 24(1) # 8-byte Folded Spill +; CHECK-NEXT: evstdd 30, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 30, 4 +; CHECK-NEXT: bl __extendsfdf2 +; CHECK-NEXT: mr 29, 3 +; CHECK-NEXT: lwz 3, 0(30) +; CHECK-NEXT: mr 28, 4 +; CHECK-NEXT: bl __extendsfdf2 +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: mr 30, 3 +; CHECK-NEXT: mr 27, 4 +; CHECK-NEXT: bctrl +; CHECK-NEXT: mtctr 3 +; CHECK-NEXT: mr 3, 29 +; CHECK-NEXT: mr 4, 28 +; CHECK-NEXT: bctrl +; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: mr 4, 27 +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: bl __muldf3 +; CHECK-NEXT: bl __truncdfsf2 +; CHECK-NEXT: stw 3, 
0(3) +; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 29, 24(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 28, 16(1) # 8-byte Folded Reload +; CHECK-NEXT: evldd 27, 8(1) # 8-byte Folded Reload +; CHECK-NEXT: lwz 30, 72(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 29, 68(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 28, 64(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 27, 60(1) # 4-byte Folded Reload +; CHECK-NEXT: lwz 0, 84(1) +; CHECK-NEXT: addi 1, 1, 80 +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +entry: + %0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0 + %1 = load float, float* undef + %conv = fpext float %1 to double + %2 = load float, float* %0 + %g = fpext float %2 to double + %3 = call i32 undef(double %g) + %h = call i32 undef(double %conv) + %n = sitofp i32 %3 to double + %k = fmul double %g, %n + %l = fptrunc double %k to float + store float %l, float* undef + ret void +}