diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1332,6 +1332,7 @@
 
   bool convertToNonDenormSingle(APInt &ArgAPInt);
   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
+  bool checkConvertToNonDenormSingle(const APFloat &ArgAPFloat);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8820,6 +8820,18 @@
   return Success;
 }
 
+// Nondestructive check for convertToNonDenormSingle.
+bool llvm::checkConvertToNonDenormSingle(const APFloat &ArgAPFloat) {
+  // Work on a copy so the caller's value is untouched. Returns true only when
+  // the value converts to a non-denormal single without losing information.
+  APFloat APFloatToConvert = ArgAPFloat;
+  bool LosesInfo = true;
+  APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+                           &LosesInfo);
+
+  return (!LosesInfo && !APFloatToConvert.isDenormal());
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it. If we CAN select this case, and if it
 // selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -16115,10 +16127,8 @@
   case MVT::f32:
   case MVT::f64:
     if (Subtarget.hasPrefixInstrs()) {
-      // With prefixed instructions, we can materialize anything that can be
-      // represented with a 32-bit immediate, not just positive zero.
-      APFloat APFloatOfImm = Imm;
-      return convertToNonDenormSingle(APFloatOfImm);
+      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
+      return true;
     }
     LLVM_FALLTHROUGH;
   case MVT::ppcf128:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -1108,6 +1108,7 @@
   case PPC::XXLXORspz:
   case PPC::XXLXORdpz:
   case PPC::XXLEQVOnes:
+  case PPC::XXSPLTI32DX:
   case PPC::V_SET0B:
   case PPC::V_SET0H:
   case PPC::V_SET0:
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -399,6 +399,40 @@
                                    SDLoc(N), MVT::i32);
 }]>;
 
+// Matches a non-zero FP immediate that cannot be converted to a non-denormal
+// single precision value. Convertible values are left to the XXSPLTIDP
+// patterns; everything matched here is materialized with XXSPLTI32DX.
+def nzFPImmAsi64 : PatLeaf<(fpimm), [{
+  APFloat APFloatOfN = N->getValueAPF();
+  return !N->isExactlyValue(+0.0) && !checkConvertToNonDenormSingle(APFloatOfN);
+}]>;
+
+// Get the Hi 32 bits of the double-format image of an FP immediate.
+def getFPAs64BitIntHi : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  // An f32 scalar is held in double format in the register, so widen the
+  // immediate to IEEEdouble before splitting its 64-bit image.
+  bool Unused;
+  APFloatOfN.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                     &Unused);
+  uint32_t Hi = (uint32_t)((APFloatOfN.bitcastToAPInt().getZExtValue() &
+                            0xFFFFFFFF00000000LL) >> 32);
+  return CurDAG->getTargetConstant(Hi, SDLoc(N), MVT::i32);
+}]>;
+
+// Get the Lo 32 bits of the double-format image of an FP immediate.
+def getFPAs64BitIntLo : SDNodeXForm<fpimm, [{
+  APFloat APFloatOfN = N->getValueAPF();
+  // An f32 scalar is held in double format in the register, so widen the
+  // immediate to IEEEdouble before splitting its 64-bit image.
+  bool Unused;
+  APFloatOfN.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+                     &Unused);
+  uint32_t Lo = (uint32_t)(APFloatOfN.bitcastToAPInt().getZExtValue() &
+                           0xFFFFFFFF);
+  return CurDAG->getTargetConstant(Lo, SDLoc(N), MVT::i32);
+}]>;
+
 def imm34 : PatLeaf<(imm), [{
   return isInt<34>(N->getSExtValue());
 }]>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1867,14 +1867,6 @@
                             "xxspltidp $XT, $IMM32", IIC_VecGeneral,
                             [(set v2f64:$XT,
                                   (PPCxxspltidp i32:$IMM32))]>;
-  def XXSPLTI32DX :
-      8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
-                             (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
-                             "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
-                             [(set v2i64:$XT,
-                                   (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
-                                                   i32:$IMM32))]>,
-                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
   def XXPERMX :
     8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
                             vsrc:$XC, u3imm:$UIM),
@@ -1898,6 +1890,19 @@
                             IIC_VecGeneral, []>;
 }
 
+// XXSPLTI32DX needs extra flags to make sure the compiler does not attempt
+// to spill part of the instruction when the values are similar.
+let isReMaterializable = 1, isMoveImm = 1, Predicates = [PrefixInstrs] in {
+  def XXSPLTI32DX :
+      8RR_DForm_IMM32_XT6_IX<32, 0, (outs vsrc:$XT),
+                             (ins vsrc:$XTi, u1imm:$IX, i32imm:$IMM32),
+                             "xxsplti32dx $XT, $IX, $IMM32", IIC_VecGeneral,
+                             [(set v2i64:$XT,
+                                   (PPCxxsplti32dx v2i64:$XTi, i32:$IX,
+                                                   i32:$IMM32))]>,
+                             RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
+}
+
 let Predicates = [IsISA3_1] in {
   def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
                             "setbc $RT, $BI", IIC_IntCompare, []>;
@@ -2623,6 +2628,19 @@
           (COPY_TO_REGCLASS (XXSPLTIDP (getFPAs32BitInt fpimm:$A)),
                             VSFRC)>;
 
+// To replace constant pool with XXSPLTI32DX for scalars.
+def : Pat<(f32 nzFPImmAsi64:$A),
+          (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0,
+                                         (getFPAs64BitIntHi $A)),
+                                         1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
+def : Pat<(f64 nzFPImmAsi64:$A),
+          (COPY_TO_REGCLASS (XXSPLTI32DX (XXSPLTI32DX (IMPLICIT_DEF), 0,
+                                         (getFPAs64BitIntHi $A)),
+                                         1, (getFPAs64BitIntLo $A)),
+                            VSRC)>;
+
 // Anonymous patterns for XXEVAL
 // AND
 // and(A, B, C)
diff --git a/llvm/test/CodeGen/PowerPC/constant-pool.ll b/llvm/test/CodeGen/PowerPC/constant-pool.ll
--- a/llvm/test/CodeGen/PowerPC/constant-pool.ll
+++ b/llvm/test/CodeGen/PowerPC/constant-pool.ll
@@ -9,7 +9,9 @@
 define float @FloatConstantPool() {
 ; CHECK-LABEL: FloatConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfs f1, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 940572664
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 1073741824
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: FloatConstantPool:
@@ -24,7 +26,9 @@
 define double @DoubleConstantPool() {
 ; CHECK-LABEL: DoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI1_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1048574
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 780229072
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: DoubleConstantPool:
@@ -39,8 +43,12 @@
 define ppc_fp128 @LongDoubleConstantPool() {
 ; CHECK-LABEL: LongDoubleConstantPool:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f1, .LCPI2_0@PCREL(0), 1
-; CHECK-NEXT:    plfd f2, .LCPI2_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 56623104
+; CHECK-NEXT:    xxsplti32dx vs2, 0, -2146625897
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -609716532
+; CHECK-NEXT:    xxsplti32dx vs2, 1, 1339675259
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
+; CHECK-NEXT:    # kill: def $f2 killed $f2 killed $vsl2
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: LongDoubleConstantPool:
@@ -185,9 +193,11 @@
 define double @two_constants(double %a) {
 ; CHECK-LABEL: two_constants:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI11_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI11_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -212,11 +222,15 @@
 ; CHECK-NEXT:    cmplwi r3, 0
 ; CHECK-NEXT:    beq cr0, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    plfd f1, .LCPI12_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs1, 1, -343597384
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ; CHECK-NEXT:  .LBB12_2: # %if.end
-; CHECK-NEXT:    plfd f0, .LCPI12_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1076085391
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 1546188227
 ; CHECK-NEXT:    xsadddp f1, f1, f0
+; CHECK-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-P9-LABEL: two_constants_two_bb:
@@ -248,11 +262,14 @@
 define double @three_constants_f64(double %a, double %c) {
 ; CHECK-LABEL: three_constants_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plfd f0, .LCPI13_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs0, 0, 1074446467
+; CHECK-NEXT:    xxsplti32dx vs0, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f1, f0
-; CHECK-NEXT:    plfd f1, .LCPI13_1@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073922179
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 309237645
 ; CHECK-NEXT:    xsadddp f0, f0, f1
-; CHECK-NEXT:    plfd f1, .LCPI13_2@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1073948393
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 2027224564
 ; CHECK-NEXT:    xsadddp f1, f0, f1
 ; CHECK-NEXT:    blr
 ;
@@ -340,21 +357,26 @@
 
 define ppc_fp128 @three_constants_ppcf128(ppc_fp128 %a, ppc_fp128 %c) {
 ; CHECK-LABEL: three_constants_ppcf128:
-; CHECK:         .localentry three_constants_ppcf128, 1
-; CHECK-NEXT:  # %bb.0: # %entry
+; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    mflr r0
 ; CHECK-NEXT:    std r0, 16(r1)
 ; CHECK-NEXT:    stdu r1, -32(r1)
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 ; CHECK-NEXT:    .cfi_offset lr, 16
-; CHECK-NEXT:    plfd f3, .LCPI16_0@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -343597384
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_1@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, -1719329096
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
-; CHECK-NEXT:    plfd f3, .LCPI16_2@PCREL(0), 1
-; CHECK-NEXT:    xxlxor f4, f4, f4
+; CHECK-DAG:     xxlxor f4, f4, f4
+; CHECK-DAG:     xxsplti32dx vs3, 0, 1074935889
+; CHECK-NEXT:    xxsplti32dx vs3, 1, 8724152
+; CHECK-NEXT:    # kill: def $f3 killed $f3 killed $vsl3
 ; CHECK-NEXT:    bl __gcc_qadd@notoc
 ; CHECK-NEXT:    addi r1, r1, 32
 ; CHECK-NEXT:    ld r0, 16(r1)
diff --git a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
--- a/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -122,19 +122,23 @@
 define dso_local double @testDoubleNonRepresentableScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testDoubleNonRepresentableScalar:
@@ -145,7 +149,9 @@
 ;
 ; CHECK-BE-LABEL: testDoubleNonRepresentableScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 1081435463
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -1374389535
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 3.423300e+02
@@ -154,19 +160,23 @@
 define dso_local float @testFloatDenormScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testFloatDenormScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormScalar:
@@ -177,7 +187,9 @@
 ;
 ; CHECK-BE-LABEL: testFloatDenormScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret float 0x380B38FB80000000
@@ -186,19 +198,23 @@
 define dso_local double @testFloatDenormToDoubleScalar() local_unnamed_addr {
 ; CHECK-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-LE:       # %bb.0: # %entry
-; CHECK-LE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-LE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-BE:       # %bb.0: # %entry
-; CHECK-NOPCREL-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-BE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-BE-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-NOPCREL-LE:       # %bb.0: # %entry
-; CHECK-NOPCREL-LE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-LE-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-NOPCREL-LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-NOPCREL-LE-NEXT:    blr
 ;
 ; CHECK-NOPREFIX-LABEL: testFloatDenormToDoubleScalar:
@@ -209,7 +225,9 @@
 ;
 ; CHECK-BE-LABEL: testFloatDenormToDoubleScalar:
 ; CHECK-BE:       # %bb.0: # %entry
-; CHECK-BE-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 0, 940259579
+; CHECK-BE-NEXT:    xxsplti32dx vs1, 1, -2147483648
+; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-BE-NEXT:    blr
 entry:
   ret double 0x380B38FB80000000
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
--- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll
@@ -173,7 +173,9 @@
 ; CHECK-LARGE:     add r2, r2, r12
 ; CHECK-S-NOT:       .localentry
 ; CHECK-ALL:       # %bb.0: # %entry
-; CHECK-S-NEXT:    plfd f1, .LCPI7_0@PCREL(0), 1
+; CHECK-S-NEXT:    xxsplti32dx vs1, 0, 1078011044
+; CHECK-S-NEXT:    xxsplti32dx vs1, 1, -337824948
+; CHECK-S-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
 ; CHECK-S-NEXT:    blr
 entry:
   ret double 0x404124A4EBDD334C
diff --git a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
--- a/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel-linkeropt.ll
@@ -35,6 +35,9 @@
 @FuncPtrOut = external local_unnamed_addr global void (...)*, align 8
 
 define dso_local void @ReadWrite8() local_unnamed_addr #0 {
+; In this test the stb r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lbz r3, 0(r3)
+; which is defined between the pld and the stb.
 ; CHECK-LABEL: ReadWrite8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input8@got@pcrel(0), 1
@@ -44,9 +47,6 @@
 ; CHECK-NEXT:    lbz r3, 0(r3)
 ; CHECK-NEXT:    stb r3, 0(r4)
 ; CHECK-NEXT:    blr
-; In this test the stb r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lbz r3, 0(r3)
-; which is defined between the pld and the stb.
 entry:
   %0 = load i8, i8* @input8, align 1
   store i8 %0, i8* @output8, align 1
@@ -54,6 +54,9 @@
 }
 
 define dso_local void @ReadWrite16() local_unnamed_addr #0 {
+; In this test the sth r3, 0(r4) cannot be optimized because it
+; uses the register r3 and that register is defined by lhz r3, 0(r3)
+; which is defined between the pld and the sth.
 ; CHECK-LABEL: ReadWrite16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, input16@got@pcrel(0), 1
@@ -63,9 +66,6 @@
 ; CHECK-NEXT:    lhz r3, 0(r3)
 ; CHECK-NEXT:    sth r3, 0(r4)
 ; CHECK-NEXT:    blr
-; In this test the sth r3, 0(r4) cannot be optimized because it
-; uses the register r3 and that register is defined by lhz r3, 0(r3)
-; which is defined between the pld and the sth.
 entry:
   %0 = load i16, i16* @input16, align 2
   store i16 %0, i16* @output16, align 2
@@ -144,7 +144,8 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    pld r3, inputf64@got@pcrel(0), 1
 ; CHECK-NEXT:  .Lpcrel5:
-; CHECK-NEXT:    plfd f1, .LCPI6_0@PCREL(0), 1
+; CHECK-NEXT:    xxsplti32dx vs1, 0, 1075524403
+; CHECK-NEXT:    xxsplti32dx vs1, 1, 858993459
 ; CHECK-NEXT:    .reloc .Lpcrel5-8,R_PPC64_PCREL_OPT,.-(.Lpcrel5-8)
 ; CHECK-NEXT:    lfd f0, 0(r3)
 ; CHECK-NEXT:    pld r3, outputf64@got@pcrel(0), 1
diff --git a/llvm/test/CodeGen/PowerPC/pcrel.ll b/llvm/test/CodeGen/PowerPC/pcrel.ll
--- a/llvm/test/CodeGen/PowerPC/pcrel.ll
+++ b/llvm/test/CodeGen/PowerPC/pcrel.ll
@@ -8,13 +8,14 @@
 
 ; Constant Pool Index.
 ; CHECK-S-LABEL: ConstPool
-; CHECK-S:       plfd f1, .LCPI0_0@PCREL(0), 1
+; CHECK-S:       xxsplti32dx vs1, 0, 1081002676
+; CHECK-S-NEXT:  xxsplti32dx vs1, 1, 962072674
 ; CHECK-S:       blr
 
 ; CHECK-O-LABEL: ConstPool
-; CHECK-O:       plfd 1, 0(0), 1
-; CHECK-O-NEXT:  R_PPC64_PCREL34  .rodata.cst8
-; CHECK-O:       blr
+; CHECK-O:       xxsplti32dx 1, 0, 1081002676
+; CHECK-O-NEXT:  xxsplti32dx 1, 1, 962072674
+; CHECK-O-NEXT:  blr
 define dso_local double @ConstPool() local_unnamed_addr {
 entry:
   ret double 0x406ECAB439581062